Improve URL checker with browser headers and redirect detection

This commit is contained in:
8man
2026-05-11 16:48:14 +05:30
parent 0081034e35
commit 1d39a56cc0
+244 -196
View File
@@ -1,196 +1,244 @@
const fs = require('fs');
const axios = require('axios');

// Provider list that this script reads and, when a URL changed, rewrites.
const FILE_PATH = 'modflix.json';
const updatedProviders = []; // Track updated providers for Discord notification

// Browser-like headers attached to every outgoing check request.
// NOTE(review): presumably needed because some hosts answer differently to
// clients without a browser User-Agent — confirm against the providers in use.
const DEFAULT_HEADERS = {
  'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36',
  Accept:
    'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
  'Accept-Language': 'en-US,en;q=0.9',
  'Accept-Encoding': 'gzip, deflate, br',
  Connection: 'keep-alive'
};

/**
 * Read and parse the provider list stored at FILE_PATH.
 *
 * A missing, unreadable or malformed file is fatal: the error is logged and
 * the process exits with status 1, so this function never returns in that case.
 *
 * @returns {object} The parsed JSON content of the file.
 */
function readModflixJson() {
  try {
    const data = fs.readFileSync(FILE_PATH, 'utf8');
    return JSON.parse(data);
  } catch (error) {
    console.error(`Error reading ${FILE_PATH}:`, error);
    process.exit(1);
  }
}

/**
 * Extract the origin (scheme + host + optional port, no trailing slash) of a URL.
 *
 * @param {string} url - Absolute URL to inspect.
 * @returns {string} The origin, or `url` unchanged when it cannot be parsed
 *   or when its scheme has no usable origin.
 */
function getDomain(url) {
  try {
    const { origin } = new URL(url);
    // Schemes such as file:, data: or mailto: have an opaque origin that
    // serializes as the literal string "null"; never hand that back as a domain.
    return origin === 'null' ? url : origin;
  } catch (error) {
    console.error(`Error parsing URL ${url}:`, error);
    return url;
  }
}

/**
 * Tell whether a URL string ends with a slash that belongs to its path.
 *
 * A string that merely ends with the scheme separator ("://") does not count.
 *
 * @param {string} url - URL exactly as stored in the provider list.
 * @returns {boolean} True when the stored URL ends with a path slash;
 *   false otherwise, including for non-string input.
 */
function hasTrailingSlash(url) {
  if (typeof url !== 'string') {
    return false;
  }
  return url.endsWith('/') && !url.endsWith('://');
}
/**
 * Work out which URL a response was ultimately served from.
 *
 * Candidates are tried in order and the first truthy one wins:
 *   1. response.request.res.responseUrl
 *   2. response.request._redirectable._currentUrl
 *   3. response.config.url
 *   4. originalUrl
 *
 * NOTE(review): the first two look like internals of axios's Node adapter and
 * its redirect-following layer rather than public API — re-verify after any
 * axios upgrade.
 *
 * @param {object} [response] - Response object returned by axios.
 * @param {string} originalUrl - URL that was requested; used as last resort.
 * @returns {string} The best available final URL.
 */
function getFinalUrl(response, originalUrl) {
  return (
    response?.request?.res?.responseUrl ||
    response?.request?._redirectable?._currentUrl ||
    response?.config?.url ||
    originalUrl
  );
}

/**
 * Send one HTTP request with the shared browser-like headers.
 *
 * Up to 5 redirects are followed and the request times out after 10 seconds.
 * Every HTTP status resolves the promise (validateStatus always returns true),
 * so callers inspect `response.status` themselves; the promise rejects only
 * when no response could be obtained.
 *
 * @param {string} method - HTTP method, e.g. 'head' or 'get'.
 * @param {string} url - URL to request.
 * @returns {Promise<object>} The axios response.
 */
async function requestUrl(method, url) {
  return axios({
    method,
    url,
    maxRedirects: 5,
    timeout: 10000,
    validateStatus: () => true,
    headers: DEFAULT_HEADERS
  });
}
/**
 * Build the replacement for a stored provider URL: the origin of `targetUrl`,
 * plus a trailing slash when the stored URL had one.
 *
 * @param {string} originalUrl - URL as currently stored.
 * @param {string} targetUrl - URL the provider now lives at.
 * @returns {string} The URL to store instead.
 */
function buildUpdatedUrl(originalUrl, targetUrl) {
  const needsTrailingSlash = hasTrailingSlash(originalUrl);
  const newDomain = getDomain(targetUrl);
  const updatedUrl = needsTrailingSlash ? `${newDomain}/` : newDomain;

  console.log(
    `Will update to: ${updatedUrl} (preserved trailing slash: ${needsTrailingSlash})`
  );
  return updatedUrl;
}

/**
 * Decide from one response whether the stored URL has to change.
 *
 * @param {object} response - axios response for `url`.
 * @param {string} url - URL as currently stored.
 * @returns {string|null} Replacement URL, or null when nothing should change.
 */
function resolveUpdatedUrl(response, url) {
  if (response.status === 200) {
    // Redirects were followed transparently, so compare where we ended up
    // with where we started.
    const finalUrl = getFinalUrl(response, url);
    if (getDomain(finalUrl) !== getDomain(url)) {
      console.log(`🔄 ${url} resolved to ${finalUrl}`);
      return buildUpdatedUrl(url, finalUrl);
    }

    console.log(`${url} is valid (200 OK)`);
    return null;
  }

  const newLocation = response.headers?.location;
  if (response.status >= 300 && response.status < 400 && newLocation) {
    let fullRedirectUrl;
    try {
      // Resolving against the request URL handles absolute, root-relative,
      // path-relative and protocol-relative Location values alike.
      fullRedirectUrl = new URL(newLocation, url).toString();
    } catch (error) {
      // An unparseable Location must never end up as the stored URL.
      console.log(`❌ Error checking ${url}: ${error.message}`);
      return null;
    }

    console.log(`🔄 ${url} redirects to ${fullRedirectUrl}`);
    return buildUpdatedUrl(url, fullRedirectUrl);
  }

  console.log(`⚠️ ${url} returned status ${response.status}`);
  return null;
}

/**
 * Check a provider URL and return the URL it should be replaced with.
 *
 * A HEAD request is tried first; if it throws, the same check is repeated
 * with GET. Failures of the GET attempt are logged and treated as "no change".
 *
 * @param {string} url - URL as currently stored.
 * @returns {Promise<string|null>} Replacement URL, or null if no change or error.
 */
async function checkUrl(url) {
  try {
    const response = await requestUrl('head', url);
    return resolveUpdatedUrl(response, url);
  } catch {
    // Try GET request if HEAD fails
    try {
      const response = await requestUrl('get', url);
      return resolveUpdatedUrl(response, url);
    } catch (getError) {
      if (getError.response) {
        console.log(`⚠️ ${url} returned status ${getError.response.status}`);
      } else if (getError.code === 'ECONNABORTED') {
        console.log(`${url} request timed out`);
      } else if (getError.code === 'ENOTFOUND') {
        console.log(`${url} domain not found`);
      } else {
        console.log(`❌ Error checking ${url}: ${getError.message}`);
      }
    }
  }

  // Return null if no change or error
  return null;
}

/**
 * Check every provider in the list, rewrite the file when any URL changed,
 * and print the changes between marker lines for the Discord notification.
 *
 * Providers are checked one after another so the log stays in file order.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const providers = readModflixJson();
  let hasChanges = false;

  // Process each provider
  for (const [key, provider] of Object.entries(providers)) {
    const url = provider?.url;
    if (typeof url !== 'string' || url === '') {
      // Nothing to check; avoids two doomed requests against "undefined".
      console.log(`⚠️ Skipping ${key}: no URL configured`);
      continue;
    }

    console.log(`Checking ${provider.name} (${url})...`);

    try {
      const newUrl = await checkUrl(url);
      if (newUrl && newUrl !== url) {
        // Update the provider URL, keeping the old one for the report
        provider.url = newUrl;
        hasChanges = true;
        console.log(`Updated ${provider.name} URL from ${url} to ${newUrl}`);

        // Track updated provider for Discord notification
        updatedProviders.push({
          name: provider.name,
          oldUrl: url,
          newUrl
        });
      }
    } catch (error) {
      console.log(`❌ Error processing ${url}: ${error.message}`);
    }
  }

  if (!hasChanges) {
    console.log(`️ No changes needed for ${FILE_PATH}`);
    return;
  }

  // Write changes back, pretty-printed with 2-space indentation
  const jsonString = JSON.stringify(providers, null, 2);
  fs.writeFileSync(FILE_PATH, jsonString);
  console.log(`✅ Updated ${FILE_PATH} with new URLs`);

  // Output updated providers for Discord notification in a clean format
  if (updatedProviders.length > 0) {
    console.log("\n### UPDATED_PROVIDERS_START ###");
    for (const updated of updatedProviders) {
      // Format: name|oldUrl|newUrl (pipe-delimited for easy parsing)
      console.log(`${updated.name}|${updated.oldUrl}|${updated.newUrl}`);
    }
    console.log("### UPDATED_PROVIDERS_END ###");
  }
}
// Entry point: run main() and turn any error that escapes it into a logged
// message plus a non-zero exit status.
const exitOnUnhandledError = (reason) => {
  console.error('Unhandled error:', reason);
  process.exit(1);
};

main().catch(exitOnUnhandledError);