Improve URL checker with browser headers and redirect detection

This commit is contained in:
8man
2026-05-11 16:48:14 +05:30
parent 0081034e35
commit 1d39a56cc0
+88 -40
View File
@@ -4,6 +4,16 @@ const axios = require('axios');
const FILE_PATH = 'modflix.json'; const FILE_PATH = 'modflix.json';
const updatedProviders = []; // Track updated providers for Discord notification const updatedProviders = []; // Track updated providers for Discord notification
// Request headers that imitate a desktop Chrome browser. requestUrl() passes
// this object as the `headers` option of every axios call it makes.
const DEFAULT_HEADERS = {
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36',
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
  'Accept-Language': 'en-US,en;q=0.9',
  'Accept-Encoding': 'gzip, deflate, br',
  'Connection': 'keep-alive',
};
// Read the modflix.json file // Read the modflix.json file
function readModflixJson() { function readModflixJson() {
try { try {
@@ -31,25 +41,56 @@ function hasTrailingSlash(url) {
return url.endsWith('/') && !url.endsWith('://'); return url.endsWith('/') && !url.endsWith('://');
} }
/**
 * Pick the URL a response ultimately came from.
 *
 * Candidates are tried in order and the first truthy one wins:
 *   1. response.request.res.responseUrl
 *   2. response.request._redirectable._currentUrl
 *   3. response.config.url
 *   4. originalUrl (returned as-is, even when falsy)
 *
 * @param {object|null|undefined} response - Response object to inspect; may be missing.
 * @param {string} originalUrl - Fallback used when the response carries no URL.
 * @returns {string} The first available URL from the list above.
 */
function getFinalUrl(response, originalUrl) {
  const request = response?.request;

  const responseUrl = request?.res?.responseUrl;
  if (responseUrl) {
    return responseUrl;
  }

  const currentUrl = request?._redirectable?._currentUrl;
  if (currentUrl) {
    return currentUrl;
  }

  const configuredUrl = response?.config?.url;
  if (configuredUrl) {
    return configuredUrl;
  }

  return originalUrl;
}
/**
 * Issue a single HTTP request through axios with the shared browser-like headers.
 *
 * @param {string} method - HTTP method handed to axios (callers use 'head' and 'get').
 * @param {string} url - Address to request.
 * @returns {Promise<object>} Whatever axios resolves with for this request.
 */
async function requestUrl(method, url) {
  const requestConfig = {
    url,
    method,
    headers: DEFAULT_HEADERS,
    timeout: 10000,
    maxRedirects: 5,
    // Accept every status code, so the caller inspects response.status itself.
    validateStatus: () => true,
  };

  return axios(requestConfig);
}
// Check URL and return new URL if domain redirected // Check URL and return new URL if domain redirected
async function checkUrl(url) { async function checkUrl(url) {
try { try {
// Set timeout to 10 seconds to avoid hanging const response = await requestUrl('head', url);
const response = await axios.head(url, { const finalUrl = getFinalUrl(response, url);
maxRedirects: 0,
timeout: 10000,
validateStatus: status => true
});
// If status is 200, no change needed
if (response.status === 200) { if (response.status === 200) {
const originalDomain = getDomain(url);
const finalDomain = getDomain(finalUrl);
if (finalDomain !== originalDomain) {
console.log(`🔄 ${url} resolved to ${finalUrl}`);
const needsTrailingSlash = hasTrailingSlash(url);
let updatedUrl = finalDomain;
if (needsTrailingSlash) {
updatedUrl += '/';
}
console.log(
`Will update to: ${updatedUrl} (preserved trailing slash: ${needsTrailingSlash})`
);
return updatedUrl;
}
console.log(`${url} is valid (200 OK)`); console.log(`${url} is valid (200 OK)`);
return null; return null;
} else if (response.status >= 300 && response.status < 400) { }
// Handle redirects
if (response.status >= 300 && response.status < 400) {
const newLocation = response.headers.location; const newLocation = response.headers.location;
if (newLocation) { if (newLocation) {
// If it's a relative redirect, construct the full URL
let fullRedirectUrl = newLocation; let fullRedirectUrl = newLocation;
if (!newLocation.startsWith('http')) { if (!newLocation.startsWith('http')) {
const baseUrl = new URL(url); const baseUrl = new URL(url);
@@ -57,39 +98,49 @@ async function checkUrl(url) {
} }
console.log(`🔄 ${url} redirects to ${fullRedirectUrl}`); console.log(`🔄 ${url} redirects to ${fullRedirectUrl}`);
// Get the new domain
const newDomain = getDomain(fullRedirectUrl); const newDomain = getDomain(fullRedirectUrl);
// Check if original URL had a trailing slash
const needsTrailingSlash = hasTrailingSlash(url); const needsTrailingSlash = hasTrailingSlash(url);
let finalUrlForUpdate = newDomain;
// Create new URL: new domain + trailing slash if the original had one
let finalUrl = newDomain;
if (needsTrailingSlash) { if (needsTrailingSlash) {
finalUrl += '/'; finalUrlForUpdate += '/';
} }
console.log(`Will update to: ${finalUrl} (preserved trailing slash: ${needsTrailingSlash})`); console.log(
return finalUrl; `Will update to: ${finalUrlForUpdate} (preserved trailing slash: ${needsTrailingSlash})`
);
return finalUrlForUpdate;
} }
} else { }
console.log(`⚠️ ${url} returned status ${response.status}`); console.log(`⚠️ ${url} returned status ${response.status}`);
}
} catch (error) { } catch (error) {
// Try GET request if HEAD fails // Try GET request if HEAD fails
try { try {
const response = await axios.get(url, { const response = await requestUrl('get', url);
maxRedirects: 0, const finalUrl = getFinalUrl(response, url);
timeout: 10000,
validateStatus: status => true
});
if (response.status === 200) { if (response.status === 200) {
const originalDomain = getDomain(url);
const finalDomain = getDomain(finalUrl);
if (finalDomain !== originalDomain) {
console.log(`🔄 ${url} resolved to ${finalUrl}`);
const needsTrailingSlash = hasTrailingSlash(url);
let updatedUrl = finalDomain;
if (needsTrailingSlash) {
updatedUrl += '/';
}
console.log(
`Will update to: ${updatedUrl} (preserved trailing slash: ${needsTrailingSlash})`
);
return updatedUrl;
}
console.log(`${url} is valid (200 OK)`); console.log(`${url} is valid (200 OK)`);
return null; return null;
} else if (response.status >= 300 && response.status < 400) { }
// Handle redirects
if (response.status >= 300 && response.status < 400) {
const newLocation = response.headers.location; const newLocation = response.headers.location;
if (newLocation) { if (newLocation) {
console.log(`🔄 ${url} redirects to ${newLocation}`); console.log(`🔄 ${url} redirects to ${newLocation}`);
@@ -100,24 +151,21 @@ async function checkUrl(url) {
fullRedirectUrl = new URL(newLocation, baseUrl.origin).toString(); fullRedirectUrl = new URL(newLocation, baseUrl.origin).toString();
} }
// Get the new domain
const newDomain = getDomain(fullRedirectUrl); const newDomain = getDomain(fullRedirectUrl);
// Check if original URL had a trailing slash
const needsTrailingSlash = hasTrailingSlash(url); const needsTrailingSlash = hasTrailingSlash(url);
let finalUrlForUpdate = newDomain;
// Create new URL: new domain + trailing slash if the original had one
let finalUrl = newDomain;
if (needsTrailingSlash) { if (needsTrailingSlash) {
finalUrl += '/'; finalUrlForUpdate += '/';
} }
console.log(`Will update to: ${finalUrl} (preserved trailing slash: ${needsTrailingSlash})`); console.log(
return finalUrl; `Will update to: ${finalUrlForUpdate} (preserved trailing slash: ${needsTrailingSlash})`
);
return finalUrlForUpdate;
} }
} else { }
console.log(`⚠️ ${url} returned status ${response.status}`); console.log(`⚠️ ${url} returned status ${response.status}`);
}
} catch (getError) { } catch (getError) {
if (getError.response) { if (getError.response) {
console.log(`⚠️ ${url} returned status ${getError.response.status}`); console.log(`⚠️ ${url} returned status ${getError.response.status}`);