Handle blocked responses when final URL resolves elsewhere

This commit is contained in:
8man
2026-05-11 17:51:39 +05:30
parent bfe80ce95a
commit 38fd128eaa
+44 -28
View File
@@ -51,6 +51,14 @@ function getFinalUrl(response, originalUrl) {
); );
} }
/**
 * Reduce a URL string to its origin (scheme + host + non-default port).
 *
 * @param {string} url - URL to normalize.
 * @returns {string} The serialized origin (e.g. `https://example.com`), or
 *   `url` unchanged when it cannot be parsed or has no usable origin.
 */
function normalizeOrigin(url) {
  let origin;
  try {
    ({ origin } = new URL(url));
  } catch {
    // Not an absolute URL: hand the input back untouched.
    return url;
  }
  // Schemes with an opaque origin (file:, mailto:, data:, ...) serialize it as
  // the literal string "null". Returning that would let callers build a bogus
  // "null/" replacement, so fall back to the input just like the parse failure.
  return origin === 'null' ? url : origin;
}
async function requestUrl(method, url) { async function requestUrl(method, url) {
return axios({ return axios({
method, method,
@@ -71,30 +79,27 @@ function logVerboseResult(url, response, finalUrl) {
); );
} }
/**
 * Decide whether a request's final URL should replace the original one.
 *
 * Both URLs are passed through `getDomain` (defined elsewhere in this file;
 * presumably it yields a falsy value when no domain can be extracted — confirm)
 * and the results are compared.
 *
 * @param {string} originalUrl - URL that was requested.
 * @param {string} finalUrl - URL the request ended up at.
 * @returns {boolean} `true` only when a final domain was extracted and it
 *   differs from the original's domain.
 */
function shouldUpdateFromFinalUrl(originalUrl, finalUrl) {
  const originalDomain = getDomain(originalUrl);
  const finalDomain = getDomain(finalUrl);
  // Coerce so the predicate always yields a real boolean rather than leaking
  // whatever falsy value getDomain produced.
  return Boolean(finalDomain) && finalDomain !== originalDomain;
}
// Check URL and return new URL if domain redirected or resolved elsewhere
async function checkUrl(url) { async function checkUrl(url) {
try { try {
const response = await requestUrl('get', url); const response = await requestUrl('get', url);
const finalUrl = getFinalUrl(response, url); const finalUrl = getFinalUrl(response, url);
logVerboseResult(url, response, finalUrl); logVerboseResult(url, response, finalUrl);
if (shouldUpdateFromFinalUrl(url, finalUrl)) {
const updatedUrl = normalizeOrigin(finalUrl) + (hasTrailingSlash(url) ? '/' : '');
console.log(`🔄 ${url} resolved to ${finalUrl}`);
console.log(`Will update to: ${updatedUrl} (preserved trailing slash: ${hasTrailingSlash(url)})`);
return updatedUrl;
}
if (response.status === 200) { if (response.status === 200) {
const originalDomain = getDomain(url);
const finalDomain = getDomain(finalUrl);
if (finalDomain !== originalDomain) {
console.log(`🔄 ${url} resolved to ${finalUrl}`);
const needsTrailingSlash = hasTrailingSlash(url);
let updatedUrl = finalDomain;
if (needsTrailingSlash) {
updatedUrl += '/';
}
console.log(
`Will update to: ${updatedUrl} (preserved trailing slash: ${needsTrailingSlash})`
);
return updatedUrl;
}
console.log(`${url} is valid (200 OK)`); console.log(`${url} is valid (200 OK)`);
return null; return null;
} }
@@ -108,18 +113,16 @@ async function checkUrl(url) {
fullRedirectUrl = new URL(newLocation, baseUrl.origin).toString(); fullRedirectUrl = new URL(newLocation, baseUrl.origin).toString();
} }
console.log(`🔄 ${url} redirects to ${fullRedirectUrl}`); if (shouldUpdateFromFinalUrl(url, fullRedirectUrl)) {
const newDomain = getDomain(fullRedirectUrl); const newDomain = normalizeOrigin(fullRedirectUrl);
const needsTrailingSlash = hasTrailingSlash(url); const needsTrailingSlash = hasTrailingSlash(url);
let finalUrlForUpdate = newDomain; const finalUrlForUpdate = newDomain + (needsTrailingSlash ? '/' : '');
if (needsTrailingSlash) { console.log(`🔄 ${url} redirects to ${fullRedirectUrl}`);
finalUrlForUpdate += '/'; console.log(
`Will update to: ${finalUrlForUpdate} (preserved trailing slash: ${needsTrailingSlash})`
);
return finalUrlForUpdate;
} }
console.log(
`Will update to: ${finalUrlForUpdate} (preserved trailing slash: ${needsTrailingSlash})`
);
return finalUrlForUpdate;
} }
} }
@@ -128,6 +131,19 @@ async function checkUrl(url) {
if (error.response) { if (error.response) {
const finalUrl = getFinalUrl(error.response, url); const finalUrl = getFinalUrl(error.response, url);
logVerboseResult(url, error.response, finalUrl); logVerboseResult(url, error.response, finalUrl);
// If the request resolves to a different origin even with a non-2xx status,
// use that as an update signal. This keeps existing behavior intact while
// allowing sites that block HEAD/GET with 403 but still resolve elsewhere.
if (shouldUpdateFromFinalUrl(url, finalUrl)) {
const updatedUrl = normalizeOrigin(finalUrl) + (hasTrailingSlash(url) ? '/' : '');
console.log(`🔄 ${url} resolved to ${finalUrl}`);
console.log(
`Will update to: ${updatedUrl} (preserved trailing slash: ${hasTrailingSlash(url)})`
);
return updatedUrl;
}
console.log(`⚠️ ${url} returned status ${error.response.status}`); console.log(`⚠️ ${url} returned status ${error.response.status}`);
} else if (error.code === 'ECONNABORTED') { } else if (error.code === 'ECONNABORTED') {
console.log(`${url} request timed out`); console.log(`${url} request timed out`);