From 38fd128eaa46450bd1dc11783f4f827e3afd5729 Mon Sep 17 00:00:00 2001 From: 8man Date: Mon, 11 May 2026 17:51:39 +0530 Subject: [PATCH] Handle blocked responses when final URL resolves elsewhere --- .github/scripts/url-checker.js | 72 +++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/.github/scripts/url-checker.js b/.github/scripts/url-checker.js index 209a2ec..1a65be2 100644 --- a/.github/scripts/url-checker.js +++ b/.github/scripts/url-checker.js @@ -51,6 +51,14 @@ function getFinalUrl(response, originalUrl) { ); } +function normalizeOrigin(url) { + try { + return new URL(url).origin; + } catch { + return url; + } +} + async function requestUrl(method, url) { return axios({ method, @@ -71,30 +79,27 @@ function logVerboseResult(url, response, finalUrl) { ); } -// Check URL and return new URL if domain redirected +function shouldUpdateFromFinalUrl(originalUrl, finalUrl) { + const originalDomain = getDomain(originalUrl); + const finalDomain = getDomain(finalUrl); + return finalDomain && finalDomain !== originalDomain; +} + +// Check URL and return new URL if domain redirected or resolved elsewhere async function checkUrl(url) { try { const response = await requestUrl('get', url); const finalUrl = getFinalUrl(response, url); logVerboseResult(url, response, finalUrl); + if (shouldUpdateFromFinalUrl(url, finalUrl)) { + const updatedUrl = normalizeOrigin(finalUrl) + (hasTrailingSlash(url) ? '/' : ''); + console.log(`🔄 ${url} resolved to ${finalUrl}`); + console.log(`Will update to: ${updatedUrl} (preserved trailing slash: ${hasTrailingSlash(url)})`); + return updatedUrl; + } + if (response.status === 200) { - const originalDomain = getDomain(url); - const finalDomain = getDomain(finalUrl); - - if (finalDomain !== originalDomain) { - console.log(`🔄 ${url} resolved to ${finalUrl}`); - const needsTrailingSlash = hasTrailingSlash(url); - let updatedUrl = finalDomain; - if (needsTrailingSlash) { - updatedUrl += '/'; - } - console.log( - `Will update to: ${updatedUrl} (preserved trailing slash: ${needsTrailingSlash})` - ); - return updatedUrl; - } - console.log(`✅ ${url} is valid (200 OK)`); return null; } @@ -108,18 +113,16 @@ async function checkUrl(url) { fullRedirectUrl = new URL(newLocation, baseUrl.origin).toString(); } - console.log(`🔄 ${url} redirects to ${fullRedirectUrl}`); - const newDomain = getDomain(fullRedirectUrl); - const needsTrailingSlash = hasTrailingSlash(url); - let finalUrlForUpdate = newDomain; - if (needsTrailingSlash) { - finalUrlForUpdate += '/'; + if (shouldUpdateFromFinalUrl(url, fullRedirectUrl)) { + const newDomain = normalizeOrigin(fullRedirectUrl); + const needsTrailingSlash = hasTrailingSlash(url); + const finalUrlForUpdate = newDomain + (needsTrailingSlash ? '/' : ''); + console.log(`🔄 ${url} redirects to ${fullRedirectUrl}`); + console.log( + `Will update to: ${finalUrlForUpdate} (preserved trailing slash: ${needsTrailingSlash})` + ); + return finalUrlForUpdate; } - - console.log( - `Will update to: ${finalUrlForUpdate} (preserved trailing slash: ${needsTrailingSlash})` - ); - return finalUrlForUpdate; } } @@ -128,6 +131,19 @@ async function checkUrl(url) { if (error.response) { const finalUrl = getFinalUrl(error.response, url); logVerboseResult(url, error.response, finalUrl); + + // If the request resolves to a different origin even with a non-2xx status, + // use that as an update signal. This keeps existing behavior intact while + // allowing sites that block HEAD/GET with 403 but still resolve elsewhere. + if (shouldUpdateFromFinalUrl(url, finalUrl)) { + const updatedUrl = normalizeOrigin(finalUrl) + (hasTrailingSlash(url) ? '/' : ''); + console.log(`🔄 ${url} resolved to ${finalUrl}`); + console.log( + `Will update to: ${updatedUrl} (preserved trailing slash: ${hasTrailingSlash(url)})` + ); + return updatedUrl; + } + console.log(`⚠️ ${url} returned status ${error.response.status}`); } else if (error.code === 'ECONNABORTED') { console.log(`⌛ ${url} request timed out`);