diff --git a/backend/src/fetch/generic.rs b/backend/src/fetch/generic.rs index ae7f64f..a930345 100644 --- a/backend/src/fetch/generic.rs +++ b/backend/src/fetch/generic.rs @@ -52,7 +52,26 @@ pub async fn fetch( ) -> anyhow::Result> { let resp = client.get(raw_url).send().await?; if !resp.status().is_success() { - anyhow::bail!("product page returned HTTP {}", resp.status()); + let status = resp.status(); + // Cloudflare/Akamai bot managers answer 403/503 with an active + // challenge a plain HTTP client can't solve (needs a real browser). + // Name it so the failure is actionable, not a mystery "403". + let challenged = resp + .headers() + .get("cf-mitigated") + .is_some_and(|v| v.to_str().map(|s| s.contains("challenge")).unwrap_or(false)) + || matches!(status.as_u16(), 403 | 503) + && resp + .headers() + .get("server") + .and_then(|v| v.to_str().ok()) + .is_some_and(|s| s.eq_ignore_ascii_case("cloudflare")); + if challenged { + anyhow::bail!( + "this shop is behind anti-bot protection (Cloudflare) and can't be price-checked automatically" + ); + } + anyhow::bail!("product page returned HTTP {status}"); } let body = resp.text().await?;