From 93baed4b9048cb4f686f356381725382db56003b Mon Sep 17 00:00:00 2001 From: ghost Date: Wed, 20 Dec 2023 08:44:35 +0200 Subject: [PATCH] delete deprecated documents with HTTP code not 200 on second scan --- src/cli/document/crawl.php | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/cli/document/crawl.php b/src/cli/document/crawl.php index 7f3cd6d..7a49873 100644 --- a/src/cli/document/crawl.php +++ b/src/cli/document/crawl.php @@ -178,6 +178,16 @@ foreach($index->search('') // Update HTTP code or skip on empty if ($code = curl_getinfo($request, CURLINFO_HTTP_CODE)) { + // Delete deprecated document from the index: its previously recorded HTTP code was not 200 and the current response is still not 200 + if ($code != 200 && !empty($data['code']) && $data['code'] != 200) + { + $index->deleteDocument( + $document->getId() + ); + + continue; + } + $data['code'] = $code; } else continue;