From 512bd5605654636194d63d4cbf86a075ee54f2de Mon Sep 17 00:00:00 2001 From: ghost Date: Sun, 4 Jun 2023 12:04:41 +0300 Subject: [PATCH] ban the page that throws an error and blocks the crawl queue --- crontab/crawler.php | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/crontab/crawler.php b/crontab/crawler.php index 6d08673..692ec04 100644 --- a/crontab/crawler.php +++ b/crontab/crawler.php @@ -796,13 +796,32 @@ try { } } + // Apply changes $db->commit(); +// Process update errors } catch(Exception $e) { + // Roll back DB changes + $db->rollBack(); + + // Debug output var_dump($e); - $db->rollBack(); + // Ban the page that throws an error and blocks the crawl queue + if (!empty($queueHostPage->hostPageId)) { + + $hostPagesBanned = $db->updateHostPageTimeBanned($queueHostPage->hostPageId, time()); + + // Reset counters + $hostPagesProcessed = $hostPagesBanned; + $manifestsProcessed = 0; + $hostPagesIndexed = 0; + $manifestsAdded = 0; + $hostPagesAdded = 0; + $hostsAdded = 0; + $hostPagesSnapAdded = 0; + } } // Debug