diff --git a/crontab/crawler.php b/crontab/crawler.php index e4dd37b..9bab142 100644 --- a/crontab/crawler.php +++ b/crontab/crawler.php @@ -92,9 +92,9 @@ foreach ($db->getPageQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET) if (!parse_url($src, PHP_URL_HOST)) { $src = parse_url($queue->url, PHP_URL_SCHEME) . '://' . - parse_url($queue->url, PHP_URL_HOST) . - parse_url($queue->url, PHP_URL_PORT) . - $src; // @TODO sometimes wrong URL prefix available + parse_url($queue->url, PHP_URL_HOST) . + parse_url($queue->url, PHP_URL_PORT) . + $src; // @TODO sometimes wrong URL prefix available } // Add page images @@ -134,10 +134,7 @@ foreach ($db->getPageQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET) // Save valid internal links to the index queue if (filter_var($href, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $href)) { - if (!$db->getPage($href)) { - - $db->initPage($href, time()); - } + $db->initPage($href, time()); } } } \ No newline at end of file diff --git a/library/sqlite.php b/library/sqlite.php index ff09e9e..84dd0bc 100644 --- a/library/sqlite.php +++ b/library/sqlite.php @@ -21,7 +21,7 @@ class SQLite { "data" TEXT, "description" TEXT, "keywords" TEXT, - "url" TEXT NOT NULL + "url" TEXT NOT NULL UNIQUE ) '); @@ -30,7 +30,7 @@ class SQLite { "imageId" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, "pageId" INTEGER NOT NULL, "alt" TEXT NOT NULL, - "src" TEXT NOT NULL + "src" TEXT NOT NULL UNIQUE ) '); @@ -85,15 +85,6 @@ class SQLite { return $query->fetch()->total; } - public function getPage(string $url) { - - $query = $this->_db->prepare('SELECT * FROM `page` WHERE `url` = ?'); - - $query->execute([$url]); - - return $query->fetch(); - } - public function updatePage(int $pageId, string $title, string $description, string $keywords, string $data, int $timeUpdated) { $query = $this->_db->prepare('UPDATE `page` SET `title` = ?, `description` = ?, `data` = ?, `timeUpdated` = ? WHERE `pageId` = ?'); @@ -114,7 +105,7 @@ class SQLite { public function initPage(string $url, int $timeAdded) { - $query = $this->_db->prepare('INSERT INTO `page` (`url`, `timeAdded`) VALUES (?, ?)'); + $query = $this->_db->prepare('INSERT OR IGNORE INTO `page` (`url`, `timeAdded`) VALUES (?, ?)'); $query->execute([$url, $timeAdded]); @@ -123,7 +114,7 @@ class SQLite { public function addImage(int $pageId, string $src, string $alt) { - $query = $this->_db->prepare('INSERT INTO `image` (`pageId`, `src`, `alt`) VALUES (?, ?, ?)'); + $query = $this->_db->prepare('INSERT OR IGNORE INTO `image` (`pageId`, `src`, `alt`) VALUES (?, ?, ?)'); $query->execute([$pageId, $src, $alt]); diff --git a/public/search.php b/public/search.php index 637a220..8e76f4a 100644 --- a/public/search.php +++ b/public/search.php @@ -22,9 +22,7 @@ $q = !empty($_GET['q']) ? Filter::url($_GET['q']) : ''; // Crawl request if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) { - if (!$db->getPage($q)) { - $db->initPage($q, time()); - } + $db->initPage($q, time()); } // Search request