Make url/src columns unique keys; use INSERT OR IGNORE for page and image inserts

This commit is contained in:
ghost 2023-04-02 18:09:44 +03:00
parent aadfe7f551
commit b218b8bbc3
3 changed files with 9 additions and 23 deletions

View File

@ -92,9 +92,9 @@ foreach ($db->getPageQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET)
if (!parse_url($src, PHP_URL_HOST)) { if (!parse_url($src, PHP_URL_HOST)) {
$src = parse_url($queue->url, PHP_URL_SCHEME) . '://' . $src = parse_url($queue->url, PHP_URL_SCHEME) . '://' .
parse_url($queue->url, PHP_URL_HOST) . parse_url($queue->url, PHP_URL_HOST) .
parse_url($queue->url, PHP_URL_PORT) . parse_url($queue->url, PHP_URL_PORT) .
$src; // @TODO sometimes wrong URL prefix available $src; // @TODO sometimes wrong URL prefix available
} }
// Add page images // Add page images
@ -134,10 +134,7 @@ foreach ($db->getPageQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET)
// Save valid internal links to the index queue // Save valid internal links to the index queue
if (filter_var($href, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $href)) { if (filter_var($href, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $href)) {
if (!$db->getPage($href)) { $db->initPage($href, time());
$db->initPage($href, time());
}
} }
} }
} }

View File

@ -21,7 +21,7 @@ class SQLite {
"data" TEXT, "data" TEXT,
"description" TEXT, "description" TEXT,
"keywords" TEXT, "keywords" TEXT,
"url" TEXT NOT NULL "url" TEXT NOT NULL UNIQUE
) )
'); ');
@ -30,7 +30,7 @@ class SQLite {
"imageId" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, "imageId" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"pageId" INTEGER NOT NULL, "pageId" INTEGER NOT NULL,
"alt" TEXT NOT NULL, "alt" TEXT NOT NULL,
"src" TEXT NOT NULL "src" TEXT NOT NULL UNIQUE
) )
'); ');
@ -85,15 +85,6 @@ class SQLite {
return $query->fetch()->total; return $query->fetch()->total;
} }
public function getPage(string $url) {
$query = $this->_db->prepare('SELECT * FROM `page` WHERE `url` = ?');
$query->execute([$url]);
return $query->fetch();
}
public function updatePage(int $pageId, string $title, string $description, string $keywords, string $data, int $timeUpdated) { public function updatePage(int $pageId, string $title, string $description, string $keywords, string $data, int $timeUpdated) {
$query = $this->_db->prepare('UPDATE `page` SET `title` = ?, `description` = ?, `data` = ?, `timeUpdated` = ? WHERE `pageId` = ?'); $query = $this->_db->prepare('UPDATE `page` SET `title` = ?, `description` = ?, `data` = ?, `timeUpdated` = ? WHERE `pageId` = ?');
@ -114,7 +105,7 @@ class SQLite {
public function initPage(string $url, int $timeAdded) { public function initPage(string $url, int $timeAdded) {
$query = $this->_db->prepare('INSERT INTO `page` (`url`, `timeAdded`) VALUES (?, ?)'); $query = $this->_db->prepare('INSERT OR IGNORE INTO `page` (`url`, `timeAdded`) VALUES (?, ?)');
$query->execute([$url, $timeAdded]); $query->execute([$url, $timeAdded]);
@ -123,7 +114,7 @@ class SQLite {
public function addImage(int $pageId, string $src, string $alt) { public function addImage(int $pageId, string $src, string $alt) {
$query = $this->_db->prepare('INSERT INTO `image` (`pageId`, `src`, `alt`) VALUES (?, ?, ?)'); $query = $this->_db->prepare('INSERT OR IGNORE INTO `image` (`pageId`, `src`, `alt`) VALUES (?, ?, ?)');
$query->execute([$pageId, $src, $alt]); $query->execute([$pageId, $src, $alt]);

View File

@ -22,9 +22,7 @@ $q = !empty($_GET['q']) ? Filter::url($_GET['q']) : '';
// Crawl request // Crawl request
if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) { if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
if (!$db->getPage($q)) { $db->initPage($q, time());
$db->initPage($q, time());
}
} }
// Search request // Search request