Browse Source

Make url/src columns unique keys; use INSERT OR IGNORE instead of a getPage lookup before insert

sqliteway
ghost 2 years ago
parent
commit
b218b8bbc3
  1. 3
      crontab/crawler.php
  2. 17
      library/sqlite.php
  3. 2
      public/search.php

3
crontab/crawler.php

@@ -134,10 +134,7 @@ foreach ($db->getPageQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET)
// Save valid internal links to the index queue // Save valid internal links to the index queue
if (filter_var($href, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $href)) { if (filter_var($href, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $href)) {
if (!$db->getPage($href)) {
$db->initPage($href, time()); $db->initPage($href, time());
} }
} }
} }
}

17
library/sqlite.php

@@ -21,7 +21,7 @@ class SQLite {
"data" TEXT, "data" TEXT,
"description" TEXT, "description" TEXT,
"keywords" TEXT, "keywords" TEXT,
"url" TEXT NOT NULL "url" TEXT NOT NULL UNIQUE
) )
'); ');
@@ -30,7 +30,7 @@ class SQLite {
"imageId" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, "imageId" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"pageId" INTEGER NOT NULL, "pageId" INTEGER NOT NULL,
"alt" TEXT NOT NULL, "alt" TEXT NOT NULL,
"src" TEXT NOT NULL "src" TEXT NOT NULL UNIQUE
) )
'); ');
@@ -85,15 +85,6 @@ class SQLite {
return $query->fetch()->total; return $query->fetch()->total;
} }
public function getPage(string $url) {
$query = $this->_db->prepare('SELECT * FROM `page` WHERE `url` = ?');
$query->execute([$url]);
return $query->fetch();
}
public function updatePage(int $pageId, string $title, string $description, string $keywords, string $data, int $timeUpdated) { public function updatePage(int $pageId, string $title, string $description, string $keywords, string $data, int $timeUpdated) {
$query = $this->_db->prepare('UPDATE `page` SET `title` = ?, `description` = ?, `data` = ?, `timeUpdated` = ? WHERE `pageId` = ?'); $query = $this->_db->prepare('UPDATE `page` SET `title` = ?, `description` = ?, `data` = ?, `timeUpdated` = ? WHERE `pageId` = ?');
@@ -114,7 +105,7 @@ class SQLite {
public function initPage(string $url, int $timeAdded) { public function initPage(string $url, int $timeAdded) {
$query = $this->_db->prepare('INSERT INTO `page` (`url`, `timeAdded`) VALUES (?, ?)'); $query = $this->_db->prepare('INSERT OR IGNORE INTO `page` (`url`, `timeAdded`) VALUES (?, ?)');
$query->execute([$url, $timeAdded]); $query->execute([$url, $timeAdded]);
@@ -123,7 +114,7 @@ class SQLite {
public function addImage(int $pageId, string $src, string $alt) { public function addImage(int $pageId, string $src, string $alt) {
$query = $this->_db->prepare('INSERT INTO `image` (`pageId`, `src`, `alt`) VALUES (?, ?, ?)'); $query = $this->_db->prepare('INSERT OR IGNORE INTO `image` (`pageId`, `src`, `alt`) VALUES (?, ?, ?)');
$query->execute([$pageId, $src, $alt]); $query->execute([$pageId, $src, $alt]);

2
public/search.php

@@ -22,10 +22,8 @@ $q = !empty($_GET['q']) ? Filter::url($_GET['q']) : '';
// Crawl request // Crawl request
if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) { if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
if (!$db->getPage($q)) {
$db->initPage($q, time()); $db->initPage($q, time());
} }
}
// Search request // Search request
if (!empty($q)) { if (!empty($q)) {

Loading…
Cancel
Save