Browse Source

make url/src columns unique keys, add insert/ignore construction

sqliteway
ghost 2 years ago
parent
commit
b218b8bbc3
  1. 11
      crontab/crawler.php
  2. 17
      library/sqlite.php
  3. 4
      public/search.php

11
crontab/crawler.php

@@ -92,9 +92,9 @@ foreach ($db->getPageQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET)
if (!parse_url($src, PHP_URL_HOST)) {
$src = parse_url($queue->url, PHP_URL_SCHEME) . '://' .
parse_url($queue->url, PHP_URL_HOST) .
parse_url($queue->url, PHP_URL_PORT) .
$src; // @TODO sometimes wrong URL prefix available
parse_url($queue->url, PHP_URL_HOST) .
parse_url($queue->url, PHP_URL_PORT) .
$src; // @TODO sometimes wrong URL prefix available
}
// Add page images
@@ -134,10 +134,7 @@ foreach ($db->getPageQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET)
// Save valid internal links to the index queue
if (filter_var($href, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $href)) {
if (!$db->getPage($href)) {
$db->initPage($href, time());
}
$db->initPage($href, time());
}
}
}

17
library/sqlite.php

@@ -21,7 +21,7 @@ class SQLite {
"data" TEXT,
"description" TEXT,
"keywords" TEXT,
"url" TEXT NOT NULL
"url" TEXT NOT NULL UNIQUE
)
');
@@ -30,7 +30,7 @@ class SQLite {
"imageId" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
"pageId" INTEGER NOT NULL,
"alt" TEXT NOT NULL,
"src" TEXT NOT NULL
"src" TEXT NOT NULL UNIQUE
)
');
@@ -85,15 +85,6 @@ class SQLite {
return $query->fetch()->total;
}
/**
 * Look up a single `page` row by its URL.
 *
 * @param string $url URL bound to the query's only placeholder.
 *
 * @return mixed Whatever fetch() yields for the executed statement.
 */
public function getPage(string $url) {
    $statement = $this->_db->prepare('SELECT * FROM `page` WHERE `url` = ?');
    $statement->execute([$url]);

    $row = $statement->fetch();

    return $row;
}
public function updatePage(int $pageId, string $title, string $description, string $keywords, string $data, int $timeUpdated) {
$query = $this->_db->prepare('UPDATE `page` SET `title` = ?, `description` = ?, `data` = ?, `timeUpdated` = ? WHERE `pageId` = ?');
@@ -114,7 +105,7 @@ class SQLite {
public function initPage(string $url, int $timeAdded) {
$query = $this->_db->prepare('INSERT INTO `page` (`url`, `timeAdded`) VALUES (?, ?)');
$query = $this->_db->prepare('INSERT OR IGNORE INTO `page` (`url`, `timeAdded`) VALUES (?, ?)');
$query->execute([$url, $timeAdded]);
@@ -123,7 +114,7 @@ class SQLite {
public function addImage(int $pageId, string $src, string $alt) {
$query = $this->_db->prepare('INSERT INTO `image` (`pageId`, `src`, `alt`) VALUES (?, ?, ?)');
$query = $this->_db->prepare('INSERT OR IGNORE INTO `image` (`pageId`, `src`, `alt`) VALUES (?, ?, ?)');
$query->execute([$pageId, $src, $alt]);

4
public/search.php

@@ -22,9 +22,7 @@ $q = !empty($_GET['q']) ? Filter::url($_GET['q']) : '';
// Crawl request
if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
if (!$db->getPage($q)) {
$db->initPage($q, time());
}
$db->initPage($q, time());
}
// Search request

Loading…
Cancel
Save