mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-01-24 21:44:59 +00:00
make url/src columns unique keys, add insert/ignore construction
This commit is contained in:
parent
aadfe7f551
commit
b218b8bbc3
@ -92,9 +92,9 @@ foreach ($db->getPageQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET)
|
||||
if (!parse_url($src, PHP_URL_HOST)) {
|
||||
|
||||
$src = parse_url($queue->url, PHP_URL_SCHEME) . '://' .
|
||||
parse_url($queue->url, PHP_URL_HOST) .
|
||||
parse_url($queue->url, PHP_URL_PORT) .
|
||||
$src; // @TODO sometimes wrong URL prefix available
|
||||
parse_url($queue->url, PHP_URL_HOST) .
|
||||
parse_url($queue->url, PHP_URL_PORT) .
|
||||
$src; // @TODO sometimes wrong URL prefix available
|
||||
}
|
||||
|
||||
// Add page images
|
||||
@ -134,10 +134,7 @@ foreach ($db->getPageQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET)
|
||||
// Save valid internal links to the index queue
|
||||
if (filter_var($href, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $href)) {
|
||||
|
||||
if (!$db->getPage($href)) {
|
||||
|
||||
$db->initPage($href, time());
|
||||
}
|
||||
$db->initPage($href, time());
|
||||
}
|
||||
}
|
||||
}
|
@ -21,7 +21,7 @@ class SQLite {
|
||||
"data" TEXT,
|
||||
"description" TEXT,
|
||||
"keywords" TEXT,
|
||||
"url" TEXT NOT NULL
|
||||
"url" TEXT NOT NULL UNIQUE
|
||||
)
|
||||
');
|
||||
|
||||
@ -30,7 +30,7 @@ class SQLite {
|
||||
"imageId" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"pageId" INTEGER NOT NULL,
|
||||
"alt" TEXT NOT NULL,
|
||||
"src" TEXT NOT NULL
|
||||
"src" TEXT NOT NULL UNIQUE
|
||||
)
|
||||
');
|
||||
|
||||
@ -85,15 +85,6 @@ class SQLite {
|
||||
return $query->fetch()->total;
|
||||
}
|
||||
|
||||
public function getPage(string $url) {
|
||||
|
||||
$query = $this->_db->prepare('SELECT * FROM `page` WHERE `url` = ?');
|
||||
|
||||
$query->execute([$url]);
|
||||
|
||||
return $query->fetch();
|
||||
}
|
||||
|
||||
public function updatePage(int $pageId, string $title, string $description, string $keywords, string $data, int $timeUpdated) {
|
||||
|
||||
$query = $this->_db->prepare('UPDATE `page` SET `title` = ?, `description` = ?, `data` = ?, `timeUpdated` = ? WHERE `pageId` = ?');
|
||||
@ -114,7 +105,7 @@ class SQLite {
|
||||
|
||||
public function initPage(string $url, int $timeAdded) {
|
||||
|
||||
$query = $this->_db->prepare('INSERT INTO `page` (`url`, `timeAdded`) VALUES (?, ?)');
|
||||
$query = $this->_db->prepare('INSERT OR IGNORE INTO `page` (`url`, `timeAdded`) VALUES (?, ?)');
|
||||
|
||||
$query->execute([$url, $timeAdded]);
|
||||
|
||||
@ -123,7 +114,7 @@ class SQLite {
|
||||
|
||||
public function addImage(int $pageId, string $src, string $alt) {
|
||||
|
||||
$query = $this->_db->prepare('INSERT INTO `image` (`pageId`, `src`, `alt`) VALUES (?, ?, ?)');
|
||||
$query = $this->_db->prepare('INSERT OR IGNORE INTO `image` (`pageId`, `src`, `alt`) VALUES (?, ?, ?)');
|
||||
|
||||
$query->execute([$pageId, $src, $alt]);
|
||||
|
||||
|
@ -22,9 +22,7 @@ $q = !empty($_GET['q']) ? Filter::url($_GET['q']) : '';
|
||||
// Crawl request
|
||||
if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
|
||||
|
||||
if (!$db->getPage($q)) {
|
||||
$db->initPage($q, time());
|
||||
}
|
||||
$db->initPage($q, time());
|
||||
}
|
||||
|
||||
// Search request
|
||||
|
Loading…
x
Reference in New Issue
Block a user