mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-02-03 10:25:52 +00:00
Make the url/src column unique keys numeric by using crc32
This commit is contained in:
parent
c770a912f0
commit
04dbbc3adf
@ -100,6 +100,7 @@ foreach ($db->getPageQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET)
|
||||
// Add page images
|
||||
$db->addImage($queue->pageId,
|
||||
Filter::url($src),
|
||||
crc32($src),
|
||||
Filter::imageAlt($alt));
|
||||
}
|
||||
}
|
||||
@ -134,7 +135,7 @@ foreach ($db->getPageQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET)
|
||||
// Save valid internal links to the index queue
|
||||
if (filter_var($href, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $href)) {
|
||||
|
||||
$db->initPage($href, time());
|
||||
$db->initPage($href, crc32($href), time());
|
||||
}
|
||||
}
|
||||
}
|
@ -14,6 +14,7 @@ class SQLite {
|
||||
$this->_db->query('
|
||||
CREATE TABLE IF NOT EXISTS "page" (
|
||||
"pageId" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"crc32url" INTEGER NOT NULL UNIQUE,
|
||||
"httpCode" INTEGER,
|
||||
"timeAdded" INTEGER NOT NULL,
|
||||
"timeUpdated" INTEGER,
|
||||
@ -21,16 +22,17 @@ class SQLite {
|
||||
"data" TEXT,
|
||||
"description" TEXT,
|
||||
"keywords" TEXT,
|
||||
"url" TEXT NOT NULL UNIQUE
|
||||
"url" TEXT NOT NULL
|
||||
)
|
||||
');
|
||||
|
||||
$this->_db->query('
|
||||
CREATE TABLE IF NOT EXISTS "image" (
|
||||
"imageId" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||
"crc32src" INTEGER NOT NULL UNIQUE,
|
||||
"pageId" INTEGER NOT NULL,
|
||||
"alt" TEXT NOT NULL,
|
||||
"src" TEXT NOT NULL UNIQUE
|
||||
"src" TEXT NOT NULL
|
||||
)
|
||||
');
|
||||
|
||||
@ -103,20 +105,20 @@ class SQLite {
|
||||
return $this->_db->lastInsertId();
|
||||
}
|
||||
|
||||
public function initPage(string $url, int $timeAdded) {
|
||||
public function initPage(string $url, int $crc32url, int $timeAdded) {
|
||||
|
||||
$query = $this->_db->prepare('INSERT OR IGNORE INTO `page` (`url`, `timeAdded`) VALUES (?, ?)');
|
||||
$query = $this->_db->prepare('INSERT OR IGNORE INTO `page` (`url`, `crc32url`, `timeAdded`) VALUES (?, ?, ?)');
|
||||
|
||||
$query->execute([$url, $timeAdded]);
|
||||
$query->execute([$url, $crc32url, $timeAdded]);
|
||||
|
||||
return $this->_db->lastInsertId();
|
||||
}
|
||||
|
||||
public function addImage(int $pageId, string $src, string $alt) {
|
||||
public function addImage(int $pageId, string $src, int $crc32src, string $alt) {
|
||||
|
||||
$query = $this->_db->prepare('INSERT OR IGNORE INTO `image` (`pageId`, `src`, `alt`) VALUES (?, ?, ?)');
|
||||
$query = $this->_db->prepare('INSERT OR IGNORE INTO `image` (`pageId`, `src`, `crc32src`, `alt`) VALUES (?, ?, ?, ?)');
|
||||
|
||||
$query->execute([$pageId, $src, $alt]);
|
||||
$query->execute([$pageId, $src, $crc32src, $alt]);
|
||||
|
||||
return $this->_db->lastInsertId();
|
||||
}
|
||||
|
@ -22,7 +22,7 @@ $q = !empty($_GET['q']) ? Filter::url($_GET['q']) : '';
|
||||
// Crawl request
|
||||
if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
|
||||
|
||||
$db->initPage($q, time());
|
||||
$db->initPage($q, crc32($q), time());
|
||||
}
|
||||
|
||||
// Search request
|
||||
|
Loading…
x
Reference in New Issue
Block a user