mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-02-09 13:24:37 +00:00
Make the url/src column unique keys numeric (integer) by using crc32
This commit is contained in:
parent
c770a912f0
commit
04dbbc3adf
@ -100,6 +100,7 @@ foreach ($db->getPageQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET)
|
|||||||
// Add page images
|
// Add page images
|
||||||
$db->addImage($queue->pageId,
|
$db->addImage($queue->pageId,
|
||||||
Filter::url($src),
|
Filter::url($src),
|
||||||
|
crc32($src),
|
||||||
Filter::imageAlt($alt));
|
Filter::imageAlt($alt));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -134,7 +135,7 @@ foreach ($db->getPageQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECONDS_OFFSET)
|
|||||||
// Save valid internal links to the index queue
|
// Save valid internal links to the index queue
|
||||||
if (filter_var($href, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $href)) {
|
if (filter_var($href, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $href)) {
|
||||||
|
|
||||||
$db->initPage($href, time());
|
$db->initPage($href, crc32($href), time());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -14,6 +14,7 @@ class SQLite {
|
|||||||
$this->_db->query('
|
$this->_db->query('
|
||||||
CREATE TABLE IF NOT EXISTS "page" (
|
CREATE TABLE IF NOT EXISTS "page" (
|
||||||
"pageId" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
"pageId" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||||
|
"crc32url" INTEGER NOT NULL UNIQUE,
|
||||||
"httpCode" INTEGER,
|
"httpCode" INTEGER,
|
||||||
"timeAdded" INTEGER NOT NULL,
|
"timeAdded" INTEGER NOT NULL,
|
||||||
"timeUpdated" INTEGER,
|
"timeUpdated" INTEGER,
|
||||||
@ -21,16 +22,17 @@ class SQLite {
|
|||||||
"data" TEXT,
|
"data" TEXT,
|
||||||
"description" TEXT,
|
"description" TEXT,
|
||||||
"keywords" TEXT,
|
"keywords" TEXT,
|
||||||
"url" TEXT NOT NULL UNIQUE
|
"url" TEXT NOT NULL
|
||||||
)
|
)
|
||||||
');
|
');
|
||||||
|
|
||||||
$this->_db->query('
|
$this->_db->query('
|
||||||
CREATE TABLE IF NOT EXISTS "image" (
|
CREATE TABLE IF NOT EXISTS "image" (
|
||||||
"imageId" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
"imageId" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
|
||||||
|
"crc32src" INTEGER NOT NULL UNIQUE,
|
||||||
"pageId" INTEGER NOT NULL,
|
"pageId" INTEGER NOT NULL,
|
||||||
"alt" TEXT NOT NULL,
|
"alt" TEXT NOT NULL,
|
||||||
"src" TEXT NOT NULL UNIQUE
|
"src" TEXT NOT NULL
|
||||||
)
|
)
|
||||||
');
|
');
|
||||||
|
|
||||||
@ -103,20 +105,20 @@ class SQLite {
|
|||||||
return $this->_db->lastInsertId();
|
return $this->_db->lastInsertId();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function initPage(string $url, int $timeAdded) {
|
public function initPage(string $url, int $crc32url, int $timeAdded) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('INSERT OR IGNORE INTO `page` (`url`, `timeAdded`) VALUES (?, ?)');
|
$query = $this->_db->prepare('INSERT OR IGNORE INTO `page` (`url`, `crc32url`, `timeAdded`) VALUES (?, ?, ?)');
|
||||||
|
|
||||||
$query->execute([$url, $timeAdded]);
|
$query->execute([$url, $crc32url, $timeAdded]);
|
||||||
|
|
||||||
return $this->_db->lastInsertId();
|
return $this->_db->lastInsertId();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function addImage(int $pageId, string $src, string $alt) {
|
public function addImage(int $pageId, string $src, int $crc32src, string $alt) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('INSERT OR IGNORE INTO `image` (`pageId`, `src`, `alt`) VALUES (?, ?, ?)');
|
$query = $this->_db->prepare('INSERT OR IGNORE INTO `image` (`pageId`, `src`, `crc32src`, `alt`) VALUES (?, ?, ?, ?)');
|
||||||
|
|
||||||
$query->execute([$pageId, $src, $alt]);
|
$query->execute([$pageId, $src, $crc32src, $alt]);
|
||||||
|
|
||||||
return $this->_db->lastInsertId();
|
return $this->_db->lastInsertId();
|
||||||
}
|
}
|
||||||
|
@ -22,7 +22,7 @@ $q = !empty($_GET['q']) ? Filter::url($_GET['q']) : '';
|
|||||||
// Crawl request
|
// Crawl request
|
||||||
if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
|
if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
|
||||||
|
|
||||||
$db->initPage($q, time());
|
$db->initPage($q, crc32($q), time());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Search request
|
// Search request
|
||||||
|
Loading…
x
Reference in New Issue
Block a user