mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-02-03 18:35:04 +00:00
optimize snaps, delete unused constructions
This commit is contained in:
parent
36becf6fe1
commit
3e3b7ee2ef
@ -97,10 +97,10 @@ define('SNAP_STORAGE', json_encode((object)
|
|||||||
'directory' => __DIR__ . '/../storage/snap/hp/',
|
'directory' => __DIR__ . '/../storage/snap/hp/',
|
||||||
'quota' => [
|
'quota' => [
|
||||||
'mime' => false,
|
'mime' => false,
|
||||||
'size' => 10000000024,
|
'size' => 10000000024, // @TODO
|
||||||
'request' => [
|
'request' => [
|
||||||
'download' => [
|
'download' => [
|
||||||
'size' => 10000024,
|
'size' => 10000024, // @TODO
|
||||||
'seconds' => 60*60
|
'seconds' => 60*60
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
@ -119,10 +119,10 @@ define('SNAP_STORAGE', json_encode((object)
|
|||||||
'passive' => true,
|
'passive' => true,
|
||||||
'quota' => [
|
'quota' => [
|
||||||
'mime' => 'text/html,application/xhtml+xml,text/plain,image/webp,image/png,image/gif,image/jpeg,image/ico',
|
'mime' => 'text/html,application/xhtml+xml,text/plain,image/webp,image/png,image/gif,image/jpeg,image/ico',
|
||||||
'size' => 10000000024,
|
'size' => 10000000024, // @TODO
|
||||||
'request' => [
|
'request' => [
|
||||||
'download' => [
|
'download' => [
|
||||||
'size' => 10000024,
|
'size' => 10000024, // @TODO
|
||||||
'seconds' => 60*60
|
'seconds' => 60*60
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
|
@ -656,133 +656,131 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
|||||||
// Begin snaps
|
// Begin snaps
|
||||||
if (SNAP_STORAGE) {
|
if (SNAP_STORAGE) {
|
||||||
|
|
||||||
$crc32data = crc32($content);
|
$hostPageSnapTimeAdded = time();
|
||||||
|
$hostPageSnapPath = chunk_split($queueHostPage->hostPageId, 1, '/');
|
||||||
|
|
||||||
$snapTime = time();
|
$hostPageSnapFilenameTmp = __DIR__ . '/../storage/tmp/snap/hp/' . $hostPageSnapPath . $hostPageSnapTimeAdded . '.zip';
|
||||||
$snapPath = chunk_split($queueHostPage->hostPageId, 1, '/');
|
@mkdir(__DIR__ . '/../storage/tmp/snap/hp/' . $hostPageSnapPath, 0755, true);
|
||||||
|
|
||||||
$snapTmp = __DIR__ . '/../storage/tmp/snap/hp/' . $snapPath . $snapTime . '.zip';
|
// Create new ZIP container
|
||||||
@mkdir(__DIR__ . '/../storage/tmp/snap/hp/' . $snapPath, 0755, true);
|
$zip = new ZipArchive();
|
||||||
|
|
||||||
// Create not duplicated data snaps only, even newer by time added
|
if (true === $zip->open($hostPageSnapFilenameTmp, ZipArchive::CREATE)) {
|
||||||
if ($hostPageSnap = $db->findHostPageSnap($queueHostPage->hostPageId, $crc32data)) {
|
|
||||||
|
|
||||||
$hostPageSnapId = $hostPageSnap->hostPageSnapId;
|
// Insert compressed snap data into the tmp storage
|
||||||
|
if (true === $zip->addFromString('DATA', $content) &&
|
||||||
|
true === $zip->addFromString('META', sprintf('TIMESTAMP: %s', $hostPageSnapTimeAdded) . PHP_EOL .
|
||||||
|
sprintf('MIME: %s', Filter::mime($contentType)) . PHP_EOL .
|
||||||
|
sprintf('SOURCE: %s', Filter::url(WEBSITE_DOMAIN . '/explore.php?hp=' . $queueHostPage->hostPageId)) . PHP_EOL .
|
||||||
|
sprintf('TARGET: %s', Filter::url($queueHostPage->hostPageURL)))) {
|
||||||
|
|
||||||
} else {
|
// Done
|
||||||
|
$zip->close();
|
||||||
|
|
||||||
// Create new ZIP container
|
// Temporarily snap file exists
|
||||||
$zip = new ZipArchive();
|
if (file_exists($hostPageSnapFilenameTmp)) {
|
||||||
|
|
||||||
if (true === $zip->open($snapTmp, ZipArchive::CREATE)) {
|
// Register snap in DB
|
||||||
|
if ($hostPageSnapId = $db->addHostPageSnap($queueHostPage->hostPageId, $hostPageSnapTimeAdded)) {
|
||||||
|
|
||||||
// Insert compressed snap data into the tmp storage
|
// Default storage success
|
||||||
if (true === $zip->addFromString('DATA', $content) &&
|
$snapFilesExists = false;
|
||||||
true === $zip->addFromString('META', sprintf('TIMESTAMP: %s', $snapTime) . PHP_EOL .
|
|
||||||
sprintf('CRC32: %s', $crc32data . PHP_EOL .
|
|
||||||
sprintf('MIME: %s', Filter::mime($contentType)) . PHP_EOL .
|
|
||||||
sprintf('SOURCE: %s', Filter::url(WEBSITE_DOMAIN . '/explore.php?hp=' . $queueHostPage->hostPageId)) . PHP_EOL .
|
|
||||||
sprintf('TARGET: %s', Filter::url($queueHostPage->hostPageURL))))) {
|
|
||||||
|
|
||||||
// Done
|
// Copy files to each storage
|
||||||
$zip->close();
|
foreach (json_decode(SNAP_STORAGE) as $name => $storages) {
|
||||||
|
|
||||||
// Temporarily snap file exists
|
foreach ($storages as $i => $storage) {
|
||||||
if (file_exists($snapTmp)) {
|
|
||||||
|
|
||||||
// Register snap in DB
|
// Generate storage id
|
||||||
if ($hostPageSnapId = $db->addHostPageSnap($queueHostPage->hostPageId, $crc32data, filesize($snapTmp), $snapTime)) {
|
$crc32name = crc32(sprintf('%s.%s', $name, $i));
|
||||||
|
|
||||||
|
switch ($name) {
|
||||||
|
|
||||||
|
case 'localhost':
|
||||||
|
|
||||||
|
// Validate mime
|
||||||
|
if (!$storage->quota->mime) continue 2;
|
||||||
|
|
||||||
|
$snapMimeValid = false;
|
||||||
|
foreach ((array) explode(',', $storage->quota->mime) as $mime) {
|
||||||
|
|
||||||
|
if (false !== stripos(Filter::mime($contentType), Filter::mime($mime))) {
|
||||||
|
|
||||||
|
$snapMimeValid = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!$snapMimeValid) continue 2;
|
||||||
|
|
||||||
|
// Copy tmp snap file to the permanent storage
|
||||||
|
@mkdir($storage->directory . $hostPageSnapPath, 0755, true);
|
||||||
|
|
||||||
|
if (copy($hostPageSnapFilenameTmp, $storage->directory . $hostPageSnapPath . $hostPageSnapTimeAdded . '.zip')) {
|
||||||
|
|
||||||
|
// Register storage name
|
||||||
|
$db->addHostPageSnapStorage($hostPageSnapId, $crc32name, time());
|
||||||
|
|
||||||
|
$snapFilesExists = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
case 'ftp':
|
||||||
|
|
||||||
|
// Validate mime
|
||||||
|
if (!$storage->quota->mime) continue 2;
|
||||||
|
|
||||||
|
$snapMimeValid = false;
|
||||||
|
foreach ((array) explode(',', $storage->quota->mime) as $mime) {
|
||||||
|
|
||||||
|
if (false !== stripos(Filter::mime($contentType), Filter::mime($mime))) {
|
||||||
|
|
||||||
|
$snapMimeValid = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!$snapMimeValid) continue 2;
|
||||||
|
|
||||||
|
// Copy tmp snap file to the permanent storage
|
||||||
|
$ftp = new Ftp();
|
||||||
|
|
||||||
|
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
|
||||||
|
|
||||||
|
$ftp->mkdir('hp/' . $hostPageSnapPath, true);
|
||||||
|
|
||||||
|
if ($ftp->copy($hostPageSnapFilenameTmp, 'hp/' . $hostPageSnapPath . $hostPageSnapTimeAdded . '.zip')) {
|
||||||
|
|
||||||
|
// Register storage name
|
||||||
|
$db->addHostPageSnapStorage($hostPageSnapId, $crc32name, time());
|
||||||
|
|
||||||
|
$snapFilesExists = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
$ftp->close();
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// At least one file have been stored
|
||||||
|
if ($snapFilesExists) {
|
||||||
|
|
||||||
$hostPagesSnapAdded++;
|
$hostPagesSnapAdded++;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
$db->deleteHostPageSnap($hostPageSnapId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copy files to each storage
|
|
||||||
foreach (json_decode(SNAP_STORAGE) as $name => $storages) {
|
|
||||||
|
|
||||||
foreach ($storages as $i => $storage) {
|
|
||||||
|
|
||||||
// Generate storage id
|
|
||||||
$crc32name = crc32(sprintf('%s.%s', $name, $i));
|
|
||||||
|
|
||||||
switch ($name) {
|
|
||||||
|
|
||||||
case 'localhost':
|
|
||||||
|
|
||||||
// Validate size quota
|
|
||||||
if ($db->getTotalHostPageSnapSizeByStorage($hostPageSnapId, $crc32name) >= $storage->quota->size) continue 2;
|
|
||||||
|
|
||||||
// Validate mime
|
|
||||||
if (!$storage->quota->mime) continue 2;
|
|
||||||
|
|
||||||
$snapMimeValid = false;
|
|
||||||
foreach ((array) explode(',', $storage->quota->mime) as $mime) {
|
|
||||||
|
|
||||||
if (false !== stripos(Filter::mime($contentType), Filter::mime($mime))) {
|
|
||||||
|
|
||||||
$snapMimeValid = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!$snapMimeValid) continue 2;
|
|
||||||
|
|
||||||
// Copy tmp snap file to the permanent storage
|
|
||||||
@mkdir($storage->directory . $snapPath, 0755, true);
|
|
||||||
|
|
||||||
if (copy($snapTmp, $storage->directory . $snapPath . $snapTime . '.zip')) {
|
|
||||||
|
|
||||||
// Register storage name
|
|
||||||
$db->addHostPageSnapStorage($hostPageSnapId, $crc32name, time());
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
|
||||||
case 'ftp':
|
|
||||||
|
|
||||||
// Validate size quota
|
|
||||||
if ($db->getTotalHostPageSnapSizeByStorage($hostPageSnapId, $crc32name) >= $storage->quota->size) continue 2;
|
|
||||||
|
|
||||||
// Validate mime
|
|
||||||
if (!$storage->quota->mime) continue 2;
|
|
||||||
|
|
||||||
$snapMimeValid = false;
|
|
||||||
foreach ((array) explode(',', $storage->quota->mime) as $mime) {
|
|
||||||
|
|
||||||
if (false !== stripos(Filter::mime($contentType), Filter::mime($mime))) {
|
|
||||||
|
|
||||||
$snapMimeValid = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!$snapMimeValid) continue 2;
|
|
||||||
|
|
||||||
// Copy tmp snap file to the permanent storage
|
|
||||||
$ftp = new Ftp();
|
|
||||||
|
|
||||||
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
|
|
||||||
|
|
||||||
$ftp->mkdir('hp/' . $snapPath, true);
|
|
||||||
|
|
||||||
if ($ftp->copy($snapTmp, 'hp/' . $snapPath . $snapTime . '.zip')) {
|
|
||||||
|
|
||||||
// Register storage name
|
|
||||||
$db->addHostPageSnapStorage($hostPageSnapId, $crc32name, time());
|
|
||||||
}
|
|
||||||
|
|
||||||
$ftp->close();
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete tmp snap
|
// Delete tmp snap
|
||||||
unlink($snapTmp);
|
unlink($hostPageSnapFilenameTmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip page links following with meta robots:nofollow attribute
|
// Skip page links following with meta robots:nofollow attribute
|
||||||
|
Binary file not shown.
@ -483,14 +483,11 @@ class MySQL {
|
|||||||
return $query->fetchAll();
|
return $query->fetchAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function addHostPageSnap(int $hostPageId, string $crc32data, int $size, int $timeAdded) {
|
public function addHostPageSnap(int $hostPageId, int $timeAdded) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('INSERT INTO `hostPageSnap` (`hostPageId`,
|
$query = $this->_db->prepare('INSERT INTO `hostPageSnap` (`hostPageId`, `timeAdded`) VALUES (?, ?)');
|
||||||
`crc32data`,
|
|
||||||
`size`,
|
|
||||||
`timeAdded`) VALUES (?, ?, ?, ?)');
|
|
||||||
|
|
||||||
$query->execute([$hostPageId, $crc32data, $size, $timeAdded]);
|
$query->execute([$hostPageId, $timeAdded]);
|
||||||
|
|
||||||
return $this->_db->lastInsertId();
|
return $this->_db->lastInsertId();
|
||||||
}
|
}
|
||||||
@ -522,19 +519,6 @@ class MySQL {
|
|||||||
return $query->fetchAll();
|
return $query->fetchAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getTotalHostPageSnapSizeByStorage(int $hostPageId, int $crc32name) {
|
|
||||||
|
|
||||||
$query = $this->_db->prepare('SELECT SUM(`hostPageSnap`.`size`) AS `total` FROM `hostPageSnap`
|
|
||||||
JOIN `hostPageSnapStorage` ON (`hostPageSnapStorage`.`hostPageSnapId` = `hostPageSnap`.`hostPageSnapId`)
|
|
||||||
|
|
||||||
WHERE `hostPageSnap`.`hostPageSnapId` = ?
|
|
||||||
AND `hostPageSnapStorage`.`crc32name` = ?');
|
|
||||||
|
|
||||||
$query->execute([$hostPageId, $crc32name]);
|
|
||||||
|
|
||||||
return $query->fetch()->total;
|
|
||||||
}
|
|
||||||
|
|
||||||
public function getHostPageSnap(int $hostPageSnapId) {
|
public function getHostPageSnap(int $hostPageSnapId) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageSnapId` = ? LIMIT 1');
|
$query = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageSnapId` = ? LIMIT 1');
|
||||||
@ -544,15 +528,6 @@ class MySQL {
|
|||||||
return $query->fetch();
|
return $query->fetch();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function findHostPageSnap(int $hostPageId, int $crc32data) {
|
|
||||||
|
|
||||||
$query = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageId` = ? AND `crc32data` = ? LIMIT 1');
|
|
||||||
|
|
||||||
$query->execute([$hostPageId, $crc32data]);
|
|
||||||
|
|
||||||
return $query->fetch();
|
|
||||||
}
|
|
||||||
|
|
||||||
public function addHostPageSnapDownload(int $hostPageSnapStorageId, string $crc32ip, int $timeAdded) {
|
public function addHostPageSnapDownload(int $hostPageSnapStorageId, string $crc32ip, int $timeAdded) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('INSERT INTO `hostPageSnapDownload` (`hostPageSnapStorageId`,
|
$query = $this->_db->prepare('INSERT INTO `hostPageSnapDownload` (`hostPageSnapStorageId`,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user