mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-01-08 22:07:56 +00:00
refactor snap storage
This commit is contained in:
parent
5d7bcba42b
commit
9c0f361601
@ -227,10 +227,10 @@ GET m=SphinxQL
|
|||||||
* [x] crawl
|
* [x] crawl
|
||||||
* [x] clean
|
* [x] clean
|
||||||
* [x] hostPageSnap
|
* [x] hostPageSnap
|
||||||
+ [x] repair
|
+ [x] repair (not tested)
|
||||||
+ [x] _sync DB-FS relations_
|
+ [x] _sync DB-FS relations_
|
||||||
+ [x] _FTP_
|
+ [x] _FTP_
|
||||||
+ [x] _localhost (not tested)_
|
+ [x] _localhost_
|
||||||
+ [x] _delete FS missed in the DB_
|
+ [x] _delete FS missed in the DB_
|
||||||
+ [x] _FTP_
|
+ [x] _FTP_
|
||||||
+ [ ] _localhost_
|
+ [ ] _localhost_
|
||||||
|
65
cli/yggo.php
65
cli/yggo.php
@ -97,6 +97,12 @@ switch ($argv[1]) {
|
|||||||
|
|
||||||
case 'repair':
|
case 'repair':
|
||||||
|
|
||||||
|
// @TODO
|
||||||
|
CLI::danger(_('this function upgraded but not tested after snaps refactor.'));
|
||||||
|
CLI::danger(_('make sure you have backups then remove this alert.'));
|
||||||
|
CLI::break();
|
||||||
|
exit;
|
||||||
|
|
||||||
// Normalize & cleanup DB
|
// Normalize & cleanup DB
|
||||||
CLI::notice(_('scan database registry for missed snap files...'));
|
CLI::notice(_('scan database registry for missed snap files...'));
|
||||||
|
|
||||||
@ -104,29 +110,31 @@ switch ($argv[1]) {
|
|||||||
|
|
||||||
foreach ($db->getHostPages($host->hostId) as $hostPage) {
|
foreach ($db->getHostPages($host->hostId) as $hostPage) {
|
||||||
|
|
||||||
$snapPath = chunk_split($hostPage->hostPageId, 1, '/');
|
|
||||||
|
|
||||||
foreach ($db->getHostPageSnaps($hostPage->hostPageId) as $hostPageSnap) {
|
foreach ($db->getHostPageSnaps($hostPage->hostPageId) as $hostPageSnap) {
|
||||||
|
|
||||||
|
// Prepare filenames
|
||||||
|
$hostPageSnapPath = 'hps/' . substr(trim(chunk_split($hostPageSnap->hostPageSnapId, 1, '/'), '/'), 0, -1);
|
||||||
|
$hostPageSnapFile = $hostPageSnapPath . substr($hostPageSnap->hostPageSnapId, -1) . '.zip';
|
||||||
|
|
||||||
// Define variables
|
// Define variables
|
||||||
$hostPageSnapStorageFilesExists = false;
|
$hostPageSnapStorageFilesExists = false;
|
||||||
|
|
||||||
// Check file exists
|
// Check file exists
|
||||||
foreach (json_decode(SNAP_STORAGE) as $hostPageSnapStorageName => $storages) {
|
foreach (json_decode(SNAP_STORAGE) as $node => $storages) {
|
||||||
|
|
||||||
foreach ($storages as $i => $storage) {
|
foreach ($storages as $location => $storage) {
|
||||||
|
|
||||||
// Generate storage id
|
// Generate storage id
|
||||||
$crc32name = crc32(sprintf('%s.%s', $hostPageSnapStorageName, $i));
|
$crc32name = crc32(sprintf('%s.%s', $node, $location));
|
||||||
|
|
||||||
switch ($hostPageSnapStorageName) {
|
switch ($node) {
|
||||||
|
|
||||||
case 'localhost':
|
case 'localhost':
|
||||||
|
|
||||||
// @TODO implemented, not tested
|
// @TODO implemented, not tested
|
||||||
$hostPageSnapFilename = $storage->directory . $snapPath . $hostPageSnap->timeAdded . '.zip';
|
$hostPageSnapFile = $storage->directory . $hostPageSnapFile;
|
||||||
|
|
||||||
if (file_exists($hostPageSnapFilename)) {
|
if (file_exists($hostPageSnapFile)) {
|
||||||
|
|
||||||
$hostPageSnapStorageFilesExists = true;
|
$hostPageSnapStorageFilesExists = true;
|
||||||
|
|
||||||
@ -134,12 +142,12 @@ switch ($argv[1]) {
|
|||||||
|
|
||||||
if ($db->addHostPageSnapStorage($hostPageSnap->hostPageSnapId, $crc32name, $hostPageSnap->timeAdded)) {
|
if ($db->addHostPageSnapStorage($hostPageSnap->hostPageSnapId, $crc32name, $hostPageSnap->timeAdded)) {
|
||||||
|
|
||||||
CLI::warning(sprintf(_('add index hostPageSnapId #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFilename, $hostPageSnapStorageName, $i));
|
CLI::warning(sprintf(_('add index hostPageSnapId #%s file: %s node: %s location: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFile, $node, $location));
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
CLI::success(sprintf(_('skip related index hostPageSnapId #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFilename, $hostPageSnapStorageName, $i));
|
CLI::success(sprintf(_('skip related index hostPageSnapId #%s file: %s node: %s location: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFile, $node, $location));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -151,9 +159,7 @@ switch ($argv[1]) {
|
|||||||
|
|
||||||
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
|
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
|
||||||
|
|
||||||
$hostPageSnapFilename = 'hp/' . $snapPath . $hostPageSnap->timeAdded . '.zip';
|
if ($ftp->size($hostPageSnapFile)) {
|
||||||
|
|
||||||
if ($ftp->size($hostPageSnapFilename)) {
|
|
||||||
|
|
||||||
$hostPageSnapStorageFilesExists = true;
|
$hostPageSnapStorageFilesExists = true;
|
||||||
|
|
||||||
@ -161,18 +167,18 @@ switch ($argv[1]) {
|
|||||||
|
|
||||||
if ($db->addHostPageSnapStorage($hostPageSnap->hostPageSnapId, $crc32name, $hostPageSnap->timeAdded)) {
|
if ($db->addHostPageSnapStorage($hostPageSnap->hostPageSnapId, $crc32name, $hostPageSnap->timeAdded)) {
|
||||||
|
|
||||||
CLI::warning(sprintf(_('add index hostPageSnapId #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFilename, $hostPageSnapStorageName, $i));
|
CLI::warning(sprintf(_('add index hostPageSnapId #%s file: %s node: %s location: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFile, $node, $location));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
CLI::success(sprintf(_('skip related index hostPageSnapId #%s file: %s storage: %s index: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFilename, $hostPageSnapStorageName, $i));
|
CLI::success(sprintf(_('skip related index hostPageSnapId #%s file: %s node: %s location: %s;'), $hostPageSnap->hostPageSnapId, $hostPageSnapFile, $node, $location));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prevent snap deletion from registry on FTP connection lost
|
// Prevent snap deletion from registry on FTP connection lost
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
CLI::danger(sprintf(_('could not connect to storage %s index %s. operation stopped to prevent the data lose.'), $hostPageSnapStorageName, $i));
|
CLI::danger(sprintf(_('could not connect to storage %s location %s. operation stopped to prevent the data lose.'), $hostPageSnapStorageName, $location));
|
||||||
CLI::break();
|
CLI::break();
|
||||||
exit;
|
exit;
|
||||||
}
|
}
|
||||||
@ -218,11 +224,15 @@ switch ($argv[1]) {
|
|||||||
// Cleanup FS
|
// Cleanup FS
|
||||||
CLI::notice(_('scan storage for snap files missed in the DB...'));
|
CLI::notice(_('scan storage for snap files missed in the DB...'));
|
||||||
|
|
||||||
foreach (json_decode(SNAP_STORAGE) as $hostPageSnapStorageName => $storages) {
|
// Copy files to each storage
|
||||||
|
foreach (json_decode(SNAP_STORAGE) as $node => $storages) {
|
||||||
|
|
||||||
foreach ($storages as $i => $storage) {
|
foreach ($storages as $location => $storage) {
|
||||||
|
|
||||||
switch ($hostPageSnapStorageName) {
|
// Generate storage id
|
||||||
|
$crc32name = crc32(sprintf('%s.%s', $node, $location));
|
||||||
|
|
||||||
|
switch ($node) {
|
||||||
|
|
||||||
case 'localhost':
|
case 'localhost':
|
||||||
|
|
||||||
@ -236,27 +246,26 @@ switch ($argv[1]) {
|
|||||||
|
|
||||||
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
|
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
|
||||||
|
|
||||||
foreach ($ftp->nlistr($storage->directory) as $hostPageSnapFilename) {
|
foreach ($ftp->nlistr($storage->directory) as $filename) {
|
||||||
|
|
||||||
if (false !== preg_match(sprintf('!/hp/([\d/]+)/([\d]+)\.zip$!ui', $storage->directory), $hostPageSnapFilename, $matches)) {
|
if (false !== preg_match(sprintf('!/hps/([\d]+)\.zip$!ui', $storage->directory), $filename, $matches)) {
|
||||||
|
|
||||||
if (!empty($matches[1]) && // hostPageId
|
if (!empty($matches[1])) { // hostPageSnapId
|
||||||
!empty($matches[2])) { // timeAdded
|
|
||||||
|
|
||||||
if (!$db->findHostPageSnapByTimeAdded($matches[1], $matches[2])) {
|
if (!$db->getHostPageSnap($matches[1])) {
|
||||||
|
|
||||||
if ($ftp->delete($hostPageSnapFilename)) {
|
if ($ftp->delete($filename)) {
|
||||||
|
|
||||||
CLI::warning(sprintf(_('delete snap file: #%s from storage %s index %s not found in registry;'), $hostPageSnapFilename, $hostPageSnapStorageName, $i));
|
CLI::warning(sprintf(_('delete snap file: #%s from node %s location %s not found in registry;'), $filename, $node, $location));
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
CLI::danger(sprintf(_('delete snap file: #%s from storage %s index %s not found in registry;'), $hostPageSnapFilename, $hostPageSnapStorageName, $i));
|
CLI::danger(sprintf(_('delete snap file: #%s from node %s location %s not found in registry;'), $filename, $node, $location));
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
CLI::success(sprintf(_('skip snap file: #%s available in storage %s index %s;'), $hostPageSnapFilename, $hostPageSnapStorageName, $i));
|
CLI::success(sprintf(_('skip snap file: #%s available in node %s location %s;'), $filename, $node, $location));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -93,8 +93,8 @@ define('MEMCACHED_PORT', 11211);
|
|||||||
define('SNAP_STORAGE', json_encode((object)
|
define('SNAP_STORAGE', json_encode((object)
|
||||||
[
|
[
|
||||||
'localhost' => [ // @TODO see https://github.com/YGGverse/YGGo#roadmap
|
'localhost' => [ // @TODO see https://github.com/YGGverse/YGGo#roadmap
|
||||||
[
|
'storage-1' => [
|
||||||
'directory' => __DIR__ . '/../storage/snap/hp/',
|
'directory' => __DIR__ . '/../storage/snap/hps/',
|
||||||
'quota' => [
|
'quota' => [
|
||||||
'mime' => false,
|
'mime' => false,
|
||||||
'size' => 10000000024, // @TODO
|
'size' => 10000000024, // @TODO
|
||||||
@ -104,12 +104,12 @@ define('SNAP_STORAGE', json_encode((object)
|
|||||||
'seconds' => 60*60
|
'seconds' => 60*60
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
],
|
|
||||||
// ...
|
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
|
// ...
|
||||||
|
],
|
||||||
'ftp' => [
|
'ftp' => [
|
||||||
[
|
'storage-1' => [
|
||||||
'port' => 21,
|
'port' => 21,
|
||||||
'host' => '',
|
'host' => '',
|
||||||
'username' => '',
|
'username' => '',
|
||||||
|
@ -95,23 +95,25 @@ try {
|
|||||||
$hostPagesToHostPageDeleted += $db->deleteHostPageToHostPage($hostPage->hostPageId);
|
$hostPagesToHostPageDeleted += $db->deleteHostPageToHostPage($hostPage->hostPageId);
|
||||||
|
|
||||||
// Delete host page snaps
|
// Delete host page snaps
|
||||||
$snapFilePath = chunk_split($hostPage->hostPageId, 1, '/');
|
|
||||||
|
|
||||||
foreach ($db->getHostPageSnaps($hostPage->hostPageId) as $hostPageSnap) {
|
foreach ($db->getHostPageSnaps($hostPage->hostPageId) as $hostPageSnap) {
|
||||||
|
|
||||||
|
// Prepare filenames
|
||||||
|
$hostPageSnapPath = 'hps/' . substr(trim(chunk_split($hostPageSnap->hostPageSnapId, 1, '/'), '/'), 0, -1);
|
||||||
|
$hostPageSnapFile = $hostPageSnapPath . substr($hostPageSnap->hostPageSnapId, -1) . '.zip';
|
||||||
|
|
||||||
// Delete snap files
|
// Delete snap files
|
||||||
foreach (json_decode(SNAP_STORAGE) as $name => $storages) {
|
foreach (json_decode(SNAP_STORAGE) as $node => $storages) {
|
||||||
|
|
||||||
foreach ($storages as $i => $storage) {
|
foreach ($storages as $location => $storage) {
|
||||||
|
|
||||||
// Generate storage id
|
switch ($node) {
|
||||||
$crc32name = crc32(sprintf('%s.%s', $name, $i));
|
|
||||||
|
|
||||||
switch ($name) {
|
|
||||||
|
|
||||||
case 'localhost':
|
case 'localhost':
|
||||||
|
|
||||||
@unlink($storage->directory . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
if (file_exists($storage->directory . $hostPageSnapFile)) {
|
||||||
|
|
||||||
|
unlink($storage->directory . $hostPageSnapFile);
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
case 'ftp':
|
case 'ftp':
|
||||||
@ -119,7 +121,8 @@ try {
|
|||||||
$ftp = new Ftp();
|
$ftp = new Ftp();
|
||||||
|
|
||||||
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
|
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
|
||||||
$ftp->delete('hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
|
||||||
|
$ftp->delete($hostPageSnapFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
@ -161,23 +164,25 @@ try {
|
|||||||
$hostPagesToHostPageDeleted += $db->deleteHostPageToHostPage($hostPage->hostPageId);
|
$hostPagesToHostPageDeleted += $db->deleteHostPageToHostPage($hostPage->hostPageId);
|
||||||
|
|
||||||
// Delete host page snaps
|
// Delete host page snaps
|
||||||
$snapFilePath = chunk_split($hostPage->hostPageId, 1, '/');
|
|
||||||
|
|
||||||
foreach ($db->getHostPageSnaps($hostPage->hostPageId) as $hostPageSnap) {
|
foreach ($db->getHostPageSnaps($hostPage->hostPageId) as $hostPageSnap) {
|
||||||
|
|
||||||
|
// Prepare filenames
|
||||||
|
$hostPageSnapPath = 'hps/' . substr(trim(chunk_split($hostPageSnap->hostPageSnapId, 1, '/'), '/'), 0, -1);
|
||||||
|
$hostPageSnapFile = $hostPageSnapPath . substr($hostPageSnap->hostPageSnapId, -1) . '.zip';
|
||||||
|
|
||||||
// Delete snap files
|
// Delete snap files
|
||||||
foreach (json_decode(SNAP_STORAGE) as $name => $storages) {
|
foreach (json_decode(SNAP_STORAGE) as $node => $storages) {
|
||||||
|
|
||||||
foreach ($storages as $i => $storage) {
|
foreach ($storages as $location => $storage) {
|
||||||
|
|
||||||
// Generate storage id
|
switch ($node) {
|
||||||
$crc32name = crc32(sprintf('%s.%s', $name, $i));
|
|
||||||
|
|
||||||
switch ($name) {
|
|
||||||
|
|
||||||
case 'localhost':
|
case 'localhost':
|
||||||
|
|
||||||
@unlink($storage->directory . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
if (file_exists($storage->directory . $hostPageSnapFile)) {
|
||||||
|
|
||||||
|
unlink($storage->directory . $hostPageSnapFile);
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
case 'ftp':
|
case 'ftp':
|
||||||
@ -185,7 +190,8 @@ try {
|
|||||||
$ftp = new Ftp();
|
$ftp = new Ftp();
|
||||||
|
|
||||||
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
|
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
|
||||||
$ftp->delete('hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
|
||||||
|
$ftp->delete($hostPageSnapFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
@ -283,23 +289,25 @@ try {
|
|||||||
$hostPagesToHostPageDeleted += $db->deleteHostPageToHostPage($hostPage->hostPageId);
|
$hostPagesToHostPageDeleted += $db->deleteHostPageToHostPage($hostPage->hostPageId);
|
||||||
|
|
||||||
// Delete host page snaps
|
// Delete host page snaps
|
||||||
$snapFilePath = chunk_split($hostPage->hostPageId, 1, '/');
|
|
||||||
|
|
||||||
foreach ($db->getHostPageSnaps($hostPage->hostPageId) as $hostPageSnap) {
|
foreach ($db->getHostPageSnaps($hostPage->hostPageId) as $hostPageSnap) {
|
||||||
|
|
||||||
|
// Prepare filenames
|
||||||
|
$hostPageSnapPath = 'hps/' . substr(trim(chunk_split($hostPageSnap->hostPageSnapId, 1, '/'), '/'), 0, -1);
|
||||||
|
$hostPageSnapFile = $hostPageSnapPath . substr($hostPageSnap->hostPageSnapId, -1) . '.zip';
|
||||||
|
|
||||||
// Delete snap files
|
// Delete snap files
|
||||||
foreach (json_decode(SNAP_STORAGE) as $name => $storages) {
|
foreach (json_decode(SNAP_STORAGE) as $node => $storages) {
|
||||||
|
|
||||||
foreach ($storages as $i => $storage) {
|
foreach ($storages as $location => $storage) {
|
||||||
|
|
||||||
// Generate storage id
|
switch ($node) {
|
||||||
$crc32name = crc32(sprintf('%s.%s', $name, $i));
|
|
||||||
|
|
||||||
switch ($name) {
|
|
||||||
|
|
||||||
case 'localhost':
|
case 'localhost':
|
||||||
|
|
||||||
@unlink($storage->directory . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
if (file_exists($storage->directory . $hostPageSnapFile)) {
|
||||||
|
|
||||||
|
unlink($storage->directory . $hostPageSnapFile);
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
case 'ftp':
|
case 'ftp':
|
||||||
@ -307,7 +315,8 @@ try {
|
|||||||
$ftp = new Ftp();
|
$ftp = new Ftp();
|
||||||
|
|
||||||
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
|
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
|
||||||
$ftp->delete('hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip');
|
|
||||||
|
$ftp->delete($hostPageSnapFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@ -665,45 +665,45 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
|||||||
// Begin snaps
|
// Begin snaps
|
||||||
if (SNAP_STORAGE) {
|
if (SNAP_STORAGE) {
|
||||||
|
|
||||||
$hostPageSnapTimeAdded = time();
|
// Register snap in DB
|
||||||
$hostPageSnapPath = chunk_split($queueHostPage->hostPageId, 1, '/');
|
if ($hostPageSnapId = $db->addHostPageSnap($queueHostPage->hostPageId, time())) {
|
||||||
|
|
||||||
$hostPageSnapFilenameTmp = __DIR__ . '/../storage/tmp/snap/hp/' . $hostPageSnapPath . $hostPageSnapTimeAdded . '.zip';
|
// Default storage success
|
||||||
@mkdir(__DIR__ . '/../storage/tmp/snap/hp/' . $hostPageSnapPath, 0755, true);
|
$snapFilesExists = false;
|
||||||
|
|
||||||
// Create new ZIP container
|
// Prepare filenames
|
||||||
|
$hostPageSnapPath = 'hps/' . substr(trim(chunk_split($hostPageSnapId, 1, '/'), '/'), 0, -1);
|
||||||
|
$hostPageSnapFile = $hostPageSnapPath . substr($hostPageSnapId, -1) . '.zip';
|
||||||
|
|
||||||
|
$hostPageSnapFilenameTmp = __DIR__ . '/../storage/tmp/' . md5($hostPageSnapFile);
|
||||||
|
|
||||||
|
// Create ZIP container
|
||||||
$zip = new ZipArchive();
|
$zip = new ZipArchive();
|
||||||
|
|
||||||
if (true === $zip->open($hostPageSnapFilenameTmp, ZipArchive::CREATE)) {
|
if (true === $zip->open($hostPageSnapFilenameTmp, ZipArchive::CREATE)) {
|
||||||
|
|
||||||
// Insert compressed snap data into the tmp storage
|
// Insert compressed snap data into the tmp storage
|
||||||
if (true === $zip->addFromString('DATA', $content) &&
|
if (true === $zip->addFromString('DATA', $content) &&
|
||||||
true === $zip->addFromString('META', sprintf('TIMESTAMP: %s', $hostPageSnapTimeAdded) . PHP_EOL .
|
true === $zip->addFromString('META', sprintf('MIME: %s', Filter::mime($contentType)) . PHP_EOL .
|
||||||
sprintf('MIME: %s', Filter::mime($contentType)) . PHP_EOL .
|
sprintf('TIMESTAMP: %s', time()) . PHP_EOL .
|
||||||
sprintf('SOURCE: %s', Filter::url(WEBSITE_DOMAIN . '/explore.php?hp=' . $queueHostPage->hostPageId)) . PHP_EOL .
|
sprintf('SOURCE: %s', Filter::url($queueHostPage->hostPageURL)))) {
|
||||||
sprintf('TARGET: %s', Filter::url($queueHostPage->hostPageURL)))) {
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Done
|
|
||||||
$zip->close();
|
$zip->close();
|
||||||
|
|
||||||
// Temporarily snap file exists
|
// Temporarily snap file exists
|
||||||
if (file_exists($hostPageSnapFilenameTmp)) {
|
if (file_exists($hostPageSnapFilenameTmp)) {
|
||||||
|
|
||||||
// Register snap in DB
|
|
||||||
if ($hostPageSnapId = $db->addHostPageSnap($queueHostPage->hostPageId, $hostPageSnapTimeAdded)) {
|
|
||||||
|
|
||||||
// Default storage success
|
|
||||||
$snapFilesExists = false;
|
|
||||||
|
|
||||||
// Copy files to each storage
|
// Copy files to each storage
|
||||||
foreach (json_decode(SNAP_STORAGE) as $name => $storages) {
|
foreach (json_decode(SNAP_STORAGE) as $node => $storages) {
|
||||||
|
|
||||||
foreach ($storages as $i => $storage) {
|
foreach ($storages as $location => $storage) {
|
||||||
|
|
||||||
// Generate storage id
|
// Generate storage id
|
||||||
$crc32name = crc32(sprintf('%s.%s', $name, $i));
|
$crc32name = crc32(sprintf('%s.%s', $node, $location));
|
||||||
|
|
||||||
switch ($name) {
|
switch ($node) {
|
||||||
|
|
||||||
case 'localhost':
|
case 'localhost':
|
||||||
|
|
||||||
@ -725,7 +725,7 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
|||||||
// Copy tmp snap file to the permanent storage
|
// Copy tmp snap file to the permanent storage
|
||||||
@mkdir($storage->directory . $hostPageSnapPath, 0755, true);
|
@mkdir($storage->directory . $hostPageSnapPath, 0755, true);
|
||||||
|
|
||||||
if (copy($hostPageSnapFilenameTmp, $storage->directory . $hostPageSnapPath . $hostPageSnapTimeAdded . '.zip')) {
|
if (copy($hostPageSnapFilenameTmp, $storage->directory . $hostPageSnapFile)) {
|
||||||
|
|
||||||
// Register storage name
|
// Register storage name
|
||||||
if ($db->addHostPageSnapStorage($hostPageSnapId, $crc32name, time())) {
|
if ($db->addHostPageSnapStorage($hostPageSnapId, $crc32name, time())) {
|
||||||
@ -757,9 +757,9 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
|||||||
|
|
||||||
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
|
if ($ftp->connect($storage->host, $storage->port, $storage->username, $storage->password, $storage->directory, $storage->timeout, $storage->passive)) {
|
||||||
|
|
||||||
$ftp->mkdir('hp/' . $hostPageSnapPath, true);
|
$ftp->mkdir($hostPageSnapPath, true);
|
||||||
|
|
||||||
if ($ftp->copy($hostPageSnapFilenameTmp, 'hp/' . $hostPageSnapPath . $hostPageSnapTimeAdded . '.zip')) {
|
if ($ftp->copy($hostPageSnapFilenameTmp, $hostPageSnapFile)) {
|
||||||
|
|
||||||
// Register storage name
|
// Register storage name
|
||||||
if ($db->addHostPageSnapStorage($hostPageSnapId, $crc32name, time())) {
|
if ($db->addHostPageSnapStorage($hostPageSnapId, $crc32name, time())) {
|
||||||
@ -775,6 +775,7 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// At least one file have been stored
|
// At least one file have been stored
|
||||||
if ($snapFilesExists) {
|
if ($snapFilesExists) {
|
||||||
@ -785,14 +786,11 @@ foreach ($db->getHostPageCrawlQueue(CRAWL_PAGE_LIMIT, time() - CRAWL_PAGE_SECOND
|
|||||||
|
|
||||||
$db->deleteHostPageSnap($hostPageSnapId);
|
$db->deleteHostPageSnap($hostPageSnapId);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delete tmp snap
|
// Delete tmp snap
|
||||||
unlink($hostPageSnapFilenameTmp);
|
unlink($hostPageSnapFilenameTmp);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Skip page links following with meta robots:nofollow attribute
|
// Skip page links following with meta robots:nofollow attribute
|
||||||
foreach (@$dom->getElementsByTagName('meta') as $meta) {
|
foreach (@$dom->getElementsByTagName('meta') as $meta) {
|
||||||
|
@ -528,15 +528,6 @@ class MySQL {
|
|||||||
return $query->fetch();
|
return $query->fetch();
|
||||||
}
|
}
|
||||||
|
|
||||||
public function findHostPageSnapByTimeAdded(int $hostPageId, int $timeAdded) {
|
|
||||||
|
|
||||||
$query = $this->_db->prepare('SELECT * FROM `hostPageSnap` WHERE `hostPageId` = ? AND `timeAdded` = ? LIMIT 1');
|
|
||||||
|
|
||||||
$query->execute([$hostPageId, $timeAdded]);
|
|
||||||
|
|
||||||
return $query->fetch();
|
|
||||||
}
|
|
||||||
|
|
||||||
public function addHostPageSnapDownload(int $hostPageSnapStorageId, string $crc32ip, int $timeAdded) {
|
public function addHostPageSnapDownload(int $hostPageSnapStorageId, string $crc32ip, int $timeAdded) {
|
||||||
|
|
||||||
$query = $this->_db->prepare('INSERT INTO `hostPageSnapDownload` (`hostPageSnapStorageId`,
|
$query = $this->_db->prepare('INSERT INTO `hostPageSnapDownload` (`hostPageSnapStorageId`,
|
||||||
|
@ -52,37 +52,36 @@ switch ($type) {
|
|||||||
// Get snap details from DB
|
// Get snap details from DB
|
||||||
if ($hostPageSnap = $db->getHostPageSnap(!empty($_GET['hps']) ? (int) $_GET['hps'] : 0)) {
|
if ($hostPageSnap = $db->getHostPageSnap(!empty($_GET['hps']) ? (int) $_GET['hps'] : 0)) {
|
||||||
|
|
||||||
// Get file
|
// Prepare filenames
|
||||||
$snapFile = 'hp/' . chunk_split($hostPageSnap->hostPageId, 1, '/') . $hostPageSnap->timeAdded . '.zip';
|
$hostPageSnapPath = 'hps/' . substr(trim(chunk_split($hostPageSnap->hostPageSnapId, 1, '/'), '/'), 0, -1);
|
||||||
|
$hostPageSnapFile = $hostPageSnapPath . substr($hostPageSnap->hostPageSnapId, -1) . '.zip';
|
||||||
|
|
||||||
// Get snap file
|
// Get snap file
|
||||||
foreach (json_decode(SNAP_STORAGE) as $name => $storages) {
|
foreach (json_decode(SNAP_STORAGE) as $node => $storages) {
|
||||||
|
|
||||||
foreach ($storages as $i => $storage) {
|
foreach ($storages as $location => $storage) {
|
||||||
|
|
||||||
// Generate storage id
|
// Generate storage id
|
||||||
$crc32name = crc32(sprintf('%s.%s', $name, $i));
|
$crc32name = crc32(sprintf('%s.%s', $node, $location));
|
||||||
|
|
||||||
if ($hostPageSnapStorage = $db->findHostPageSnapStorageByCRC32Name($hostPageSnap->hostPageSnapId, $crc32name)) {
|
if ($hostPageSnapStorage = $db->findHostPageSnapStorageByCRC32Name($hostPageSnap->hostPageSnapId, $crc32name)) {
|
||||||
|
|
||||||
switch ($name) {
|
switch ($node) {
|
||||||
|
|
||||||
case 'localhost':
|
case 'localhost':
|
||||||
|
|
||||||
// Download local snap in higher priority if possible
|
// Download local snap in higher priority if possible
|
||||||
if (file_exists($storage->directory . $snapFile) &&
|
if (file_exists($storage->directory . $hostPageSnapFile) &&
|
||||||
is_readable($storage->directory . $snapFile)) {
|
is_readable($storage->directory . $hostPageSnapFile)) {
|
||||||
|
|
||||||
// Register snap download
|
// Register snap download
|
||||||
$db->addHostPageSnapDownload($hostPageSnapStorage->hostPageSnapStorageId, $crc32ip, time());
|
$db->addHostPageSnapDownload($hostPageSnapStorage->hostPageSnapStorageId, $crc32ip, time());
|
||||||
|
|
||||||
// Return snap file
|
// Return snap file
|
||||||
header('Content-Type: application/zip');
|
header('Content-Type: application/zip');
|
||||||
header(sprintf('Content-Length: %s', $snapSize));
|
header(sprintf('Content-Length: %s', filesize($storage->directory . $hostPageSnapFile)));
|
||||||
header(sprintf('Content-Disposition: filename="snap.%s.%s.%s.zip"', $hostPageSnap->hostPageSnapId,
|
header(sprintf('Content-Disposition: filename="snap.%s.zip"', $hostPageSnap->hostPageSnapId));
|
||||||
$hostPageSnap->hostPageId,
|
readfile($storage->directory . $hostPageSnapFile);
|
||||||
$hostPageSnap->timeAdded));
|
|
||||||
readfile($storage->directory . $snapFile);
|
|
||||||
|
|
||||||
exit;
|
exit;
|
||||||
}
|
}
|
||||||
@ -99,12 +98,10 @@ switch ($type) {
|
|||||||
|
|
||||||
// Return snap file
|
// Return snap file
|
||||||
header('Content-Type: application/zip');
|
header('Content-Type: application/zip');
|
||||||
header(sprintf('Content-Length: %s', $snapSize));
|
header(sprintf('Content-Length: %s', $ftp->size($hostPageSnapFile)));
|
||||||
header(sprintf('Content-Disposition: filename="snap.%s.%s.%s.zip"', $hostPageSnap->hostPageSnapId,
|
header(sprintf('Content-Disposition: filename="snap.%s.zip"', $hostPageSnap->hostPageSnapId));
|
||||||
$hostPageSnap->hostPageId,
|
|
||||||
$hostPageSnap->timeAdded));
|
|
||||||
|
|
||||||
$ftp->get($snapFile, 'php://output');
|
$ftp->get($hostPageSnapFile, 'php://output');
|
||||||
|
|
||||||
$ftp->close();
|
$ftp->close();
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user