mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-01-09 14:27:55 +00:00
integrate optional MEGA/cmd snap storage
This commit is contained in:
parent
f55a2dd26a
commit
1969707eeb
3
.gitignore
vendored
3
.gitignore
vendored
@ -5,6 +5,3 @@ config/app.php
|
||||
config/sphinx.conf
|
||||
|
||||
database/yggo.mwb.bak
|
||||
|
||||
storage
|
||||
public/snap
|
||||
|
14
README.md
14
README.md
@ -38,7 +38,7 @@ sphinxsearch
|
||||
* Deploy the database using [MySQL Workbench](https://www.mysql.com/products/workbench) project presented in the `/database` folder
|
||||
* Install [Sphinx Search Server](https://sphinxsearch.com)
|
||||
* Configuration examples are placed at `/config` folder
|
||||
* Make sure `/storage`, `/public/snap` folders writable
|
||||
* Make sure the `/storage/cache`, `/storage/tmp` and `/public/snap` folders are writable
|
||||
* Set up the `/crontab` scripts by following [example](https://github.com/YGGverse/YGGo/blob/main/config/crontab.txt)
|
||||
|
||||
#### JSON API
|
||||
@ -148,9 +148,11 @@ GET m=SphinxQL
|
||||
* [x] Index explorer
|
||||
* [x] Safe images preview
|
||||
* [x] Extended search syntax support
|
||||
* [ ] Compressed page snaps history
|
||||
* [x] Compressed, configurable page history snaps with multi-provider storage
|
||||
+ [x] Local
|
||||
+ [ ] Remote
|
||||
+ [x] Remote
|
||||
+ [x] MEGAcmd/FTP
|
||||
+ [ ] Yggdrasil
|
||||
|
||||
##### UI
|
||||
|
||||
@ -159,7 +161,7 @@ GET m=SphinxQL
|
||||
* [x] Content genre tabs (#1)
|
||||
* [x] Page index explorer
|
||||
+ [x] Meta
|
||||
+ [x] Snaps
|
||||
+ [x] Snaps history
|
||||
+ [x] Referrers
|
||||
* [ ] Results with found matches highlight
|
||||
* [ ] The time machine feature by content snaps history
|
||||
@ -170,6 +172,7 @@ GET m=SphinxQL
|
||||
+ [x] Manifest
|
||||
+ [x] Search
|
||||
+ [x] Hosts
|
||||
+ [ ] Snaps
|
||||
+ [ ] MIME list
|
||||
* [ ] Context advertising API
|
||||
|
||||
@ -186,9 +189,6 @@ GET m=SphinxQL
|
||||
* [x] MIME Content-type settings
|
||||
* [x] Ban non-condition links to prevent extra requests
|
||||
* [x] Debug log
|
||||
* [ ] Page content snaps generation
|
||||
+ [x] Local
|
||||
+ [ ] Remote
|
||||
* [ ] Indexing new sites homepage in higher priority
|
||||
* [ ] Redirect codes extended processing
|
||||
* [ ] Palette image index / filter
|
||||
|
@ -74,6 +74,20 @@ define('DB_PASSWORD', '');
|
||||
define('SPHINX_HOST', '127.0.0.1');
|
||||
define('SPHINX_PORT', 9306);
|
||||
|
||||
// Third-party connections (optional)
|
||||
|
||||
/*
|
||||
* Mega.nz remote storage
|
||||
*
|
||||
* FTP storage integration through MEGAcmd (https://mega.io/cmd)
|
||||
*
|
||||
* Connects to the mega-ftp instance when CRAWL_PAGE_MIME_SNAP_MEGA is enabled
|
||||
*
|
||||
*/
|
||||
define('MEGA_FTP_HOST', '127.0.0.1');
|
||||
define('MEGA_FTP_PORT', 4990);
|
||||
define('MEGA_FTP_DIRECTORY', '');
|
||||
|
||||
// Proxy settings
|
||||
|
||||
/*
|
||||
@ -176,7 +190,21 @@ define('CRAWL_PAGE_MIME_INDEX', 'text/html,application/xhtml+xml,text/plain,imag
|
||||
* comma separated | false to disable
|
||||
*
|
||||
*/
|
||||
define('CRAWL_PAGE_MIME_SNAP_LOCAL', 'text/html,application/xhtml+xml,text/plain,image/webp,image/png,image/gif,image/jpeg,image/ico,image/svg+xml');
|
||||
define('CRAWL_PAGE_MIME_SNAP_LOCAL', 'text/html');
|
||||
|
||||
/*
|
||||
* Snap pages matching these MIME types to mega.nz
|
||||
*
|
||||
* comma separated | false to disable
|
||||
*
|
||||
* Requires connection:
|
||||
*
|
||||
* MEGA_FTP_HOST
|
||||
* MEGA_FTP_PORT
|
||||
* MEGA_FTP_DIRECTORY
|
||||
*
|
||||
*/
|
||||
define('CRAWL_PAGE_MIME_SNAP_MEGA', 'text/html,application/xhtml+xml,text/plain,image/webp,image/png,image/gif,image/jpeg,image/ico');
|
||||
|
||||
/*
|
||||
* Renew manifests index by timing offset provided
|
||||
|
@ -84,8 +84,35 @@ try {
|
||||
$hostPagesToHostPageDeleted += $db->deleteHostPageToHostPage($hostPage->hostPageId);
|
||||
|
||||
// Delete host page snaps
//
// Snap archives are sharded by page id: chunk_split() turns e.g. "123" into "1/2/3/",
// so the resulting prefix already ends with a directory separator.
$snapFilePath = chunk_split($hostPage->hostPageId, 1, '/');

foreach ($db->getHostPageSnaps($hostPage->hostPageId) as $hostPageSnap) {

  // Storage flags recorded for this snap; a flag is cleared once its copy is removed
  $snapFileLocalExists = (bool) $hostPageSnap->storageLocal;
  $snapFileMegaExists  = (bool) $hostPageSnap->storageMega;

  // Remove the copy kept in the local public storage
  if ($snapFileLocalExists) {

    if (unlink('../public/snap/hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip')) {

      $snapFileLocalExists = false;
    }
  }

  // Remove the copy kept in the MEGA storage (reached through the mega-ftp bridge)
  if ($snapFileMegaExists) {

    $ftp = new Ftp();

    if ($ftp->connect(MEGA_FTP_HOST, MEGA_FTP_PORT, null, null, MEGA_FTP_DIRECTORY)) {

      if ($ftp->delete('hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip')) {

        $snapFileMegaExists = false;
      }

      // A connection is opened per snap, so release it here;
      // otherwise every processed snap leaves an open FTP control connection behind
      $ftp->close();
    }
  }

  // Drop the DB record only when no stored copy is left behind,
  // so a failed removal can be retried on the next run
  if (!$snapFileLocalExists && !$snapFileMegaExists) {

    $hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId);
  }
}
|
||||
@ -110,8 +137,35 @@ try {
|
||||
$hostPagesToHostPageDeleted += $db->deleteHostPageToHostPage($hostPage->hostPageId);
|
||||
|
||||
// Delete host page snaps
//
// Snap archives are sharded by page id: chunk_split() turns e.g. "123" into "1/2/3/",
// so the resulting prefix already ends with a directory separator.
$snapFilePath = chunk_split($hostPage->hostPageId, 1, '/');

foreach ($db->getHostPageSnaps($hostPage->hostPageId) as $hostPageSnap) {

  // Storage flags recorded for this snap; a flag is cleared once its copy is removed
  $snapFileLocalExists = (bool) $hostPageSnap->storageLocal;
  $snapFileMegaExists  = (bool) $hostPageSnap->storageMega;

  // Remove the copy kept in the local public storage
  if ($snapFileLocalExists) {

    if (unlink('../public/snap/hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip')) {

      $snapFileLocalExists = false;
    }
  }

  // Remove the copy kept in the MEGA storage (reached through the mega-ftp bridge)
  if ($snapFileMegaExists) {

    $ftp = new Ftp();

    if ($ftp->connect(MEGA_FTP_HOST, MEGA_FTP_PORT, null, null, MEGA_FTP_DIRECTORY)) {

      if ($ftp->delete('hp/' . $snapFilePath . $hostPageSnap->timeAdded . '.zip')) {

        $snapFileMegaExists = false;
      }

      // A connection is opened per snap, so release it here;
      // otherwise every processed snap leaves an open FTP control connection behind
      $ftp->close();
    }
  }

  // Drop the DB record only when no stored copy is left behind,
  // so a failed removal can be retried on the next run
  if (!$snapFileLocalExists && !$snapFileMegaExists) {

    $hostPagesSnapDeleted += $db->deleteHostPageSnap($hostPageSnap->hostPageSnapId);
  }
}
|
||||
|
@ -11,6 +11,7 @@ if (false === sem_acquire($semaphore, true)) {
|
||||
|
||||
// Load system dependencies
|
||||
require_once('../config/app.php');
|
||||
require_once('../library/ftp.php');
|
||||
require_once('../library/curl.php');
|
||||
require_once('../library/robots.php');
|
||||
require_once('../library/filter.php');
|
||||
@ -384,7 +385,11 @@ try {
|
||||
}
|
||||
}
|
||||
|
||||
// Save local snap
|
||||
// Begin snaps
|
||||
$snapLocal = false;
|
||||
$snapMega = false;
|
||||
|
||||
// Snap local enabled and MIME in white list
|
||||
if (false !== CRAWL_PAGE_MIME_SNAP_LOCAL) {
|
||||
|
||||
foreach ((array) explode(',', CRAWL_PAGE_MIME_SNAP_LOCAL) as $mime) {
|
||||
@ -394,43 +399,106 @@ try {
|
||||
// MIME type allowed in settings
|
||||
if (false !== stripos(Filter::mime($contentType), $mime)) {
|
||||
|
||||
$crc32data = crc32($content);
|
||||
|
||||
// Create not duplicated data snaps only for each storage host
|
||||
if (!$db->getHostPageSnap($queueHostPage->hostPageId, $crc32data)) {
|
||||
|
||||
$time = time();
|
||||
|
||||
$directory = chunk_split($queueHostPage->hostPageId, 1, '/');
|
||||
|
||||
@mkdir('../public/snap/hp/' . $directory, 0755, true);
|
||||
|
||||
$zip = new ZipArchive();
|
||||
|
||||
// Create new container
|
||||
if (true === $zip->open('../public/snap/hp/' . $directory . $time . '.zip', ZipArchive::CREATE)) {
|
||||
|
||||
// Insert compressed snap data
|
||||
if (true === $zip->addFromString('DATA', $content) &&
|
||||
true === $zip->addFromString('META', sprintf('TIMESTAMP: %s', $time) . PHP_EOL .
|
||||
sprintf('CRC32: %s', $crc32data . PHP_EOL .
|
||||
sprintf('MIME: %s', Filter::mime($contentType)) . PHP_EOL .
|
||||
sprintf('SOURCE: %s', Filter::url(WEBSITE_DOMAIN . '/snap/hp/' . $directory . $time . '.zip')) . PHP_EOL .
|
||||
sprintf('TARGET: %s', Filter::url($queueHostPageURL))))) {
|
||||
|
||||
// Update DB registry
|
||||
$hostPagesSnapAdded += $db->addHostPageSnap($queueHostPage->hostPageId, $crc32data, $time);
|
||||
}
|
||||
}
|
||||
|
||||
$zip->close();
|
||||
}
|
||||
|
||||
$snapLocal = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// MEGA snaps are taken only when the feature is enabled (constant is not false)
// and the response content type matches one of the comma separated MIME entries
if (false !== CRAWL_PAGE_MIME_SNAP_MEGA) {

  foreach ((array) explode(',', CRAWL_PAGE_MIME_SNAP_MEGA) as $mime) {

    // Normalize the configured entry before comparing
    $mime = Filter::mime($mime);

    // Not in the white list, try the next configured type
    if (false === stripos(Filter::mime($contentType), $mime)) {

      continue;
    }

    // First match is enough
    $snapMega = true;

    break;
  }
}
|
||||
|
||||
// At least one snap storage matches the settings condition:
// pack the page once into a temporary ZIP, register it, then copy it to every enabled storage
if ($snapLocal || $snapMega) {

  $crc32data = crc32($content);

  // Only snap content that is not registered yet for this page (same CRC32), whatever the time
  if (!$db->getHostPageSnap($queueHostPage->hostPageId, $crc32data)) {

    $snapTime = time();

    // Shard by page id: "123" => "1/2/3/" (trailing separator included)
    $snapPath = chunk_split($queueHostPage->hostPageId, 1, '/');

    $snapTmp  = '../storage/tmp/snap/hp/' . $snapPath . $snapTime . '.zip';

    // Best effort: the directory may already exist
    @mkdir('../storage/tmp/snap/hp/' . $snapPath, 0755, true);

    // Create new ZIP container
    $zip = new ZipArchive();

    if (true === $zip->open($snapTmp, ZipArchive::CREATE)) {

      // One "KEY: value" pair per line, no trailing line break
      $snapMeta = implode(PHP_EOL, [
        sprintf('TIMESTAMP: %s', $snapTime),
        sprintf('CRC32: %s', $crc32data),
        sprintf('MIME: %s', Filter::mime($contentType)),
        sprintf('SOURCE: %s', Filter::url(WEBSITE_DOMAIN . '/explore.php?hp=' . $queueHostPage->hostPageId)),
        sprintf('TARGET: %s', Filter::url($queueHostPageURL)),
      ]);

      // Insert compressed snap data into the tmp storage
      $snapPacked = true === $zip->addFromString('DATA', $content) &&
                    true === $zip->addFromString('META', $snapMeta);

      // Always finalize the archive, even when packing failed: an archive left open is closed
      // implicitly later on, which could write the tmp file back after the cleanup below
      $snapClosed = $zip->close();

      // Temporarily snap file exists
      if ($snapPacked && $snapClosed && file_exists($snapTmp)) {

        // Register snap in DB
        if ($hostPageSnapId = $db->addHostPageSnap($queueHostPage->hostPageId, $crc32data, $snapTime)) {

          $hostPagesSnapAdded++;

          // Copy tmp snap to the permanent local storage
          if ($snapLocal) {

            @mkdir('../public/snap/hp/' . $snapPath, 0755, true);

            if (copy($snapTmp, '../public/snap/hp/' . $snapPath . $snapTime . '.zip')) {

              // Update snap location info
              $db->updateHostPageSnapStorageLocal($hostPageSnapId, true);
            }
          }

          // Copy tmp snap to the permanent MEGA storage
          if ($snapMega) {

            $ftp = new Ftp();

            if ($ftp->connect(MEGA_FTP_HOST, MEGA_FTP_PORT, null, null, MEGA_FTP_DIRECTORY)) {

              $ftp->mkdir('hp/' . $snapPath, true);

              if ($ftp->copy($snapTmp, 'hp/' . $snapPath . $snapTime . '.zip')) {

                // Update snap location info
                $db->updateHostPageSnapStorageMega($hostPageSnapId, true);
              }

              $ftp->close();
            }
          }
        }
      }
    }

    // Remove tmp (may not exist when the archive was never written)
    @unlink($snapTmp);
  }
}
|
||||
|
||||
// Begin page links collection
|
||||
$links = [];
|
||||
|
||||
|
Binary file not shown.
90
library/ftp.php
Normal file
90
library/ftp.php
Normal file
@ -0,0 +1,90 @@
|
||||
<?php
|
||||
|
||||
/**
 * Thin wrapper around PHP's ftp_* functions.
 *
 * Every operation returns false (mkdir() simply does nothing) while no connection
 * is established, instead of passing an invalid handle to the FTP extension.
 */
class Ftp {

  /** @var resource|\FTP\Connection|null Control connection, null while disconnected */
  private $_connection = null;

  /** @var bool Passive mode flag applied on connect() */
  private $_passive;

  /**
   * @param bool $passive Turn passive mode on (true) or off (false) for new connections
   */
  public function __construct(bool $passive = true) {

    $this->_passive = $passive;
  }

  /**
   * Open a connection, optionally log in and change to the working directory.
   *
   * Login is attempted only when both $login and $password are non-empty.
   * An empty $directory keeps the server's initial directory.
   * On any failure the half-opened connection is closed again.
   *
   * @param string $host
   * @param int    $port
   * @param mixed  $login
   * @param mixed  $password
   * @param string $directory Remote directory to change to after connecting
   * @param int    $timeout   Network timeout in seconds, passed to ftp_connect()
   *
   * @return bool True when the connection is ready to use
   */
  public function connect(string $host,
                          int    $port,
                          mixed  $login = null,
                          mixed  $password = null,
                          string $directory = '/',
                          int    $timeout = 90) {

    // Do not leak a connection left over from a previous connect() call
    $this->close();

    if (!$connection = ftp_connect($host, $port, $timeout)) {

      return false;
    }

    $this->_connection = $connection;

    if (!empty($login) && !empty($password)) {

      if (!ftp_login($this->_connection, $login, $password)) {

        $this->close();

        return false;
      }
    }

    // The PHP manual requires ftp_pasv() to be called after a successful login
    if (!ftp_pasv($this->_connection, $this->_passive)) {

      $this->close();

      return false;
    }

    // Nothing to change to for an empty path, stay where the server placed us
    if ('' !== $directory && !ftp_chdir($this->_connection, $directory)) {

      $this->close();

      return false;
    }

    return true;
  }

  /**
   * Delete a remote file.
   *
   * @param string $target Remote file path
   *
   * @return bool
   */
  public function delete(string $target) {

    if (null === $this->_connection) {

      return false;
    }

    return ftp_delete($this->_connection, $target);
  }

  /**
   * Upload a local file.
   *
   * @param string $source Local file path
   * @param string $target Remote file path
   *
   * @return bool
   */
  public function copy(string $source, string $target) {

    if (null === $this->_connection) {

      return false;
    }

    // ftp_put() takes the remote name first, then the local one
    return ftp_put($this->_connection, $target, $source);
  }

  /**
   * Download a remote file (the counterpart of copy()).
   *
   * @param string $source Remote file path
   * @param string $target Local file path
   *
   * @return bool
   */
  public function get(string $source, string $target) {

    if (null === $this->_connection) {

      return false;
    }

    // ftp_get() takes the LOCAL name first, then the remote one
    return ftp_get($this->_connection, $target, $source);
  }

  /**
   * Create a remote directory, optionally with all of its parents.
   *
   * Failures are ignored on purpose: in recursive mode most of the path
   * segments are expected to exist already.
   *
   * @param string $name      Remote directory path
   * @param bool   $recursive Create every path segment in turn
   */
  public function mkdir(string $name, bool $recursive = false) {

    if (null === $this->_connection) {

      return;
    }

    if (!$recursive) {

      @ftp_mkdir($this->_connection, $name);

      return;
    }

    $path = [];

    foreach (explode('/', trim($name, '/')) as $directory) {

      // Skip empty segments produced by an empty name or doubled separators
      if ('' === $directory) {

        continue;
      }

      $path[] = $directory;

      @ftp_mkdir($this->_connection, implode('/', $path));
    }
  }

  /**
   * Get the size of a remote file.
   *
   * @param string $target Remote file path
   *
   * @return int|false Size in bytes, false when ftp_size() reports an error (-1)
   */
  public function size(string $target) {

    if (null === $this->_connection) {

      return false;
    }

    $size = ftp_size($this->_connection, $target);

    return -1 !== $size ? $size : false;
  }

  /**
   * Close the connection.
   *
   * @return bool False when there was nothing to close or closing failed
   */
  public function close() {

    if (null === $this->_connection) {

      return false;
    }

    $closed = ftp_close($this->_connection);

    $this->_connection = null;

    return $closed;
  }
}
|
@ -362,12 +362,30 @@ class MySQL {
|
||||
|
||||
/**
 * Register a new page snap.
 *
 * Plain INSERT (not INSERT IGNORE): a rejected row surfaces as a statement error
 * instead of being silently skipped.
 *
 * @param int    $hostPageId
 * @param string $crc32data  Checksum of the snapped content
 * @param int    $timeAdded  Snap time, also used by callers to build the archive file name
 *
 * @return mixed Whatever lastInsertId() of the underlying connection reports
 */
public function addHostPageSnap(int $hostPageId, string $crc32data, int $timeAdded) {

  $query = $this->_db->prepare('INSERT INTO `hostPageSnap` (`hostPageId`,
                                                            `crc32data`,
                                                            `timeAdded`) VALUES (?, ?, ?)');

  $query->execute([$hostPageId, $crc32data, $timeAdded]);

  return $this->_db->lastInsertId();
}
|
||||
|
||||
/**
 * Set the `storageLocal` column of one `hostPageSnap` row.
 *
 * @param int   $hostPageSnapId Value matched against `hostPageSnapId`
 * @param mixed $value          Bound as-is to `storageLocal`
 *
 * @return int Affected rows as reported by rowCount() of the executed statement
 */
public function updateHostPageSnapStorageLocal(int $hostPageSnapId, mixed $value) {

  $statement = $this->_db->prepare(
    'UPDATE `hostPageSnap` SET `storageLocal` = ? WHERE `hostPageSnapId` = ? LIMIT 1'
  );

  // Placeholder order: new flag value first, row id second
  $statement->execute([$value, $hostPageSnapId]);

  return $statement->rowCount();
}
|
||||
|
||||
/**
 * Set the `storageMega` column of one `hostPageSnap` row.
 *
 * @param int   $hostPageSnapId Value matched against `hostPageSnapId`
 * @param mixed $value          Bound as-is to `storageMega`
 *
 * @return int Affected rows as reported by rowCount() of the executed statement
 */
public function updateHostPageSnapStorageMega(int $hostPageSnapId, mixed $value) {

  $statement = $this->_db->prepare(
    'UPDATE `hostPageSnap` SET `storageMega` = ? WHERE `hostPageSnapId` = ? LIMIT 1'
  );

  // Placeholder order: new flag value first, row id second
  $statement->execute([$value, $hostPageSnapId]);

  return $statement->rowCount();
}
|
||||
|
||||
|
Binary file not shown.
Before Width: | Height: | Size: 159 KiB After Width: | Height: | Size: 161 KiB |
0
storage/tmp/index.html
Normal file
0
storage/tmp/index.html
Normal file
Loading…
Reference in New Issue
Block a user