Browse Source

implement FTP snaps

main
ghost 1 year ago
parent
commit
875382c56e
  1. 3
      composer.json
  2. 46
      example/config.json
  3. 213
      src/cli/document/crawl.php

3
composer.json

@ -17,6 +17,7 @@
"manticoresoftware/manticoresearch-php": "^3.1", "manticoresoftware/manticoresearch-php": "^3.1",
"symfony/css-selector": "^6.3", "symfony/css-selector": "^6.3",
"symfony/dom-crawler": "^6.3", "symfony/dom-crawler": "^6.3",
"jdenticon/jdenticon": "^1.0" "jdenticon/jdenticon": "^1.0",
"yggverse/ftp": "^1.0"
} }
} }

46
example/config.json

@ -15,7 +15,7 @@
{ {
"url": "url":
{ {
"base":"http://127.0.0.1:8888" "base":"http://127.0.0.1"
}, },
"pagination": "pagination":
{ {
@ -77,12 +77,15 @@
{ {
"storage": "storage":
{ {
"tmp":{
"directory":"storage/tmp/snap"
},
"local":{ "local":{
"enabled":true, "enabled":true,
"directory":"storage/snap", "directory":"storage/snap",
"size": "size":
{ {
"max":100024 "max":10000024
}, },
"mime": "mime":
[ [
@ -99,8 +102,43 @@
}, },
"mirror": "mirror":
{ {
"enabled":false, "ftp":
"ftp":[] [
{
"enabled":false,
"connection":
{
"port":21,
"host":"",
"username":"",
"password":"",
"directory":"/snap/yo",
"timeout":30,
"passive":true,
"attempts":
{
"limit":0,
"delay":60
}
},
"size":
{
"max":10000024
},
"mime":
[
"application/xhtml+xml",
"application/javascript",
"text/html",
"text/plain",
"text/css",
"image/webp",
"image/png",
"image/gif",
"image/ico"
]
}
]
} }
} }
} }

213
src/cli/document/crawl.php

@ -288,15 +288,15 @@ foreach($search->get() as $document)
); );
/// absolute /// absolute
if ('/' === substr($config->snap->storage->local->directory, 0, 1)) if ('/' === substr($config->snap->storage->tmp->directory, 0, 1))
{ {
$filepath = $config->snap->storage->local->directory; $filepath = $config->snap->storage->tmp->directory;
} }
/// relative /// relative
else else
{ {
$filepath = __DIR__ . '/../../../' . $config->snap->storage->local->directory; $filepath = __DIR__ . '/../../../' . $config->snap->storage->tmp->directory;
} }
$filepath = sprintf( $filepath = sprintf(
@ -310,16 +310,16 @@ foreach($search->get() as $document)
) )
); );
$filename = sprintf( @mkdir($filepath, 0755, true);
$tmp = sprintf(
'%s/%s.tar', '%s/%s.tar',
$filepath, $filepath,
$time $time
); );
@mkdir($filepath, 0755, true);
// Compress response to archive // Compress response to archive
$snap = new PharData($filename); $snap = new PharData($tmp);
$snap->addFromString( $snap->addFromString(
'DATA', 'DATA',
@ -340,55 +340,188 @@ foreach($search->get() as $document)
Phar::GZ Phar::GZ
); );
unlink( unlink( // remove tarball
$filename $tmp
); );
$filename = sprintf( $tmp = sprintf(
'%s.gz', '%s.gz',
$filename $tmp
); );
// Copy to mirror storage on enabled // Copy to local storage on enabled
if ($config->snap->storage->mirror->enabled) if ($config->snap->storage->local->enabled)
{ {
// @TODO copy $allowed = false;
// Snap match remote storage size/mime conditions
}
// Remove snap on local storage disabled // Check for mime allowed
if (!$config->snap->storage->local->enabled) foreach ($config->snap->storage->local->mime as $whitelist)
{ {
@unlink( if (false !== stripos($mime, $whitelist))
$filename {
); $allowed = true;
} break;
}
}
// Remove snap on out of local storage size limits // Check size limits
if ($size > $config->snap->storage->local->size->max) if ($size > $config->snap->storage->local->size->max)
{ {
@unlink( $allowed = false;
$filename }
);
}
// Remove snap on mime not allowed // Copy snap to the permanent storage
$remove = true; if ($allowed)
foreach ($config->snap->storage->local->mime as $whitelist)
{
if (false !== stripos($mime, $whitelist))
{ {
$remove = false; /// absolute
break; if ('/' === substr($config->snap->storage->local->directory, 0, 1))
{
$filepath = $config->snap->storage->local->directory;
}
/// relative
else
{
$filepath = __DIR__ . '/../../../' . $config->snap->storage->local->directory;
}
// Extend the base path already held in $filepath with one sub-directory per
// character of $md5url (str_split() without a length yields single characters)
$filepath = sprintf(
'%s/%s',
$filepath,
implode(
'/',
str_split(
$md5url
)
)
);
// Create the whole directory chain (third argument: recursive), mode 0755;
// "@" suppresses the warning, e.g. when the directory already exists
@mkdir($filepath, 0755, true);
// The permanent file keeps the base name of the temporary archive
$filename = sprintf(
'%s/%s',
$filepath,
basename(
$tmp
)
);
// Copy (not move) the archive: $tmp stays in place after this call
// NOTE(review): the result of copy() is not checked, a failed copy passes silently
copy(
$tmp,
$filename
);
} }
} }
if ($remove) // Copy to FTP mirror storage on enabled
foreach ($config->snap->storage->mirror->ftp as $ftp)
{ {
@unlink( // Resource enabled
$filename if (!$ftp->enabled)
{
continue;
}
// Decide whether this snap may be uploaded to the current FTP mirror:
// denied by default, allowed by a mime match, denied again when oversized
$allowed = false;
// Check for mime allowed
// (case-insensitive substring match: a whitelist entry only has to occur
// somewhere inside $mime; the first match ends the search)
foreach ($ftp->mime as $whitelist)
{
if (false !== stripos($mime, $whitelist))
{
$allowed = true;
break;
}
}
// Check size limits
// (runs after the mime check on purpose: an oversized snap is rejected
// even when its mime type is whitelisted)
if ($size > $ftp->size->max)
{
$allowed = false;
}
// Conditions not met: skip this mirror and go on with the next one
if (!$allowed)
{
continue;
}
// Prepare location
// Relative remote path with one directory per character of $md5url
// (str_split() without a length yields single characters)
$filepath = implode(
'/',
str_split(
$md5url
)
);
$filename = sprintf(
'%s/%s',
$filepath,
basename(
$tmp
)
); );
// Init connection
// Upload the snap to the current FTP mirror. On a failed connection the loop
// logs, sleeps and retries until connect() succeeds or the configured
// attempt limit is exceeded.
$attempt = 1;
do {
// A new client instance is created on every pass of the loop
$remote = new \Yggverse\Ftp\Client();
$connection = $remote->connect(
$ftp->connection->host,
$ftp->connection->port,
$ftp->connection->username,
$ftp->connection->password,
$ftp->connection->directory,
$ftp->connection->timeout,
$ftp->connection->passive
);
// Remote host connected
if ($connection) {
// Create the target directory chain, then send the temporary archive
// (presumably the second mkdir() argument means "recursive" and copy()
// uploads local $tmp to remote $filename - TODO confirm against yggverse/ftp)
// NOTE(review): results of mkdir() and copy() are not checked,
// a failed upload passes silently
$remote->mkdir(
$filepath,
true
);
$remote->copy(
$tmp,
$filename
);
$remote->close();
// On remote connection lost, repeat attempt
} else {
// Stop connection attempts on limit provided
// A limit of 0 disables this check, so the loop retries indefinitely.
// NOTE(review): $attempt is only incremented by the log call below, so with
// limit N the break happens on failure N + 1, i.e. connect() is called
// N + 1 times - confirm whether the limit counts attempts or re-attempts
if ($ftp->connection->attempts->limit > 0 && $attempt > $ftp->connection->attempts->limit)
{
break;
}
// Log event
// ($attempt++ prints the current attempt number, then increments it)
echo sprintf(
_('[attempt: %s] wait for remote storage "%s" reconnection...') . PHP_EOL,
$attempt++,
$ftp->connection->host,
);
// Delay next attempt
// (sleep() takes seconds)
sleep(
$ftp->connection->attempts->delay
);
}
// NOTE(review): the loop repeats only on a strict false; a falsy non-false result
// of connect() would take the else branch once and then leave the loop without
// upload - confirm that connect() returns a strict bool
} while ($connection === false);
} }
// Remove tmp data
@unlink(
$tmp
);
} }
catch (Exception $exception) catch (Exception $exception)

Loading…
Cancel
Save