mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-08-31 16:21:56 +00:00
add disk quota validation
This commit is contained in:
parent
7bee0ebb4d
commit
8dbb4a06af
@ -40,6 +40,12 @@ define('SPHINX_PORT', 9306);
|
||||
|
||||
// Crawler settings
|
||||
|
||||
/*
|
||||
* Stop the crawler when free disk space falls below this threshold (MB, 1 MB = 1000000 bytes)
|
||||
*
|
||||
*/
|
||||
define('CRAWL_STOP_DISK_QUOTA_MB_LEFT', 500);
|
||||
|
||||
/*
|
||||
* Pages (URI) processing limit in the crawler.php queue
|
||||
*
|
||||
|
@ -9,6 +9,13 @@ if (false === sem_acquire($semaphore, true)) {
|
||||
exit;
|
||||
}
|
||||
|
||||
// Load configuration before the quota check: the check reads
// CRAWL_STOP_DISK_QUOTA_MB_LEFT, which is expected to be defined by this file.
// Referencing it earlier would hit an undefined constant.
require_once('../config/app.php');

// Check disk quota: compare free space on the root filesystem (bytes,
// converted to decimal megabytes) against the configured minimum
$diskFreeBytes = disk_free_space('/');

// disk_free_space() returns false on failure; treat unknown free space the
// same as an exhausted quota so the crawler does not run blindly
if (false === $diskFreeBytes || CRAWL_STOP_DISK_QUOTA_MB_LEFT > $diskFreeBytes / 1000000) {

  echo 'Disk quota reached.' . PHP_EOL;
  exit;
}

// Load system dependencies
require_once('../library/curl.php');
|
||||
|
@ -49,6 +49,9 @@ if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
|
||||
// Register new host
|
||||
} else {
|
||||
|
||||
// Disk quota not reached
|
||||
if (CRAWL_STOP_DISK_QUOTA_MB_LEFT < disk_free_space('/') / 1000000) {
|
||||
|
||||
// Get robots.txt if exists
|
||||
$curl = new Curl($hostURL->string . '/robots.txt');
|
||||
|
||||
@ -74,6 +77,7 @@ if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
|
||||
$hostRobots,
|
||||
$hostRobotsPostfix);
|
||||
}
|
||||
}
|
||||
|
||||
// Parse page URI
|
||||
$hostPageURI = Parser::uri($q);
|
||||
|
Loading…
x
Reference in New Issue
Block a user