mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-01-09 22:37:57 +00:00
skip curl download on response data size reached
This commit is contained in:
parent
d7a5f7ef84
commit
d186fff48f
@ -26,6 +26,7 @@ php-dom
|
||||
php-pdo
|
||||
php-curl
|
||||
php-gd
|
||||
php-mbstring
|
||||
php-mysql
|
||||
sphinxsearch
|
||||
```
|
||||
|
@ -118,6 +118,12 @@ define('CRAWL_LOG_SECONDS_OFFSET', 60*60*24*30);
|
||||
*/
|
||||
define('CRAWL_CURLOPT_USERAGENT', 'YGGo Search Crawler / Bot ( https://github.com/YGGverse/YGGo )');
|
||||
|
||||
/*
|
||||
 * Abort the curl download once the response size exceeds this limit (bytes)
|
||||
*
|
||||
*/
|
||||
define('CRAWL_CURLOPT_PROGRESSFUNCTION_DOWNLOAD_SIZE_LIMIT', 10485760);
|
||||
|
||||
/*
|
||||
* Stop crawler on disk quota reached (Mb)
|
||||
*
|
||||
|
@ -149,7 +149,7 @@ try {
|
||||
|
||||
$delete = false;
|
||||
|
||||
$curl = new Curl($manifest->url);
|
||||
$curl = new Curl($manifest->url, CRAWL_CURLOPT_USERAGENT);
|
||||
|
||||
// Update curl stats
|
||||
$httpRequestsTotal++;
|
||||
|
@ -54,7 +54,7 @@ try {
|
||||
// Process manifests crawl queue
|
||||
foreach ($db->getManifestCrawlQueue(CRAWL_MANIFEST_LIMIT, time() - CRAWL_MANIFEST_SECONDS_OFFSET) as $queueManifest) {
|
||||
|
||||
$curl = new Curl($queueManifest->url);
|
||||
$curl = new Curl($queueManifest->url, CRAWL_CURLOPT_USERAGENT);
|
||||
|
||||
// Update curl stats
|
||||
$httpRequestsTotal++;
|
||||
@ -117,7 +117,7 @@ try {
|
||||
}
|
||||
|
||||
// Begin hosts collection
|
||||
$curl = new Curl($remoteManifest->result->api->hosts);
|
||||
$curl = new Curl($remoteManifest->result->api->hosts, CRAWL_CURLOPT_USERAGENT);
|
||||
|
||||
// Update curl stats
|
||||
$httpRequestsTotal++;
|
||||
|
@ -11,6 +11,12 @@ class Curl {
|
||||
|
||||
curl_setopt($this->_connection, CURLOPT_RETURNTRANSFER, true);
|
||||
curl_setopt($this->_connection, CURLOPT_CONNECTTIMEOUT, $connectTimeout);
|
||||
curl_setopt($this->_connection, CURLOPT_NOPROGRESS, false);
|
||||
// Abort the transfer once more than CRAWL_CURLOPT_PROGRESSFUNCTION_DOWNLOAD_SIZE_LIMIT
// bytes have been received.
//
// PHP invokes the progress callback with FIVE arguments: the cURL handle first,
// then the expected download total, the bytes downloaded so far, the expected
// upload total and the bytes uploaded so far. Omitting the handle parameter shifts
// every value by one position, so the limit would be compared against the expected
// total (0 when the server sends no Content-Length) instead of the received bytes.
curl_setopt($this->_connection, CURLOPT_PROGRESSFUNCTION, function(
    $resource, $downloadSize, $downloaded, $uploadSize, $uploaded
){
    // Any non-zero return value tells cURL to abort the transfer
    return ($downloaded > CRAWL_CURLOPT_PROGRESSFUNCTION_DOWNLOAD_SIZE_LIMIT) ? 1 : 0;
});
|
||||
|
||||
if ($userAgent) {
|
||||
curl_setopt($this->_connection, CURLOPT_USERAGENT, (string) $userAgent);
|
||||
|
Loading…
Reference in New Issue
Block a user