mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-01-08 22:07:56 +00:00
prevent infinite connection when streaming resources are detected
This commit is contained in:
parent
345c59b5f4
commit
4fa33afe40
@ -135,6 +135,8 @@ define('CRAWL_CURLOPT_USERAGENT', 'YGGo Search Crawler / Bot ( https://github.co
|
|||||||
/*
|
/*
|
||||||
* Skip curl download on response data size reached
|
* Skip curl download on response data size reached
|
||||||
*
|
*
|
||||||
|
* See also: CURLOPT_TIMEOUT (library/curl.php)
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
define('CRAWL_CURLOPT_PROGRESSFUNCTION_DOWNLOAD_SIZE_LIMIT', 10485760);
|
define('CRAWL_CURLOPT_PROGRESSFUNCTION_DOWNLOAD_SIZE_LIMIT', 10485760);
|
||||||
|
|
||||||
|
@ -239,7 +239,7 @@ try {
|
|||||||
$hostPagesBanned += $db->updateHostPageTimeBanned($queueHostPage->hostPageId, time());
|
$hostPagesBanned += $db->updateHostPageTimeBanned($queueHostPage->hostPageId, time());
|
||||||
|
|
||||||
// Try to receive target page location on page redirect available
|
// Try to receive target page location on page redirect available
|
||||||
$curl = new Curl($queueHostPageURL, CRAWL_CURLOPT_USERAGENT, 3, true, true);
|
$curl = new Curl($queueHostPageURL, CRAWL_CURLOPT_USERAGENT, 10, true, true);
|
||||||
|
|
||||||
// Update curl stats
|
// Update curl stats
|
||||||
$httpRequestsTotal++;
|
$httpRequestsTotal++;
|
||||||
|
@ -7,10 +7,10 @@ class Curl {
|
|||||||
|
|
||||||
public function __construct(string $url,
|
public function __construct(string $url,
|
||||||
mixed $userAgent = false,
|
mixed $userAgent = false,
|
||||||
int $connectTimeout = 3,
|
int $connectTimeout = 10,
|
||||||
bool $header = false,
|
bool $header = false,
|
||||||
bool $followLocation = false,
|
bool $followLocation = false,
|
||||||
int $maxRedirects = 3) {
|
int $maxRedirects = 10) {
|
||||||
|
|
||||||
$this->_connection = curl_init($url);
|
$this->_connection = curl_init($url);
|
||||||
|
|
||||||
@ -24,7 +24,8 @@ class Curl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
curl_setopt($this->_connection, CURLOPT_RETURNTRANSFER, true);
|
curl_setopt($this->_connection, CURLOPT_RETURNTRANSFER, true);
|
||||||
curl_setopt($this->_connection, CURLOPT_CONNECTTIMEOUT, $connectTimeout);
|
curl_setopt($this->_connection, CURLOPT_CONNECTTIMEOUT, $connectTimeout); // skip resources with long time response
|
||||||
|
curl_setopt($this->_connection, CURLOPT_TIMEOUT, $connectTimeout); // prevent infinite connection when streaming resources are detected @TODO
|
||||||
curl_setopt($this->_connection, CURLOPT_NOPROGRESS, false);
|
curl_setopt($this->_connection, CURLOPT_NOPROGRESS, false);
|
||||||
curl_setopt($this->_connection, CURLOPT_PROGRESSFUNCTION, function(
|
curl_setopt($this->_connection, CURLOPT_PROGRESSFUNCTION, function(
|
||||||
$downloadSize, $downloaded, $uploadSize, $uploaded
|
$downloadSize, $downloaded, $uploadSize, $uploaded
|
||||||
|
Loading…
Reference in New Issue
Block a user