mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-01-31 08:54:17 +00:00
add curl requests debug
This commit is contained in:
parent
1aba060d34
commit
ea04220de3
@ -21,6 +21,11 @@ $db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD);
|
||||
// Debug
|
||||
$timeStart = microtime(true);
|
||||
|
||||
$requestsTotal = 0;
|
||||
$requestSizeTotal = 0;
|
||||
$downloadSizeTotal = 0;
|
||||
$requestsTotalTime = 0;
|
||||
|
||||
$hostsTotal = $db->getTotalHosts();
|
||||
$manifestsTotal = $db->getTotalManifests();
|
||||
$hostsUpdated = 0;
|
||||
@ -44,6 +49,12 @@ try {
|
||||
// Get robots.txt if exists
|
||||
$curl = new Curl($hostURL . '/robots.txt', CRAWL_CURLOPT_USERAGENT);
|
||||
|
||||
// Update curl stats
|
||||
$requestsTotal++;
|
||||
$requestSizeTotal += $curl->getSizeRequest();
|
||||
$downloadSizeTotal += $curl->getSizeDownload();
|
||||
$requestsTotalTime += $curl->getTotalTime();
|
||||
|
||||
if (200 == $curl->getCode() && false !== stripos($curl->getContent(), 'user-agent:')) {
|
||||
$hostRobots = $curl->getContent();
|
||||
} else {
|
||||
@ -131,6 +142,12 @@ try {
|
||||
|
||||
$curl = new Curl($manifest->url);
|
||||
|
||||
// Update curl stats
|
||||
$requestsTotal++;
|
||||
$requestSizeTotal += $curl->getSizeRequest();
|
||||
$downloadSizeTotal += $curl->getSizeDownload();
|
||||
$requestsTotalTime += $curl->getTotalTime();
|
||||
|
||||
// Skip processing non 200 code
|
||||
if (200 != $curl->getCode()) {
|
||||
|
||||
@ -195,8 +212,16 @@ echo 'Hosts total: ' . $hostsTotal . PHP_EOL;
|
||||
echo 'Hosts updated: ' . $hostsUpdated . PHP_EOL;
|
||||
echo 'Hosts pages deleted: ' . $hostsPagesDeleted . PHP_EOL;
|
||||
echo 'Hosts images deleted: ' . $hostsImagesDeleted . PHP_EOL;
|
||||
|
||||
echo 'Manifests total: ' . $manifestsTotal . PHP_EOL;
|
||||
echo 'Manifests deleted: ' . $manifestsDeleted . PHP_EOL;
|
||||
|
||||
echo 'Host page bans removed: ' . $hostPagesBansRemoved . PHP_EOL;
|
||||
echo 'Host images bans removed: ' . $hostImagesBansRemoved . PHP_EOL;
|
||||
// Parenthesise the subtraction: before PHP 8 "." and "-" shared the same precedence,
// so the label was concatenated to the timestamp before $timeStart was subtracted.
echo 'Execution time: ' . (microtime(true) - $timeStart) . PHP_EOL . PHP_EOL;
|
||||
|
||||
echo 'Requests total: ' . $requestsTotal . PHP_EOL;
|
||||
echo 'Requests total size: ' . $requestSizeTotal . PHP_EOL;
|
||||
echo 'Download total size: ' . $downloadSizeTotal . PHP_EOL;
|
||||
echo 'Requests total time: ' . $requestsTotalTime / 1000000 . PHP_EOL;
|
||||
|
||||
// Parenthesise the subtraction: before PHP 8 "." and "-" shared the same precedence,
// so the label was concatenated to the timestamp before $timeStart was subtracted.
echo 'Total time: ' . (microtime(true) - $timeStart) . PHP_EOL . PHP_EOL;
|
@ -27,6 +27,11 @@ if (CRAWL_STOP_DISK_QUOTA_MB_LEFT > disk_free_space('/') / 1000000) {
|
||||
// Debug
|
||||
$timeStart = microtime(true);
|
||||
|
||||
$requestsTotal = 0;
|
||||
$requestSizeTotal = 0;
|
||||
$downloadSizeTotal = 0;
|
||||
$requestsTotalTime = 0;
|
||||
|
||||
$hostPagesProcessed = 0;
|
||||
$hostImagesProcessed = 0;
|
||||
$manifestsProcessed = 0;
|
||||
@ -51,6 +56,12 @@ try {
|
||||
|
||||
$curl = new Curl($queueManifest->url);
|
||||
|
||||
// Update curl stats
|
||||
$requestsTotal++;
|
||||
$requestSizeTotal += $curl->getSizeRequest();
|
||||
$downloadSizeTotal += $curl->getSizeDownload();
|
||||
$requestsTotalTime += $curl->getTotalTime();
|
||||
|
||||
// Update manifest index anyway, with the current time and http code
|
||||
$manifestsProcessed += $db->updateManifestCrawlQueue($queueManifest->manifestId, time(), $curl->getCode());
|
||||
|
||||
@ -108,6 +119,12 @@ try {
|
||||
// Begin hosts collection
|
||||
$curl = new Curl($remoteManifest->result->api->hosts);
|
||||
|
||||
// Update curl stats
|
||||
$requestsTotal++;
|
||||
$requestSizeTotal += $curl->getSizeRequest();
|
||||
$downloadSizeTotal += $curl->getSizeDownload();
|
||||
$requestsTotalTime += $curl->getTotalTime();
|
||||
|
||||
// Skip processing non 200 code
|
||||
if (200 != $curl->getCode()) {
|
||||
|
||||
@ -166,6 +183,12 @@ try {
|
||||
// Get robots.txt if exists
|
||||
$curl = new Curl($hostURL . '/robots.txt', CRAWL_CURLOPT_USERAGENT);
|
||||
|
||||
// Update curl stats
|
||||
$requestsTotal++;
|
||||
$requestSizeTotal += $curl->getSizeRequest();
|
||||
$downloadSizeTotal += $curl->getSizeDownload();
|
||||
$requestsTotalTime += $curl->getTotalTime();
|
||||
|
||||
if (200 == $curl->getCode() && false !== stripos($curl->getContent(), 'user-agent:')) {
|
||||
$hostRobots = $curl->getContent();
|
||||
} else {
|
||||
@ -230,6 +253,12 @@ try {
|
||||
// Init image request
|
||||
$curl = new Curl($queueHostImageURL, CRAWL_CURLOPT_USERAGENT);
|
||||
|
||||
// Update curl stats
|
||||
$requestsTotal++;
|
||||
$requestSizeTotal += $curl->getSizeRequest();
|
||||
$downloadSizeTotal += $curl->getSizeDownload();
|
||||
$requestsTotalTime += $curl->getTotalTime();
|
||||
|
||||
// Update image index anyway, with the current time and http code
|
||||
$hostImagesProcessed += $db->updateHostImageCrawlQueue($queueHostImage->hostImageId, time(), $curl->getCode());
|
||||
|
||||
@ -304,6 +333,12 @@ try {
|
||||
// Init page request
|
||||
$curl = new Curl($queueHostPageURL, CRAWL_CURLOPT_USERAGENT);
|
||||
|
||||
// Update curl stats
|
||||
$requestsTotal++;
|
||||
$requestSizeTotal += $curl->getSizeRequest();
|
||||
$downloadSizeTotal += $curl->getSizeDownload();
|
||||
$requestsTotalTime += $curl->getTotalTime();
|
||||
|
||||
// Update page index anyway, with the current time and http code
|
||||
$hostPagesProcessed += $db->updateHostPageCrawlQueue($queueHostPage->hostPageId, time(), $curl->getCode());
|
||||
|
||||
@ -468,6 +503,12 @@ try {
|
||||
// Get robots.txt if exists
|
||||
$curl = new Curl($hostImageURL->string . '/robots.txt', CRAWL_CURLOPT_USERAGENT);
|
||||
|
||||
// Update curl stats
|
||||
$requestsTotal++;
|
||||
$requestSizeTotal += $curl->getSizeRequest();
|
||||
$downloadSizeTotal += $curl->getSizeDownload();
|
||||
$requestsTotalTime += $curl->getTotalTime();
|
||||
|
||||
if (200 == $curl->getCode() && false !== stripos($curl->getContent(), 'user-agent:')) {
|
||||
$hostRobots = $curl->getContent();
|
||||
} else {
|
||||
@ -624,6 +665,12 @@ try {
|
||||
// Get robots.txt if exists
|
||||
$curl = new Curl($hostURL->string . '/robots.txt', CRAWL_CURLOPT_USERAGENT);
|
||||
|
||||
// Update curl stats
|
||||
$requestsTotal++;
|
||||
$requestSizeTotal += $curl->getSizeRequest();
|
||||
$downloadSizeTotal += $curl->getSizeDownload();
|
||||
$requestsTotalTime += $curl->getTotalTime();
|
||||
|
||||
if (200 == $curl->getCode() && false !== stripos($curl->getContent(), 'user-agent:')) {
|
||||
$hostRobots = $curl->getContent();
|
||||
} else {
|
||||
@ -701,12 +748,21 @@ try {
|
||||
echo 'Pages processed: ' . $hostPagesProcessed . PHP_EOL;
|
||||
echo 'Pages indexed: ' . $hostPagesIndexed . PHP_EOL;
|
||||
echo 'Pages added: ' . $hostPagesAdded . PHP_EOL;
|
||||
|
||||
echo 'Images processed: ' . $hostImagesProcessed . PHP_EOL;
|
||||
echo 'Images indexed: ' . $hostImagesIndexed . PHP_EOL;
|
||||
echo 'Images added: ' . $hostImagesAdded . PHP_EOL;
|
||||
|
||||
echo 'Manifests processed: ' . $manifestsProcessed . PHP_EOL;
|
||||
echo 'Manifests indexed: ' . $manifestsIndexed . PHP_EOL;
|
||||
|
||||
echo 'Hosts added: ' . $hostsAdded . PHP_EOL;
|
||||
echo 'Hosts pages banned: ' . $hostPagesBanned . PHP_EOL;
|
||||
echo 'Hosts images banned: ' . $hostImagesBanned . PHP_EOL;
|
||||
|
||||
echo 'Requests total: ' . $requestsTotal . PHP_EOL;
|
||||
echo 'Requests total size: ' . $requestSizeTotal . PHP_EOL;
|
||||
echo 'Download total size: ' . $downloadSizeTotal . PHP_EOL;
|
||||
echo 'Requests total time: ' . $requestsTotalTime / 1000000 . PHP_EOL;
|
||||
|
||||
// Parenthesise the subtraction: before PHP 8 "." and "-" shared the same precedence,
// so the label was concatenated to the timestamp before $timeStart was subtracted.
echo 'Total time: ' . (microtime(true) - $timeStart) . PHP_EOL . PHP_EOL;
|
||||
|
@ -47,6 +47,21 @@ class Curl {
|
||||
return curl_getinfo($this->_connection, CURLINFO_CONTENT_TYPE);
|
||||
}
|
||||
|
||||
/**
 * Download size reported by cURL for this connection.
 *
 * @return mixed Whatever curl_getinfo() yields for CURLINFO_SIZE_DOWNLOAD
 *               (documented by PHP as the total number of bytes downloaded)
 */
public function getSizeDownload() {

  $downloadedBytes = curl_getinfo($this->_connection, CURLINFO_SIZE_DOWNLOAD);

  return $downloadedBytes;
}
|
||||
|
||||
/**
 * Request size reported by cURL for this connection.
 *
 * @return mixed Whatever curl_getinfo() yields for CURLINFO_REQUEST_SIZE
 *               (documented by PHP as the total size of issued requests)
 */
public function getSizeRequest() {

  $requestBytes = curl_getinfo($this->_connection, CURLINFO_REQUEST_SIZE);

  return $requestBytes;
}
|
||||
|
||||
/**
 * Total transfer time reported by cURL for this connection, in microseconds.
 *
 * The callers visible in this commit accumulate the value and divide the sum
 * by 1000000 to print seconds, so the unit must stay microseconds.
 *
 * @return mixed Microseconds as reported by curl_getinfo(); an int computed
 *               from seconds when the microsecond constant is unavailable
 */
public function getTotalTime() {

  // CURLINFO_TOTAL_TIME_T is only defined on PHP >= 7.3 built against libcurl >= 7.61.0
  if (defined('CURLINFO_TOTAL_TIME_T')) {

    return curl_getinfo($this->_connection, CURLINFO_TOTAL_TIME_T);
  }

  // Older builds expose fractional seconds only; convert so callers still get microseconds
  return (int) round(curl_getinfo($this->_connection, CURLINFO_TOTAL_TIME) * 1000000);
}
|
||||
|
||||
public function getContent() {
|
||||
|
||||
return $this->_response;
|
||||
|
Loading…
x
Reference in New Issue
Block a user