|
|
@ -6,6 +6,7 @@ require_once(__DIR__ . '/../library/cli.php'); |
|
|
|
require_once(__DIR__ . '/../library/mysql.php'); |
|
|
|
require_once(__DIR__ . '/../library/mysql.php'); |
|
|
|
require_once(__DIR__ . '/../library/filter.php'); |
|
|
|
require_once(__DIR__ . '/../library/filter.php'); |
|
|
|
require_once(__DIR__ . '/../library/ftp.php'); |
|
|
|
require_once(__DIR__ . '/../library/ftp.php'); |
|
|
|
|
|
|
|
require_once(__DIR__ . '/../library/helper.php'); |
|
|
|
require_once __DIR__ . '/../../vendor/autoload.php'; |
|
|
|
require_once __DIR__ . '/../../vendor/autoload.php'; |
|
|
|
|
|
|
|
|
|
|
|
// CLI only to prevent https server connection timeout |
|
|
|
// CLI only to prevent https server connection timeout |
|
|
@ -42,9 +43,30 @@ if (false === sem_acquire($semaphore, true)) { |
|
|
|
exit; |
|
|
|
exit; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Connect database |
|
|
|
// Connect DB |
|
|
|
|
|
|
|
try { |
|
|
|
|
|
|
|
|
|
|
|
$db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD); |
|
|
|
$db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} catch(Exception $e) { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
var_dump($e); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
exit; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Connect Yggverse\Cache\Memory |
|
|
|
|
|
|
|
try { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
$memory = new Yggverse\Cache\Memory(MEMCACHED_HOST, MEMCACHED_PORT, MEMCACHED_NAMESPACE, MEMCACHED_TIMEOUT + time()); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} catch(Exception $e) { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
var_dump($e); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
exit; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// CLI begin |
|
|
|
// CLI begin |
|
|
|
if (!empty($argv[1])) { |
|
|
|
if (!empty($argv[1])) { |
|
|
|
|
|
|
|
|
|
|
@ -401,6 +423,65 @@ if (!empty($argv[1])) { |
|
|
|
|
|
|
|
|
|
|
|
switch ($argv[2]) { |
|
|
|
switch ($argv[2]) { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
case 'add': |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (empty($argv[3])) { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CLI::danger('URL required'); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
exit; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (false === Yggverse\Parser\Url::is($argv[3])) { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CLI::danger('URL address invalid'); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
exit; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
try { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
$db->beginTransaction(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if ($linkToDBresult = Helper::addLinkToDB($db, $memory, $argv[3])) { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (count($linkToDBresult->new->hostPageId)) { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CLI::success('URL successfully registered in the crawler queue!'); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
$db->commit(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
exit; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CLI::warning('URL already registered in the crawler queue!'); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
$db->rollBack(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
exit; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CLI::danger('URL address not supported by this host rules!'); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
$db->rollBack(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
exit; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
} catch(Exception $e){ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
var_dump($e); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
$db->rollBack(); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
exit; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
break; |
|
|
|
|
|
|
|
|
|
|
|
case 'rank': |
|
|
|
case 'rank': |
|
|
|
|
|
|
|
|
|
|
|
if (!empty($argv[3])) { |
|
|
|
if (!empty($argv[3])) { |
|
|
@ -675,6 +756,7 @@ CLI::default(' crawl - execute step in crawler queue') |
|
|
|
CLI::default(' clean - execute step in cleaner queue'); |
|
|
|
CLI::default(' clean - execute step in cleaner queue'); |
|
|
|
CLI::break(); |
|
|
|
CLI::break(); |
|
|
|
CLI::default(' hostPage '); |
|
|
|
CLI::default(' hostPage '); |
|
|
|
|
|
|
|
CLI::default(' add [URL] - register new address in the crawl queue'); |
|
|
|
CLI::default(' rank '); |
|
|
|
CLI::default(' rank '); |
|
|
|
CLI::default(' reindex - reindex hostPage.rank fields'); |
|
|
|
CLI::default(' reindex - reindex hostPage.rank fields'); |
|
|
|
CLI::break(); |
|
|
|
CLI::break(); |
|
|
|