mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-02-03 18:35:04 +00:00
add YGGstate DB crawl integration
This commit is contained in:
parent
3d9db381e8
commit
034a683df7
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
_Проект присвячується захисникам міста Бахмут_
|
_Проект присвячується захисникам міста Бахмут_
|
||||||
|
|
||||||
Written by inspiration to explore [Yggdrasil](https://yggdrasil-network.github.io) ecosystem, because of last [YaCy](https://yacy.net/) node there was discontinued.
|
Written out of inspiration to explore the [Yggdrasil](https://github.com/yggdrasil-network) ecosystem, because the last [YaCy](https://yacy.net/) node there was discontinued.
|
||||||
This engine also could be useful for crawling regular websites, small business resources, local networks.
|
This engine also could be useful for crawling regular websites, small business resources, local networks.
|
||||||
|
|
||||||
The project goal - simple interface, clear architecture and lightweight server requirement.
|
The project goal - simple interface, clear architecture and lightweight server requirement.
|
||||||
@ -207,6 +207,11 @@ GET m=SphinxQL
|
|||||||
+ [ ] Atom
|
+ [ ] Atom
|
||||||
* [ ] Palette image index / filter
|
* [ ] Palette image index / filter
|
||||||
* [ ] Crawl queue balancer, that depends of CPU available
|
* [ ] Crawl queue balancer, that depends of CPU available
|
||||||
|
* [x] Networks integration
|
||||||
|
+ [x] [yggdrasil](https://github.com/yggdrasil-network)
|
||||||
|
+ [x] [YGGstate](https://github.com/YGGverse/YGGstate) (unlimited nodes)
|
||||||
|
+ [x] DB
|
||||||
|
+ [ ] API
|
||||||
|
|
||||||
##### Cleaner
|
##### Cleaner
|
||||||
|
|
||||||
@ -272,6 +277,7 @@ See also: [SQLite tree](https://github.com/YGGverse/YGGo/tree/sqliteway)
|
|||||||
#### See also
|
#### See also
|
||||||
|
|
||||||
* [YGGwave ~ The Radio Catalog](https://github.com/YGGverse/YGGwave)
|
* [YGGwave ~ The Radio Catalog](https://github.com/YGGverse/YGGwave)
|
||||||
|
* [YGGstate - Yggdrasil Network Analytics](https://github.com/YGGverse/YGGstate)
|
||||||
|
|
||||||
#### Feedback
|
#### Feedback
|
||||||
|
|
||||||
|
@ -320,6 +320,39 @@ define('CRAWL_MANIFEST', true);
|
|||||||
*/
|
*/
|
||||||
define('CRAWL_MANIFEST_API_VERSION', 0.13);
|
define('CRAWL_MANIFEST_API_VERSION', 0.13);
|
||||||
|
|
||||||
|
|
||||||
|
// Integrations
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Crawl YGGstate for peers to discover new hosts
|
||||||
|
*
|
||||||
|
* Yggdrasil networks only
|
||||||
|
*
|
||||||
|
* Read more:
|
||||||
|
* https://github.com/YGGverse/YGGstate
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
define(
  'CRAWL_YGGSTATE',
  json_encode(
    (object) [

      // YGGstate nodes queried through a direct database connection
      'db' => [
        [
          // Conditions
          'peer_min_last_uptime' => 86400, // seconds (24 h), skip short-term connections
          'timeout'              => 86400, // seconds (24 h), these calls run in the crontab/crawler queue, prevents remote server abuse

          // Connection
          'port'     => 3306,
          'host'     => '',
          'database' => '',
          'username' => '',
          'password' => '',
        ],
        // ...
      ],
    ]
  )
);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Remove host ban after following time
|
* Remove host ban after following time
|
||||||
*
|
*
|
||||||
|
@ -31,6 +31,7 @@ require_once(__DIR__ . '/../library/url.php');
|
|||||||
require_once(__DIR__ . '/../library/filter.php');
|
require_once(__DIR__ . '/../library/filter.php');
|
||||||
require_once(__DIR__ . '/../library/mysql.php');
|
require_once(__DIR__ . '/../library/mysql.php');
|
||||||
require_once(__DIR__ . '/../library/helper.php');
|
require_once(__DIR__ . '/../library/helper.php');
|
||||||
|
require_once(__DIR__ . '/../library/yggstate.php');
|
||||||
require_once(__DIR__ . '/../library/vendor/simple_html_dom.php');
|
require_once(__DIR__ . '/../library/vendor/simple_html_dom.php');
|
||||||
|
|
||||||
// Check disk quota
|
// Check disk quota
|
||||||
@ -84,6 +85,49 @@ try {
|
|||||||
exit;
|
exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check YGGstate connections to discover new hosts
//
// CRAWL_YGGSTATE is a JSON string mapping an integration type (only 'db' is
// handled here) to a list of node configs. A node is queried only while its
// memcached timeout key is absent; the key is set after a successful pass.
if (CRAWL_YGGSTATE) {

  foreach (json_decode(CRAWL_YGGSTATE) as $server => $nodes) {

    foreach ($nodes as $i => $node) {

      switch ($server) {

        case 'db':

          // Per-node throttle key: while it exists in memcached the node is not queried again
          $yggStateTimeoutKey = sprintf('Crontab.crawler.YGGstate.%s.%s.timeout', $server, $i);

          try {

            if (!$memcached->get($yggStateTimeoutKey)) {

              $yggStateDB = new YGGstate($node->host, $node->port, $node->database, $node->username, $node->password);

              foreach ($yggStateDB->getPeersByMinLastUptime($node->peer_min_last_uptime) as $yggStatePeer) {

                // Register new host, the peer address is wrapped in brackets as an IPv6 URL host
                if ($linkToDBresult = Helper::addLinkToDB($db, $memcached, sprintf('http://[%s]/', $yggStatePeer->address))) {

                  $hostsAdded     += count($linkToDBresult->new->hostId);
                  $hostPagesAdded += count($linkToDBresult->new->hostPageId);
                }
              }

              // Only reached when the node was processed without an exception,
              // so a failed node is retried on the next run
              $memcached->set($yggStateTimeoutKey, true, time() + $node->timeout);
            }

          } catch (Exception $e) {

            // Report the message only: dumping the whole exception object also prints
            // the stack trace arguments, which may expose the DB password passed to YGGstate
            echo sprintf('YGGstate %s node #%s error: %s', $server, $i, $e->getMessage()) . PHP_EOL;

            // Inside a switch "continue 2" targets the enclosing foreach: go to the next node
            continue 2;
          }

        break;
      }
    }
  }
}
|
||||||
|
|
||||||
// Process hosts crawl queue
|
// Process hosts crawl queue
|
||||||
foreach ($db->getHostCrawlQueue(CRAWL_HOST_LIMIT, time() - CRAWL_HOST_SECONDS_OFFSET) as $queueHost) {
|
foreach ($db->getHostCrawlQueue(CRAWL_HOST_LIMIT, time() - CRAWL_HOST_SECONDS_OFFSET) as $queueHost) {
|
||||||
|
|
||||||
|
80
library/yggstate.php
Normal file
80
library/yggstate.php
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Client for a YGGstate MySQL database.
 *
 * Wraps a PDO connection and keeps per-statement-type query counters
 * that can be read with getDebug().
 */
class YGGstate {

  private PDO $_db;

  private object $_debug;

  /**
   * Open the connection and reset the debug counters.
   *
   * @param string $host     MySQL host
   * @param int    $port     MySQL port
   * @param string $database Database name
   * @param string $username User name
   * @param string $password User password
   *
   * @throws PDOException When the connection cannot be established
   */
  public function __construct(string $host, int $port, string $database, string $username, string $password) {

    $this->_db = new PDO('mysql:dbname=' . $database . ';host=' . $host . ';port=' . $port . ';charset=utf8', $username, $password, [PDO::MYSQL_ATTR_INIT_COMMAND => 'SET NAMES utf8']);
    $this->_db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
    $this->_db->setAttribute(PDO::ATTR_DEFAULT_FETCH_MODE, PDO::FETCH_OBJ);

    // NOTE(review): pdo_mysql appears to honour PDO::ATTR_TIMEOUT only as a constructor
    // option (connect timeout), so this call probably has no effect - confirm before
    // relying on it. Left unchanged to keep the current connection behaviour.
    $this->_db->setAttribute(PDO::ATTR_TIMEOUT, 600);

    $this->_debug = (object)
    [
      'query' => (object)
      [
        'select' => (object)
        [
          'total' => 0
        ],
        'insert' => (object)
        [
          'total' => 0
        ],
        'update' => (object)
        [
          'total' => 0
        ],
        'delete' => (object)
        [
          'total' => 0
        ],
      ]
    ];
  }

  // Tools

  /**
   * Start a transaction on the underlying connection.
   */
  public function beginTransaction(): void {

    $this->_db->beginTransaction();
  }

  /**
   * Commit the current transaction.
   */
  public function commit(): void {

    $this->_db->commit();
  }

  /**
   * Roll back the current transaction.
   */
  public function rollBack(): void {

    $this->_db->rollBack();
  }

  /**
   * Get the query counters collected by this instance.
   *
   * @return object Object with query->{select,insert,update,delete}->total counters
   */
  public function getDebug(): object {

    return $this->_debug;
  }

  // Peer

  /**
   * Get peers whose latest `peerRemote` record (by `timeAdded`) has an uptime
   * greater than or equal to the given value.
   *
   * @param int $time Minimal uptime (presumably seconds - confirm against the YGGstate schema)
   *
   * @return array List of `peer` rows fetched as objects
   *
   * @throws PDOException On query failure
   */
  public function getPeersByMinLastUptime(int $time): array {

    $this->_debug->query->select->total++;

    // Plain row filter, so WHERE is used: HAVING without GROUP BY / aggregates
    // only works through a MySQL extension and selects the same rows
    $query = $this->_db->prepare('SELECT * FROM `peer`

                                  WHERE (
                                    SELECT `peerRemote`.`uptime`
                                    FROM `peerRemote`
                                    WHERE `peerRemote`.`peerId` = `peer`.`peerId`
                                    ORDER BY `timeAdded` DESC
                                    LIMIT 1
                                  ) >= ?');

    $query->execute([$time]);

    return $query->fetchAll();
  }
}
|
Loading…
x
Reference in New Issue
Block a user