add composer support, refactor FS tree to psr-4

This commit is contained in:
ghost 2023-08-07 14:00:13 +03:00
parent 7bb1eb5b61
commit 183ae91ccc
34 changed files with 42 additions and 106 deletions

14
.gitignore vendored
View File

@ -1,7 +1,9 @@
.vscode
/.vscode/
/vendor/
/src/config/app.php
/database/yggo.mwb.bak
.ftpignore
config/app.php
config/sphinx.conf
database/yggo.mwb.bak
composer.lock

View File

@ -37,12 +37,14 @@ sphinxsearch
#### Installation
* The web root dir is `/public`
* `git clone https://github.com/YGGverse/YGGo.git`
* `composer install`
* The web root dir is `/src/public`
* Deploy the database using the [MySQL Workbench](https://www.mysql.com/products/workbench) project provided in the `/database` folder
* Install [Sphinx Search Server](https://sphinxsearch.com)
* Configuration examples are provided in the `/config` folder
* Make sure `/storage/cache`, `/storage/tmp`, `/storage/snap` folders are writable
* Set up the `/crontab` by following [example](https://github.com/YGGverse/YGGo/blob/main/config/crontab.txt)
* Make sure `/src/storage/cache`, `/src/storage/tmp`, `/src/storage/snap` folders are writable
* Set up the `/src/crontab` jobs by following the [example](https://github.com/YGGverse/YGGo/blob/main/config/crontab.txt)
#### JSON API

16
composer.json Normal file
View File

@ -0,0 +1,16 @@
{
"name": "yggverse/yggo",
"description": "YGGo! Distributed Web Search Engine",
"type": "project",
"require": {
"php": ">=8.1",
"yggverse/parser": ">=0.1.0"
},
"license": "MIT",
"autoload": {
"psr-4": {
"Yggverse\\Yggo\\": "src/"
}
},
"minimum-stability": "alpha"
}

View File

@ -1,7 +0,0 @@
@reboot searchd
@reboot indexer --all --rotate
30 0 * * * indexer --all --rotate
0 0 * * * /usr/bin/php /{PATH}/YGGo/crontab/cleaner.php >> /{PATH}/cleaner.log 2>&1
* 1-23 * * * /usr/bin/php /{PATH}/YGGo/crontab/crawler.php >> /{PATH}/crawler.log 2>&1

View File

@ -0,0 +1,7 @@
@reboot searchd
@reboot indexer --all --rotate
30 0 * * * indexer --all --rotate
0 0 * * * /usr/bin/php /{PATH}/YGGo/src/crontab/cleaner.php >> /{PATH}/cleaner.log 2>&1
* 1-23 * * * /usr/bin/php /{PATH}/YGGo/src/crontab/crawler.php >> /{PATH}/crawler.log 2>&1

View File

@ -1,85 +0,0 @@
<?php
// @TODO deprecated; replace with the common parser library
// https://github.com/YGGverse/parser-php
/**
 * Minimal URL validator and splitter.
 *
 * Splits an absolute URL into a "host" part (scheme, host name, optional port)
 * and a "page" part (path, query) and exposes both as nested stdClass objects.
 */
class URL {

  /**
   * Tell whether the string passes PHP's FILTER_VALIDATE_URL check.
   *
   * @param string $url Candidate URL
   *
   * @return bool true when the URL is accepted by filter_var(), false otherwise
   */
  public static function is(string $url) : bool {

    // filter_var() yields the filtered string or false, so compare explicitly
    // instead of relying on implicit string-to-bool coercion of the return value
    return false !== filter_var($url, FILTER_VALIDATE_URL);
  }

  /**
   * Split a URL into host and page components.
   *
   * @param string $url Absolute URL
   *
   * @return mixed false when the URL is invalid or lacks a scheme/host,
   *               otherwise an object shaped as:
   *               host: { url, scheme, name, port }
   *               page: { url, uri, path, query }
   *               Absent optional components are null; the query keeps its leading "?".
   */
  public static function parse(string $url) : mixed {

    // Validate URL
    if (!self::is($url)) {

      return false;
    }

    // Parse all components in a single pass
    $parts = parse_url($url);

    // Scheme and host are mandatory
    if (!is_array($parts) || empty($parts['scheme']) || empty($parts['host'])) {

      return false;
    }

    // Build host
    $hostUrl = $parts['scheme'] . '://' . $parts['host'];
    $port    = null;

    // Port is optional
    if (!empty($parts['port'])) {

      $port     = $parts['port'];
      $hostUrl .= ':' . $port;
    }

    // Build page
    $path = null;

    if (isset($parts['path']) && $parts['path'] !== '') {

      $path = $parts['path'];
    }

    $query = null;

    // Compare against the empty string rather than truthiness,
    // otherwise a legitimate query like "0" would be discarded
    if (isset($parts['query']) && $parts['query'] !== '') {

      $query = '?' . $parts['query'];
    }

    // URI stays null when neither path nor query are present
    $uri = null;

    if ($path !== null || $query !== null) {

      $uri = (string) $path . (string) $query;
    }

    return (object)
    [
      'host' => (object)
      [
        'url'    => $hostUrl,
        'scheme' => $parts['scheme'],
        'name'   => $parts['host'],
        'port'   => $port,
      ],
      'page' => (object)
      [
        'url'   => $hostUrl . (string) $uri,
        'uri'   => $uri,
        'path'  => $path,
        'query' => $query,
      ]
    ];
  }
}

View File

@ -27,13 +27,14 @@ require_once(__DIR__ . '/../library/ftp.php');
require_once(__DIR__ . '/../library/curl.php');
require_once(__DIR__ . '/../library/robots.php');
require_once(__DIR__ . '/../library/sitemap.php');
require_once(__DIR__ . '/../library/url.php');
require_once(__DIR__ . '/../library/filter.php');
require_once(__DIR__ . '/../library/mysql.php');
require_once(__DIR__ . '/../library/helper.php');
require_once(__DIR__ . '/../library/yggstate.php');
require_once(__DIR__ . '/../library/vendor/simple_html_dom.php');
require_once __DIR__ . '/../../vendor/autoload.php';
// Check disk quota
if (CRAWL_STOP_DISK_QUOTA_MB_LEFT > disk_free_space('/') / 1000000) {
@ -190,7 +191,7 @@ foreach ($db->getHostCrawlQueue(CRAWL_HOST_LIMIT, time() - CRAWL_HOST_SECONDS_OF
$loc = sprintf('%s/%s', $queueHost->url, trim(str_ireplace($queueHost->url, '', $loc), '/'));
// Validate link
if (!$link = URL::parse($loc)) {
if (!$link = Yggverse\Parser\Url::parse($loc)) {
continue;
}

View File

@ -1,8 +1,9 @@
<?php
require_once(__DIR__ . '/../library/url.php');
require_once(__DIR__ . '/../library/robots.php');
require_once __DIR__ . '/../../vendor/autoload.php';
class Helper {
public static function getHostSetting(MySQL $db,
@ -70,7 +71,7 @@ class Helper {
}
// Validate link URL
if (!$link = URL::parse($link)) {
if (!$link = Yggverse\Parser\Url::parse($link)) {
return false;
}

View File

@ -3,7 +3,6 @@
// Load system dependencies
require_once(__DIR__ . '/../config/app.php');
require_once(__DIR__ . '/../library/filter.php');
require_once(__DIR__ . '/../library/url.php');
require_once(__DIR__ . '/../library/mysql.php');
require_once(__DIR__ . '/../library/helper.php');
require_once(__DIR__ . '/../library/sphinxql.php');