From 183ae91cccf630febd9204c266560b4ea74924b0 Mon Sep 17 00:00:00 2001 From: ghost Date: Mon, 7 Aug 2023 14:00:13 +0300 Subject: [PATCH] add composer support, refactor FS tree to psr-4 --- .gitignore | 12 +-- README.md | 8 +- composer.json | 16 ++++ config/crontab.example | 7 -- example/environment/crontab | 7 ++ .../environment/mysql.cnf | 0 .../environment/sphinx.conf | 0 library/url.php | 85 ------------------- {cli => src/cli}/yggo.php | 0 {config => src/config}/app.php.example | 0 {crontab => src/crontab}/cleaner.php | 0 {crontab => src/crontab}/crawler.php | 5 +- {library => src/library}/cli.php | 0 {library => src/library}/curl.php | 0 {library => src/library}/filter.php | 0 {library => src/library}/ftp.php | 0 {library => src/library}/helper.php | 5 +- {library => src/library}/icon.php | 0 {library => src/library}/mysql.php | 0 {library => src/library}/robots.php | 0 {library => src/library}/sitemap.php | 0 {library => src/library}/sphinxql.php | 0 .../library}/vendor/simple_html_dom.php | 0 {library => src/library}/yggstate.php | 0 {public => src/public}/api.php | 0 {public => src/public}/explore.php | 0 {public => src/public}/file.php | 0 {public => src/public}/index.php | 0 {public => src/public}/robots.txt | 0 {public => src/public}/search.php | 1 - {public => src/public}/top.php | 0 {storage => src/storage}/cache/index.html | 0 {storage => src/storage}/snap/index.html | 0 {storage => src/storage}/tmp/index.html | 0 34 files changed, 41 insertions(+), 105 deletions(-) create mode 100644 composer.json delete mode 100644 config/crontab.example create mode 100644 example/environment/crontab rename config/mysql.cnf.example => example/environment/mysql.cnf (100%) rename config/sphinx.conf.example => example/environment/sphinx.conf (100%) delete mode 100644 library/url.php rename {cli => src/cli}/yggo.php (100%) rename {config => src/config}/app.php.example (100%) rename {crontab => src/crontab}/cleaner.php (100%) rename {crontab => src/crontab}/crawler.php (99%) rename {library => src/library}/cli.php (100%) rename {library => src/library}/curl.php (100%) rename {library => src/library}/filter.php (100%) rename {library => src/library}/ftp.php (100%) rename {library => src/library}/helper.php (97%) rename {library => src/library}/icon.php (100%) rename {library => src/library}/mysql.php (100%) rename {library => src/library}/robots.php (100%) rename {library => src/library}/sitemap.php (100%) rename {library => src/library}/sphinxql.php (100%) rename {library => src/library}/vendor/simple_html_dom.php (100%) rename {library => src/library}/yggstate.php (100%) rename {public => src/public}/api.php (100%) rename {public => src/public}/explore.php (100%) rename {public => src/public}/file.php (100%) rename {public => src/public}/index.php (100%) rename {public => src/public}/robots.txt (100%) rename {public => src/public}/search.php (99%) rename {public => src/public}/top.php (100%) rename {storage => src/storage}/cache/index.html (100%) rename {storage => src/storage}/snap/index.html (100%) rename {storage => src/storage}/tmp/index.html (100%) diff --git a/.gitignore b/.gitignore index 81682ce..0bfef91 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,9 @@ -.vscode -.ftpignore +/.vscode/ +/vendor/ + +/src/config/app.php -config/app.php -config/sphinx.conf +/database/yggo.mwb.bak -database/yggo.mwb.bak \ No newline at end of file +.ftpignore +composer.lock \ No newline at end of file diff --git a/README.md b/README.md index 94977f8..2f0a731 100644 --- a/README.md +++ b/README.md @@ -37,12 +37,14 @@ sphinxsearch #### Installation -* The web root dir is `/public` +* `git clone https://github.com/YGGverse/YGGo.git` +* `composer install` +* The web root dir is `/src/public` * Deploy the database using [MySQL Workbench](https://www.mysql.com/products/workbench) project presented in the `/database` folder * Install [Sphinx Search Server](https://sphinxsearch.com) * Configuration examples presented at `/config` folder -* Make sure `/storage/cache`, `/storage/tmp`, `/storage/snap` folders are writable -* Set up the `/crontab` by following [example](https://github.com/YGGverse/YGGo/blob/main/config/crontab.txt) +* Make sure `/src/storage/cache`, `/src/storage/tmp`, `/src/storage/snap` folders are writable +* Set up the `/src/crontab` by following [example](https://github.com/YGGverse/YGGo/blob/main/config/crontab.txt) #### JSON API diff --git a/composer.json b/composer.json new file mode 100644 index 0000000..e103cbf --- /dev/null +++ b/composer.json @@ -0,0 +1,16 @@ +{ + "name": "yggverse/yggo", + "description": "YGGo! Distributed Web Search Engine", + "type": "project", + "require": { + "php": ">=8.1", + "yggverse/parser": ">=0.1.0" + }, + "license": "MIT", + "autoload": { + "psr-4": { + "Yggverse\\Yggo\\": "src/" + } + }, + "minimum-stability": "alpha" +} diff --git a/config/crontab.example b/config/crontab.example deleted file mode 100644 index 9e35161..0000000 --- a/config/crontab.example +++ /dev/null @@ -1,7 +0,0 @@ -@reboot searchd -@reboot indexer --all --rotate - -30 0 * * * indexer --all --rotate - -0 0 * * * /usr/bin/php /{PATH}/YGGo/crontab/cleaner.php >> /{PATH}/cleaner.log 2>&1 -* 1-23 * * * /usr/bin/php /{PATH}/YGGo/crontab/crawler.php >> /{PATH}/crawler.log 2>&1 \ No newline at end of file diff --git a/example/environment/crontab b/example/environment/crontab new file mode 100644 index 0000000..076afe4 --- /dev/null +++ b/example/environment/crontab @@ -0,0 +1,7 @@ +@reboot searchd +@reboot indexer --all --rotate + +30 0 * * * indexer --all --rotate + +0 0 * * * /usr/bin/php /{PATH}/YGGo/src/crontab/cleaner.php >> /{PATH}/cleaner.log 2>&1 +* 1-23 * * * /usr/bin/php /{PATH}/YGGo/src/crontab/crawler.php >> /{PATH}/crawler.log 2>&1 \ No newline at end of file diff --git a/config/mysql.cnf.example b/example/environment/mysql.cnf similarity index 100% rename from config/mysql.cnf.example rename to example/environment/mysql.cnf diff --git a/config/sphinx.conf.example b/example/environment/sphinx.conf similarity index 100% rename from config/sphinx.conf.example rename to example/environment/sphinx.conf diff --git a/library/url.php b/library/url.php deleted file mode 100644 index c8c8907..0000000 --- a/library/url.php +++ /dev/null @@ -1,85 +0,0 @@ - (object) - [ - 'url' => null, - 'scheme' => null, - 'name' => null, - 'port' => null, - ], - 'page' => (object) - [ - 'url' => null, - 'uri' => null, - 'path' => null, - 'query' => null, - ] - ]; - - // Validate URL - if (!self::is($url)) { - - return false; - } - - // Parse host - if ($scheme = parse_url($url, PHP_URL_SCHEME)) { - - $result->host->url = $scheme . '://'; - $result->host->scheme = $scheme; - - } else { - - return false; - } - - if ($host = parse_url($url, PHP_URL_HOST)) { - - $result->host->url .= $host; - $result->host->name = $host; - - } else { - - return false; - } - - if ($port = parse_url($url, PHP_URL_PORT)) { - - $result->host->url .= ':' . $port; - $result->host->port = $port; - - // port is optional - } - - // Parse page - if ($path = parse_url($url, PHP_URL_PATH)) { - - $result->page->uri = $path; - $result->page->path = $path; - } - - if ($query = parse_url($url, PHP_URL_QUERY)) { - - $result->page->uri .= '?' . $query; - $result->page->query = '?' . $query; - } - - $result->page->url = $result->host->url . $result->page->uri; - - return $result; - } -} \ No newline at end of file diff --git a/cli/yggo.php b/src/cli/yggo.php similarity index 100% rename from cli/yggo.php rename to src/cli/yggo.php diff --git a/config/app.php.example b/src/config/app.php.example similarity index 100% rename from config/app.php.example rename to src/config/app.php.example diff --git a/crontab/cleaner.php b/src/crontab/cleaner.php similarity index 100% rename from crontab/cleaner.php rename to src/crontab/cleaner.php diff --git a/crontab/crawler.php b/src/crontab/crawler.php similarity index 99% rename from crontab/crawler.php rename to src/crontab/crawler.php index c552ebb..8ecc53b 100644 --- a/crontab/crawler.php +++ b/src/crontab/crawler.php @@ -27,13 +27,14 @@ require_once(__DIR__ . '/../library/ftp.php'); require_once(__DIR__ . '/../library/curl.php'); require_once(__DIR__ . '/../library/robots.php'); require_once(__DIR__ . '/../library/sitemap.php'); -require_once(__DIR__ . '/../library/url.php'); require_once(__DIR__ . '/../library/filter.php'); require_once(__DIR__ . '/../library/mysql.php'); require_once(__DIR__ . '/../library/helper.php'); require_once(__DIR__ . '/../library/yggstate.php'); require_once(__DIR__ . '/../library/vendor/simple_html_dom.php'); +require_once __DIR__ . '/../../vendor/autoload.php'; + // Check disk quota if (CRAWL_STOP_DISK_QUOTA_MB_LEFT > disk_free_space('/') / 1000000) { @@ -190,7 +191,7 @@ foreach ($db->getHostCrawlQueue(CRAWL_HOST_LIMIT, time() - CRAWL_HOST_SECONDS_OF $loc = sprintf('%s/%s', $queueHost->url, trim(str_ireplace($queueHost->url, '', $loc), '/')); // Validate link - if (!$link = URL::parse($loc)) { + if (!$link = Yggverse\Parser\Url::parse($loc)) { continue; } diff --git a/library/cli.php b/src/library/cli.php similarity index 100% rename from library/cli.php rename to src/library/cli.php diff --git a/library/curl.php b/src/library/curl.php similarity index 100% rename from library/curl.php rename to src/library/curl.php diff --git a/library/filter.php b/src/library/filter.php similarity index 100% rename from library/filter.php rename to src/library/filter.php diff --git a/library/ftp.php b/src/library/ftp.php similarity index 100% rename from library/ftp.php rename to src/library/ftp.php diff --git a/library/helper.php b/src/library/helper.php similarity index 97% rename from library/helper.php rename to src/library/helper.php index 8de2c7e..e541a60 100644 --- a/library/helper.php +++ b/src/library/helper.php @@ -1,8 +1,9 @@