Browse Source

add composer support, refactor FS tree to psr-4

main
ghost 9 months ago
parent
commit
183ae91ccc
  1. 12
      .gitignore
  2. 8
      README.md
  3. 16
      composer.json
  4. 7
      config/crontab.example
  5. 7
      example/environment/crontab
  6. 0
      example/environment/mysql.cnf
  7. 0
      example/environment/sphinx.conf
  8. 85
      library/url.php
  9. 0
      src/cli/yggo.php
  10. 0
      src/config/app.php.example
  11. 0
      src/crontab/cleaner.php
  12. 5
      src/crontab/crawler.php
  13. 0
      src/library/cli.php
  14. 0
      src/library/curl.php
  15. 0
      src/library/filter.php
  16. 0
      src/library/ftp.php
  17. 5
      src/library/helper.php
  18. 0
      src/library/icon.php
  19. 0
      src/library/mysql.php
  20. 0
      src/library/robots.php
  21. 0
      src/library/sitemap.php
  22. 0
      src/library/sphinxql.php
  23. 0
      src/library/vendor/simple_html_dom.php
  24. 0
      src/library/yggstate.php
  25. 0
      src/public/api.php
  26. 0
      src/public/explore.php
  27. 0
      src/public/file.php
  28. 0
      src/public/index.php
  29. 0
      src/public/robots.txt
  30. 1
      src/public/search.php
  31. 0
      src/public/top.php
  32. 0
      src/storage/cache/index.html
  33. 0
      src/storage/snap/index.html
  34. 0
      src/storage/tmp/index.html

12
.gitignore vendored

@ -1,7 +1,9 @@ @@ -1,7 +1,9 @@
.vscode
.ftpignore
/.vscode/
/vendor/
/src/config/app.php
config/app.php
config/sphinx.conf
/database/yggo.mwb.bak
database/yggo.mwb.bak
.ftpignore
composer.lock

8
README.md

@ -37,12 +37,14 @@ sphinxsearch @@ -37,12 +37,14 @@ sphinxsearch
#### Installation
* The web root dir is `/public`
* `git clone https://github.com/YGGverse/YGGo.git`
* `composer install`
* The web root dir is `/src/public`
* Deploy the database using the [MySQL Workbench](https://www.mysql.com/products/workbench) project provided in the `/database` folder
* Install [Sphinx Search Server](https://sphinxsearch.com)
* Configuration examples are provided in the `/example/environment` and `/src/config` folders
* Make sure `/storage/cache`, `/storage/tmp`, `/storage/snap` folders are writable
* Set up the `/crontab` by following [example](https://github.com/YGGverse/YGGo/blob/main/config/crontab.txt)
* Make sure `/src/storage/cache`, `/src/storage/tmp`, `/src/storage/snap` folders are writable
* Set up the `/src/crontab` by following the [example](https://github.com/YGGverse/YGGo/blob/main/example/environment/crontab)
#### JSON API

16
composer.json

@ -0,0 +1,16 @@ @@ -0,0 +1,16 @@
{
"name": "yggverse/yggo",
"description": "YGGo! Distributed Web Search Engine",
"type": "project",
"require": {
"php": ">=8.1",
"yggverse/parser": ">=0.1.0"
},
"license": "MIT",
"autoload": {
"psr-4": {
"Yggverse\\Yggo\\": "src/"
}
},
"minimum-stability": "alpha"
}

7
config/crontab.example

@ -1,7 +0,0 @@ @@ -1,7 +0,0 @@
@reboot searchd
@reboot indexer --all --rotate
30 0 * * * indexer --all --rotate
0 0 * * * /usr/bin/php /{PATH}/YGGo/crontab/cleaner.php >> /{PATH}/cleaner.log 2>&1
* 1-23 * * * /usr/bin/php /{PATH}/YGGo/crontab/crawler.php >> /{PATH}/crawler.log 2>&1

7
example/environment/crontab

@ -0,0 +1,7 @@ @@ -0,0 +1,7 @@
@reboot searchd
@reboot indexer --all --rotate
30 0 * * * indexer --all --rotate
0 0 * * * /usr/bin/php /{PATH}/YGGo/src/crontab/cleaner.php >> /{PATH}/cleaner.log 2>&1
* 1-23 * * * /usr/bin/php /{PATH}/YGGo/src/crontab/crawler.php >> /{PATH}/crawler.log 2>&1

0
config/mysql.cnf.example → example/environment/mysql.cnf

0
config/sphinx.conf.example → example/environment/sphinx.conf

85
library/url.php

@ -1,85 +0,0 @@ @@ -1,85 +0,0 @@
<?php
// @TODO deprecated, replace by the common library
// https://github.com/YGGverse/parser-php
/**
 * Minimal URL validator and splitter.
 *
 * Marked in this file as deprecated in favour of the common library
 * https://github.com/YGGverse/parser-php
 */
class URL {

    /**
     * Tell whether the given string passes PHP's FILTER_VALIDATE_URL check.
     *
     * @param string $url Candidate URL
     *
     * @return bool true when the string is accepted by filter_var, false otherwise
     */
    public static function is(string $url) : bool {

        // filter_var returns the filtered string on success and false on failure,
        // so compare explicitly instead of relying on string-to-bool coercion
        return false !== filter_var($url, FILTER_VALIDATE_URL);
    }

    /**
     * Split a URL into its host and page parts.
     *
     * On success returns an object of the form:
     *
     *   host->url    scheme://name[:port]
     *   host->scheme scheme component
     *   host->name   host component
     *   host->port   port component or null
     *   page->url    host->url followed by page->uri
     *   page->uri    path followed by "?query" (null when both are absent)
     *   page->path   path component or null
     *   page->query  "?query" (leading "?" included) or null
     *
     * @param string $url URL to parse
     *
     * @return mixed The object described above, or false when the URL is invalid
     *               or has no scheme / host
     */
    public static function parse(string $url) : mixed {

        // Validate URL
        if (!self::is($url)) {

            return false;
        }

        // Parse once instead of once per component
        $parts = parse_url($url);

        // Scheme and host are mandatory
        if (!is_array($parts) || empty($parts['scheme']) || empty($parts['host'])) {

            return false;
        }

        // Build host
        $hostUrl = $parts['scheme'] . '://' . $parts['host'];
        $port    = null;

        // Port is optional
        if (!empty($parts['port'])) {

            $port     = $parts['port'];
            $hostUrl .= ':' . $port;
        }

        // Build page
        $path  = empty($parts['path']) ? null : $parts['path'];
        $query = null;
        $uri   = $path;

        // Compare against the empty string rather than testing truthiness:
        // "0" is a legitimate query string but is falsy in PHP
        if (isset($parts['query']) && '' !== $parts['query']) {

            $query = '?' . $parts['query'];
            $uri  .= $query;
        }

        return (object)
        [
            'host' => (object)
            [
                'url'    => $hostUrl,
                'scheme' => $parts['scheme'],
                'name'   => $parts['host'],
                'port'   => $port,
            ],
            'page' => (object)
            [
                'url'   => $hostUrl . $uri,
                'uri'   => $uri,
                'path'  => $path,
                'query' => $query,
            ]
        ];
    }
}

0
cli/yggo.php → src/cli/yggo.php

0
config/app.php.example → src/config/app.php.example

0
crontab/cleaner.php → src/crontab/cleaner.php

5
crontab/crawler.php → src/crontab/crawler.php

@ -27,13 +27,14 @@ require_once(__DIR__ . '/../library/ftp.php'); @@ -27,13 +27,14 @@ require_once(__DIR__ . '/../library/ftp.php');
require_once(__DIR__ . '/../library/curl.php');
require_once(__DIR__ . '/../library/robots.php');
require_once(__DIR__ . '/../library/sitemap.php');
require_once(__DIR__ . '/../library/url.php');
require_once(__DIR__ . '/../library/filter.php');
require_once(__DIR__ . '/../library/mysql.php');
require_once(__DIR__ . '/../library/helper.php');
require_once(__DIR__ . '/../library/yggstate.php');
require_once(__DIR__ . '/../library/vendor/simple_html_dom.php');
require_once __DIR__ . '/../../vendor/autoload.php';
// Check disk quota
if (CRAWL_STOP_DISK_QUOTA_MB_LEFT > disk_free_space('/') / 1000000) {
@ -190,7 +191,7 @@ foreach ($db->getHostCrawlQueue(CRAWL_HOST_LIMIT, time() - CRAWL_HOST_SECONDS_OF @@ -190,7 +191,7 @@ foreach ($db->getHostCrawlQueue(CRAWL_HOST_LIMIT, time() - CRAWL_HOST_SECONDS_OF
$loc = sprintf('%s/%s', $queueHost->url, trim(str_ireplace($queueHost->url, '', $loc), '/'));
// Validate link
if (!$link = URL::parse($loc)) {
if (!$link = Yggverse\Parser\Url::parse($loc)) {
continue;
}

0
library/cli.php → src/library/cli.php

0
library/curl.php → src/library/curl.php

0
library/filter.php → src/library/filter.php

0
library/ftp.php → src/library/ftp.php

5
library/helper.php → src/library/helper.php

@ -1,8 +1,9 @@ @@ -1,8 +1,9 @@
<?php
require_once(__DIR__ . '/../library/url.php');
require_once(__DIR__ . '/../library/robots.php');
require_once __DIR__ . '/../../vendor/autoload.php';
class Helper {
public static function getHostSetting(MySQL $db,
@ -70,7 +71,7 @@ class Helper { @@ -70,7 +71,7 @@ class Helper {
}
// Validate link URL
if (!$link = URL::parse($link)) {
if (!$link = Yggverse\Parser\Url::parse($link)) {
return false;
}

0
library/icon.php → src/library/icon.php

0
library/mysql.php → src/library/mysql.php

0
library/robots.php → src/library/robots.php

0
library/sitemap.php → src/library/sitemap.php

0
library/sphinxql.php → src/library/sphinxql.php

0
library/vendor/simple_html_dom.php → src/library/vendor/simple_html_dom.php vendored

0
library/yggstate.php → src/library/yggstate.php

0
public/api.php → src/public/api.php

0
public/explore.php → src/public/explore.php

0
public/file.php → src/public/file.php

0
public/index.php → src/public/index.php

0
public/robots.txt → src/public/robots.txt

1
public/search.php → src/public/search.php

@ -3,7 +3,6 @@ @@ -3,7 +3,6 @@
// Load system dependencies
require_once(__DIR__ . '/../config/app.php');
require_once(__DIR__ . '/../library/filter.php');
require_once(__DIR__ . '/../library/url.php');
require_once(__DIR__ . '/../library/mysql.php');
require_once(__DIR__ . '/../library/helper.php');
require_once(__DIR__ . '/../library/sphinxql.php');

0
public/top.php → src/public/top.php

0
storage/cache/index.html → src/storage/cache/index.html vendored

0
storage/snap/index.html → src/storage/snap/index.html

0
storage/tmp/index.html → src/storage/tmp/index.html

Loading…
Cancel
Save