From eeeb3dceac3e511d87ce8127ff80c174b7f444ef Mon Sep 17 00:00:00 2001 From: ghost Date: Sat, 13 May 2023 05:54:15 +0300 Subject: [PATCH] implement index explorer --- README.md | 7 +- library/mysql.php | 8 +- library/sphinxql.php | 9 ++ public/explore.php | 274 +++++++++++++++++++++++++++++++++++++++++++ public/search.php | 34 ++++-- 5 files changed, 318 insertions(+), 14 deletions(-) create mode 100644 public/explore.php diff --git a/README.md b/README.md index bf1ae50..3687354 100644 --- a/README.md +++ b/README.md @@ -143,9 +143,11 @@ GET m=SphinxQL * [x] Web pages full text ranking search * [x] Unlimited content type groups -* [x] Safe proxy images preview -* [x] Extended syntax support * [x] Flexible settings compatible with IPv4/IPv6 networks +* [x] Index explorer +* [x] Safe images preview +* [x] Extended search syntax support +* [ ] Page history snaps ##### UI @@ -163,7 +165,6 @@ GET m=SphinxQL + [x] Search + [x] Hosts + [ ] MIME list -* [ ] Remote content DB API * [ ] Context advertising API ##### Crawler diff --git a/library/mysql.php b/library/mysql.php index 2e9b59d..2eb000f 100644 --- a/library/mysql.php +++ b/library/mysql.php @@ -218,7 +218,11 @@ class MySQL { public function getFoundHostPage(int $hostPageId) { - $query = $this->_db->prepare('SELECT `hostPage`.`uri`, + $query = $this->_db->prepare('SELECT `hostPage`.`hostPageId`, + `hostPage`.`uri`, + `hostPage`.`timeAdded`, + `hostPage`.`timeUpdated`, + `hostPage`.`mime`, `host`.`scheme`, `host`.`name`, `host`.`port` @@ -349,7 +353,7 @@ class MySQL { return $query->fetch()->total; } - public function getHostPageIdSourcesByHostPageIdTarget(int $hostPageIdTarget, int $limit) { + public function getHostPageIdSourcesByHostPageIdTarget(int $hostPageIdTarget, int $limit = 1000) { $query = $this->_db->prepare('SELECT * FROM `hostPageToHostPage` WHERE `hostPageIdTarget` = ? ORDER BY `quantity` DESC LIMIT ' . (int) $limit); diff --git a/library/sphinxql.php b/library/sphinxql.php index c259d67..8b46af5 100644 --- a/library/sphinxql.php +++ b/library/sphinxql.php @@ -39,6 +39,15 @@ class SphinxQL { return $query->fetch()->total; } + public function getHostPagesMime() { + + $query = $this->_sphinx->prepare('SELECT `mime` FROM `hostPage` GROUP BY `mime` ORDER BY `mime` ASC'); + + $query->execute(); + + return $query->fetchAll(); + } + public function searchHostPagesTotalByMime(string $keyword, string $mime) { $query = $this->_sphinx->prepare('SELECT COUNT(*) AS `total` FROM `hostPage` WHERE MATCH(?) AND `mime` = ?'); diff --git a/public/explore.php b/public/explore.php new file mode 100644 index 0000000..b38dd6c --- /dev/null +++ b/public/explore.php @@ -0,0 +1,274 @@ +getHostPagesTotal(); + +$placeholder = Filter::plural($totalPages, [sprintf(_('Over %s page or enter the new one...'), $totalPages), + sprintf(_('Over %s pages or enter the new one...'), $totalPages), + sprintf(_('Over %s pages or enter the new one...'), $totalPages), + ]); + + + +?> + + + + + <?php echo sprintf(_('#%s info - YGGo!'), (int) $hp) ?> + + + + + + +
+
+

+ + getHostPagesMime() as $mime) { ?> + + + +
+
+
+ getFoundHostPage($hp)) { ?> +
+ getLastPageDescription($hp)) { ?> + title)) { ?> +

title ?>

+ + description)) { ?> + description ?> + + keywords)) { ?> + keywords ?> + + + + favicon + scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . (mb_strlen(urldecode($hostPage->uri)) > 48 ? '...' . mb_substr(urldecode($hostPage->uri), -48) : urldecode($hostPage->uri))) ?> + +
+
+

+

mime ?>

+

+

timeAdded) ?>

+

+

timeUpdated) ?>

+ getTotalHostPageIdSourcesByHostPageIdTarget($hp)) { ?> +

+ +

+ getHostPageIdSourcesByHostPageIdTarget($hp) as $hostPageIdSource) { ?> + getFoundHostPage($hostPageIdSource->hostPageIdSource)) { ?> +

+ quantity, [sprintf(_('%s ref'), $hostPageIdSource->quantity), + sprintf(_('%s refs'), $hostPageIdSource->quantity), + sprintf(_('%s refs'), $hostPageIdSource->quantity), + ]) ?> + + favicon + scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . (mb_strlen(urldecode($hostPage->uri)) > 48 ? '...' . mb_substr(urldecode($hostPage->uri), -48) : urldecode($hostPage->uri))) ?> + + | + + + +

+ + + +
+ +
+ + getTotalPagesByHttpCode(null)) { ?> + + +
+ +
+ + \ No newline at end of file diff --git a/public/search.php b/public/search.php index bdfd5eb..a07730e 100644 --- a/public/search.php +++ b/public/search.php @@ -20,13 +20,12 @@ $t = !empty($_GET['t']) ? Filter::url($_GET['t']) : 'text'; $m = !empty($_GET['m']) ? Filter::url($_GET['m']) : 'default'; $q = !empty($_GET['q']) ? Filter::url($_GET['q']) : ''; $p = !empty($_GET['p']) ? (int) $_GET['p'] : 1; -$i = !empty($_GET['i']) ? (int) $_GET['i'] : 0; // Search request if (!empty($q)) { $resultsTotal = $sphinx->searchHostPagesTotal(Filter::searchQuery($q, $m), $t); - $results = $sphinx->searchHostPages(Filter::searchQuery($q, $m), $t, ($i ? 1 : $p * WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT - WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT), ($i ? 1 : WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT), ($i ? 1 : $resultsTotal)); + $results = $sphinx->searchHostPages(Filter::searchQuery($q, $m), $t, $p * WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT - WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT, WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT, $resultsTotal); } else { @@ -296,7 +295,7 @@ if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) { font-size: 11px; } - p > a { + p > a, p > a:visited, p > a:active { font-size: 11px; } @@ -337,19 +336,23 @@ if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) { favicon - scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . (mb_strlen(urldecode($hostPage->uri)) > 48 ? '...' . mb_substr(urldecode($hostPage->uri), -48) : urldecode($hostPage->uri))) ?> + scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . (mb_strlen(urldecode($hostPage->uri)) > 36 ? '...' . mb_substr(urldecode($hostPage->uri), -36) : urldecode($hostPage->uri))) ?> + + | + + mime != 'text' && $totalHostPageIdSources = $db->getTotalHostPageIdSourcesByHostPageIdTarget($result->id)) { ?>

- -

- getHostPageIdSourcesByHostPageIdTarget($result->id, ($i ? 1000 : 5)) as $j => $hostPageIdSource) { ?> + + getHostPageIdSourcesByHostPageIdTarget($result->id, 5) as $hostPageIdSource) { ?> getFoundHostPage($hostPageIdSource->hostPageIdSource)) { ?> +

quantity, [sprintf(_('%s ref'), $hostPageIdSource->quantity), sprintf(_('%s refs'), $hostPageIdSource->quantity), @@ -357,16 +360,29 @@ if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) { ]) ?> favicon - scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . (mb_strlen(urldecode($hostPage->uri)) > 48 ? '...' . mb_substr(urldecode($hostPage->uri), -48) : urldecode($hostPage->uri))) ?> + scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . (mb_strlen(urldecode($hostPage->uri)) > 36 ? '...' . mb_substr(urldecode($hostPage->uri), -36) : urldecode($hostPage->uri))) ?> + +

+ +

+ + + +

+ - +