mirror of
https://github.com/YGGverse/YGGo.git
synced 2025-02-03 10:25:52 +00:00
implement index explorer
This commit is contained in:
parent
377b519a2c
commit
eeeb3dceac
@ -143,9 +143,11 @@ GET m=SphinxQL
|
||||
|
||||
* [x] Web pages full text ranking search
|
||||
* [x] Unlimited content type groups
|
||||
* [x] Safe proxy images preview
|
||||
* [x] Extended syntax support
|
||||
* [x] Flexible settings compatible with IPv4/IPv6 networks
|
||||
* [x] Index explorer
|
||||
* [x] Safe images preview
|
||||
* [x] Extended search syntax support
|
||||
* [ ] Page history snaps
|
||||
|
||||
##### UI
|
||||
|
||||
@ -163,7 +165,6 @@ GET m=SphinxQL
|
||||
+ [x] Search
|
||||
+ [x] Hosts
|
||||
+ [ ] MIME list
|
||||
* [ ] Remote content DB API
|
||||
* [ ] Context advertising API
|
||||
|
||||
##### Crawler
|
||||
|
@ -218,7 +218,11 @@ class MySQL {
|
||||
|
||||
public function getFoundHostPage(int $hostPageId) {
|
||||
|
||||
$query = $this->_db->prepare('SELECT `hostPage`.`uri`,
|
||||
$query = $this->_db->prepare('SELECT `hostPage`.`hostPageId`,
|
||||
`hostPage`.`uri`,
|
||||
`hostPage`.`timeAdded`,
|
||||
`hostPage`.`timeUpdated`,
|
||||
`hostPage`.`mime`,
|
||||
`host`.`scheme`,
|
||||
`host`.`name`,
|
||||
`host`.`port`
|
||||
@ -349,7 +353,7 @@ class MySQL {
|
||||
return $query->fetch()->total;
|
||||
}
|
||||
|
||||
public function getHostPageIdSourcesByHostPageIdTarget(int $hostPageIdTarget, int $limit) {
|
||||
public function getHostPageIdSourcesByHostPageIdTarget(int $hostPageIdTarget, int $limit = 1000) {
|
||||
|
||||
$query = $this->_db->prepare('SELECT * FROM `hostPageToHostPage` WHERE `hostPageIdTarget` = ? ORDER BY `quantity` DESC LIMIT ' . (int) $limit);
|
||||
|
||||
|
@ -39,6 +39,15 @@ class SphinxQL {
|
||||
return $query->fetch()->total;
|
||||
}
|
||||
|
||||
/**
 * List the distinct `mime` values found in the `hostPage` index.
 *
 * Rows are grouped by `mime` and ordered ascending by the same column.
 *
 * @return mixed whatever fetchAll() yields for the executed statement
 */
public function getHostPagesMime() {

  $statement = $this->_sphinx->prepare(
    'SELECT `mime` FROM `hostPage` GROUP BY `mime` ORDER BY `mime` ASC'
  );

  $statement->execute();

  $rows = $statement->fetchAll();

  return $rows;
}
|
||||
|
||||
public function searchHostPagesTotalByMime(string $keyword, string $mime) {
|
||||
|
||||
$query = $this->_sphinx->prepare('SELECT COUNT(*) AS `total` FROM `hostPage` WHERE MATCH(?) AND `mime` = ?');
|
||||
|
274
public/explore.php
Normal file
274
public/explore.php
Normal file
@ -0,0 +1,274 @@
|
||||
<?php

// Explorer page bootstrap: loads libraries, opens the search index and
// database connections, reads the requested page ID and prepares the
// search box placeholder.

// Load system dependencies
require_once('../config/app.php');
require_once('../library/curl.php');
require_once('../library/robots.php');
require_once('../library/filter.php');
require_once('../library/parser.php');
require_once('../library/mysql.php');
require_once('../library/sphinxql.php');

// Connect Sphinx search server
$sphinx = new SphinxQL(SPHINX_HOST, SPHINX_PORT);

// Connect database
$db = new MySQL(DB_HOST, DB_PORT, DB_NAME, DB_USERNAME, DB_PASSWORD);

// Filter request data
//
// `hp` is a numeric host page ID that is handed to int-typed database
// methods below, so it is cast to int here: a non-numeric value would
// otherwise raise a TypeError at the first call. Non-scalar input
// (e.g. hp[]=1) falls back to 0, which is rendered as "Not found".
$hp = isset($_GET['hp']) && is_scalar($_GET['hp']) ? (int) $_GET['hp'] : 0;

// Define page basics
$totalPages = $sphinx->getHostPagesTotal();

// Placeholder text for the search field, pluralized by the indexed pages total
$placeholder = Filter::plural($totalPages, [sprintf(_('Over %s page or enter the new one...'), $totalPages),
                                            sprintf(_('Over %s pages or enter the new one...'), $totalPages),
                                            sprintf(_('Over %s pages or enter the new one...'), $totalPages),
                                            ]);

?>
|
||||
|
||||
<!DOCTYPE html>
|
||||
<html lang="<?php echo _('en-US'); ?>">
|
||||
<head>
|
||||
<title><?php echo sprintf(_('#%s info - YGGo!'), (int) $hp) ?></title>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="description" content="<?php echo _('Javascript-less Open Source Web Search Engine') ?>" />
|
||||
<meta name="keywords" content="<?php echo _('web, search, engine, crawler, php, pdo, mysql, sphinx, yggdrasil, js-less, open source') ?>" />
|
||||
<style>
|
||||
|
||||
* {
|
||||
border: 0;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
font-family: Sans-serif;
|
||||
}
|
||||
|
||||
body {
|
||||
background-color: #2e3436;
|
||||
}
|
||||
|
||||
header {
|
||||
background-color: #34393b;
|
||||
position: fixed;
|
||||
top: 0;
|
||||
left: 0;
|
||||
right: 0;
|
||||
}
|
||||
|
||||
main {
|
||||
margin-top: 110px;
|
||||
margin-bottom: 76px;
|
||||
padding: 0 20px;
|
||||
}
|
||||
|
||||
h1 {
|
||||
position: fixed;
|
||||
top: 8px;
|
||||
left: 24px;
|
||||
}
|
||||
|
||||
h1 > a,
|
||||
h1 > a:visited,
|
||||
h1 > a:active,
|
||||
h1 > a:hover {
|
||||
color: #fff;
|
||||
font-weight: normal;
|
||||
font-size: 24px;
|
||||
margin: 10px 0;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
h2 {
|
||||
display: block;
|
||||
font-size: 16px;
|
||||
font-weight: normal;
|
||||
margin: 4px 0;
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
form {
|
||||
display: block;
|
||||
max-width: 678px;
|
||||
margin: 0 auto;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
input {
|
||||
width: 100%;
|
||||
margin: 12px 0;
|
||||
padding: 10px 0;
|
||||
border-radius: 32px;
|
||||
background-color: #000;
|
||||
color: #fff;
|
||||
font-size: 16px;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
input:hover {
|
||||
background-color: #111
|
||||
}
|
||||
|
||||
input:focus {
|
||||
outline: none;
|
||||
background-color: #111
|
||||
}
|
||||
|
||||
input:focus::placeholder {
|
||||
color: #090808
|
||||
}
|
||||
|
||||
label {
|
||||
font-size: 14px;
|
||||
color: #fff;
|
||||
float: left;
|
||||
margin-left: 16px;
|
||||
margin-bottom: 14px;
|
||||
}
|
||||
|
||||
label > input {
|
||||
width: auto;
|
||||
margin: 0 4px;
|
||||
}
|
||||
|
||||
button {
|
||||
padding: 8px 16px;
|
||||
border-radius: 4px;
|
||||
cursor: pointer;
|
||||
background-color: #3394fb;
|
||||
color: #fff;
|
||||
font-size: 14px;
|
||||
position: fixed;
|
||||
top: 15px;
|
||||
right: 24px;
|
||||
}
|
||||
|
||||
button:hover {
|
||||
background-color: #4b9df4;
|
||||
}
|
||||
|
||||
a, a:visited, a:active {
|
||||
color: #9ba2ac;
|
||||
display: inline-block;
|
||||
font-size: 12px;
|
||||
margin-top: 8px;
|
||||
}
|
||||
|
||||
a:hover {
|
||||
color: #54a3f7;
|
||||
}
|
||||
|
||||
img.icon {
|
||||
float: left;
|
||||
border-radius: 50%;
|
||||
margin-right: 8px;
|
||||
}
|
||||
|
||||
img.image {
|
||||
max-width: 100%;
|
||||
border-radius: 3px;
|
||||
}
|
||||
|
||||
div {
|
||||
max-width: 640px;
|
||||
margin: 0 auto;
|
||||
padding: 16px 0;
|
||||
border-top: 1px #000 dashed;
|
||||
font-size: 14px
|
||||
}
|
||||
|
||||
span {
|
||||
color: #ccc;
|
||||
display: block;
|
||||
margin: 8px 0;
|
||||
}
|
||||
|
||||
p {
|
||||
margin: 16px 0;
|
||||
text-align: right;
|
||||
font-size: 11px;
|
||||
}
|
||||
|
||||
p > a, p > a:visited, p > a:active {
|
||||
font-size: 11px;
|
||||
}
|
||||
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<header>
  <?php // Search form: submits the query and the selected MIME group to search.php ?>
  <form name="search" method="GET" action="<?php echo WEBSITE_DOMAIN; ?>/search.php">
    <h1><a href="<?php echo WEBSITE_DOMAIN; ?>"><?php echo _('YGGo!') ?></a></h1>
    <input type="text" name="q" placeholder="<?php echo htmlspecialchars((string) $placeholder, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') ?>" value="" />
    <?php // MIME values are read from the search index, not from this template, so escape them before printing ?>
    <?php foreach ($sphinx->getHostPagesMime() as $mime) { ?>
      <?php $mimeEscaped = htmlspecialchars((string) $mime->mime, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') ?>
      <label><input type="radio" name="t" value="<?php echo $mimeEscaped ?>" /><?php echo $mimeEscaped ?></label>
    <?php } ?>
    <button type="submit"><?php echo _('Search'); ?></button>
  </form>
</header>
|
||||
<main>
  <?php

    // Escape a stored value for output in HTML text or a double-quoted attribute.
    // Page metadata and addresses are read from the database, so they are never
    // printed raw.
    $escape = static function ($value) {
      return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    };

    // Absolute address of an indexed page; the port part is added only when set.
    $pageUrl = static function ($page) {
      return $page->scheme . '://' . $page->name . ($page->port ? ':' . $page->port : '') . $page->uri;
    };

    // Readable link text: URL-decoded address with long URIs cut to their last 48 characters.
    $pageLabel = static function ($page) {
      $uri = urldecode($page->uri);

      return urldecode($page->scheme . '://' . $page->name . ($page->port ? ':' . $page->port : '')) .
             (mb_strlen($uri) > 48 ? '...' . mb_substr($uri, -48) : $uri);
    };

  ?>
  <?php if ($hostPage = $db->getFoundHostPage($hp)) { ?>
    <?php // Page summary: latest stored description plus a link to the page itself ?>
    <div>
      <?php if ($hostPageDescription = $db->getLastPageDescription($hp)) { ?>
        <?php if (!empty($hostPageDescription->title)) { ?>
          <h2><?php echo $escape($hostPageDescription->title) ?></h2>
        <?php } ?>
        <?php if (!empty($hostPageDescription->description)) { ?>
          <span><?php echo $escape($hostPageDescription->description) ?></span>
        <?php } ?>
        <?php if (!empty($hostPageDescription->keywords)) { ?>
          <span><?php echo $escape($hostPageDescription->keywords) ?></span>
        <?php } ?>
      <?php } ?>
      <a href="<?php echo $escape($pageUrl($hostPage)) ?>">
        <img src="<?php echo WEBSITE_DOMAIN; ?>/image.php?q=<?php echo urlencode($hostPage->name) ?>" alt="favicon" width="16" height="16" class="icon" />
        <?php echo htmlentities($pageLabel($hostPage)) ?>
      </a>
    </div>
    <?php // Index details: MIME, timestamps and the pages that link to this one ?>
    <div>
      <p><?php echo _('MIME') ?></p>
      <p><?php echo $escape($hostPage->mime) ?></p>
      <p><?php echo _('Time added') ?></p>
      <p><?php echo date('c', $hostPage->timeAdded) ?></p>
      <p><?php echo _('Time updated') ?></p>
      <?php // NOTE(review): if timeUpdated can be NULL for pages never re-crawled, date() prints the Unix epoch here - confirm against the schema ?>
      <p><?php echo date('c', $hostPage->timeUpdated) ?></p>
      <?php if ($totalHostPageIdSources = $db->getTotalHostPageIdSourcesByHostPageIdTarget($hp)) { ?>
        <?php // The "view all" link on the search page points at the #referrers fragment, so expose it here ?>
        <p id="referrers">
          <?php echo Filter::plural($totalHostPageIdSources, [sprintf(_('%s referrer'), $totalHostPageIdSources),
                                                              sprintf(_('%s referrers'), $totalHostPageIdSources),
                                                              sprintf(_('%s referrers'), $totalHostPageIdSources),
                                                              ]) ?>
        </p>
        <?php foreach ($db->getHostPageIdSourcesByHostPageIdTarget($hp) as $hostPageIdSource) { ?>
          <?php // Use a separate variable so the explored page in $hostPage is not overwritten ?>
          <?php if ($hostPageSource = $db->getFoundHostPage($hostPageIdSource->hostPageIdSource)) { ?>
            <p>
              <?php echo Filter::plural($hostPageIdSource->quantity, [sprintf(_('%s ref'), $hostPageIdSource->quantity),
                                                                      sprintf(_('%s refs'), $hostPageIdSource->quantity),
                                                                      sprintf(_('%s refs'), $hostPageIdSource->quantity),
                                                                      ]) ?>
              <a href="<?php echo $escape($pageUrl($hostPageSource)) ?>">
                <img src="<?php echo WEBSITE_DOMAIN; ?>/image.php?q=<?php echo urlencode($hostPageSource->name) ?>" alt="favicon" width="16" height="16" class="icon" />
                <?php echo htmlentities($pageLabel($hostPageSource)) ?>
              </a>
              |
              <a href="<?php echo WEBSITE_DOMAIN; ?>/explore.php?hp=<?php echo (int) $hostPageSource->hostPageId ?>">
                <?php echo _('explore'); ?>
              </a>
            </p>
          <?php } ?>
        <?php } ?>
      <?php } ?>
    </div>
  <?php } else { ?>
    <?php // Unknown page ID: show a notice and, when the crawl queue is not empty, its size ?>
    <div style="text-align:center">
      <span><?php echo _('Not found') ?></span>
      <?php if ($queueTotal = $db->getTotalPagesByHttpCode(null)) { ?>
        <span><?php echo sprintf(_('* Please wait for all pages crawl to complete (%s in queue).'), $queueTotal) ?></span>
      <?php } ?>
    </div>
  <?php } ?>
</main>
|
||||
</body>
|
||||
</html>
|
@ -20,13 +20,12 @@ $t = !empty($_GET['t']) ? Filter::url($_GET['t']) : 'text';
|
||||
$m = !empty($_GET['m']) ? Filter::url($_GET['m']) : 'default';
|
||||
$q = !empty($_GET['q']) ? Filter::url($_GET['q']) : '';
|
||||
$p = !empty($_GET['p']) ? (int) $_GET['p'] : 1;
|
||||
$i = !empty($_GET['i']) ? (int) $_GET['i'] : 0;
|
||||
|
||||
// Search request
|
||||
if (!empty($q)) {
|
||||
|
||||
$resultsTotal = $sphinx->searchHostPagesTotal(Filter::searchQuery($q, $m), $t);
|
||||
$results = $sphinx->searchHostPages(Filter::searchQuery($q, $m), $t, ($i ? 1 : $p * WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT - WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT), ($i ? 1 : WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT), ($i ? 1 : $resultsTotal));
|
||||
$results = $sphinx->searchHostPages(Filter::searchQuery($q, $m), $t, $p * WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT - WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT, WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT, $resultsTotal);
|
||||
|
||||
} else {
|
||||
|
||||
@ -296,7 +295,7 @@ if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
|
||||
font-size: 11px;
|
||||
}
|
||||
|
||||
p > a {
|
||||
p > a, p > a:visited, p > a:active {
|
||||
font-size: 11px;
|
||||
}
|
||||
|
||||
@ -337,19 +336,23 @@ if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
|
||||
<?php } ?>
|
||||
<a href="<?php echo $hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false) . $hostPage->uri ?>">
|
||||
<img src="<?php echo WEBSITE_DOMAIN; ?>/image.php?q=<?php echo urlencode($hostPage->name) ?>" alt="favicon" width="16" height="16" class="icon" />
|
||||
<?php echo htmlentities(urldecode($hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . (mb_strlen(urldecode($hostPage->uri)) > 48 ? '...' . mb_substr(urldecode($hostPage->uri), -48) : urldecode($hostPage->uri))) ?>
|
||||
<?php echo htmlentities(urldecode($hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . (mb_strlen(urldecode($hostPage->uri)) > 36 ? '...' . mb_substr(urldecode($hostPage->uri), -36) : urldecode($hostPage->uri))) ?>
|
||||
</a>
|
||||
|
|
||||
<a href="<?php echo WEBSITE_DOMAIN; ?>/explore.php?hp=<?php echo $result->id ?>">
|
||||
<?php echo _('explore'); ?>
|
||||
</a>
|
||||
<?php if ($result->mime != 'text' && $totalHostPageIdSources = $db->getTotalHostPageIdSourcesByHostPageIdTarget($result->id)) { ?>
|
||||
<p>
|
||||
<a href="search.php?q=<?php echo urlencode($q) ?>&t=<?php echo $t ?>&m=<?php echo $m ?>&i=<?php echo $result->id ?>&p=<?php echo $p ?>">
|
||||
<?php echo Filter::plural($totalHostPageIdSources, [sprintf(_('%s referrer'), $totalHostPageIdSources),
|
||||
sprintf(_('%s referrers'), $totalHostPageIdSources),
|
||||
sprintf(_('%s referrers'), $totalHostPageIdSources),
|
||||
]) ?>
|
||||
</a>
|
||||
</p>
|
||||
<?php foreach ($db->getHostPageIdSourcesByHostPageIdTarget($result->id, ($i ? 1000 : 5)) as $j => $hostPageIdSource) { ?>
|
||||
<?php $i = 1 ?>
|
||||
<?php foreach ($db->getHostPageIdSourcesByHostPageIdTarget($result->id, 5) as $hostPageIdSource) { ?>
|
||||
<?php if ($hostPage = $db->getFoundHostPage($hostPageIdSource->hostPageIdSource)) { ?>
|
||||
<?php $i++ ?>
|
||||
<p>
|
||||
<?php echo Filter::plural($hostPageIdSource->quantity, [sprintf(_('%s ref'), $hostPageIdSource->quantity),
|
||||
sprintf(_('%s refs'), $hostPageIdSource->quantity),
|
||||
@ -357,16 +360,29 @@ if (filter_var($q, FILTER_VALIDATE_URL) && preg_match(CRAWL_URL_REGEXP, $q)) {
|
||||
]) ?>
|
||||
<a href="<?php echo $hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false) . $hostPage->uri ?>">
|
||||
<img src="<?php echo WEBSITE_DOMAIN; ?>/image.php?q=<?php echo urlencode($hostPage->name) ?>" alt="favicon" width="16" height="16" class="icon" />
|
||||
<?php echo htmlentities(urldecode($hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . (mb_strlen(urldecode($hostPage->uri)) > 48 ? '...' . mb_substr(urldecode($hostPage->uri), -48) : urldecode($hostPage->uri))) ?>
|
||||
<?php echo htmlentities(urldecode($hostPage->scheme . '://' . $hostPage->name . ($hostPage->port ? ':' . $hostPage->port : false)) . (mb_strlen(urldecode($hostPage->uri)) > 36 ? '...' . mb_substr(urldecode($hostPage->uri), -36) : urldecode($hostPage->uri))) ?>
|
||||
</a>
|
||||
<!--
|
||||
|
|
||||
<a href="<?php echo WEBSITE_DOMAIN; ?>/explore.php?hp=<?php echo $hostPage->hostPageId ?>">
|
||||
<?php echo _('explore'); ?>
|
||||
</a>
|
||||
-->
|
||||
</p>
|
||||
<?php } ?>
|
||||
<?php } ?>
|
||||
<?php if ($i < $totalHostPageIdSources) { ?>
|
||||
<p>
|
||||
<a href="<?php echo WEBSITE_DOMAIN; ?>/explore.php?hp=<?php echo $result->id ?>#referrers">
|
||||
<?php echo _('view all'); ?>
|
||||
</a>
|
||||
</p>
|
||||
<?php } ?>
|
||||
<?php } ?>
|
||||
</div>
|
||||
<?php } ?>
|
||||
<?php } ?>
|
||||
<?php if (!$i && $p * WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT <= $resultsTotal) { ?>
|
||||
<?php if ($p * WEBSITE_PAGINATION_SEARCH_PAGE_RESULTS_LIMIT <= $resultsTotal) { ?>
|
||||
<div>
|
||||
<a href="<?php echo WEBSITE_DOMAIN; ?>/search.php?q=<?php echo urlencode(htmlentities($q)) ?>&t=<?php echo $t ?>&m=<?php echo $m ?>&p=<?php echo $p + 1 ?>"><?php echo _('Next page') ?></a>
|
||||
</div>
|
||||
|
Loading…
x
Reference in New Issue
Block a user