2023-05-03 22:34:12 +00:00
|
|
|
# Base source holding the MySQL connection settings.
# It is inherited by sources declared as "source <name> : common".
source common
{
    type     = mysql

    # Connection parameters; user, password and database are left blank in this file.
    sql_host = localhost
    sql_user =
    sql_pass =
    sql_db   =
    sql_port = 3306 # optional, default is 3306
}
|
2023-04-07 01:04:24 +00:00
|
|
|
|
2023-05-03 22:34:12 +00:00
|
|
|
# Document source for the hostPage index: one row per `hostPage` record joined
# with its `host`. Connection settings are inherited from "source common".
source hostPage : common
{
    # GROUP_CONCAT() results are capped by group_concat_max_len (1024 bytes by
    # default in MySQL). Raise the limit for this session so the aggregated
    # title / description / keywords text is not silently truncated before indexing.
    sql_query_pre = SET SESSION group_concat_max_len = 1048576

    # Selected columns, in order:
    #   hostPageId            - first column of the result set
    #   uri                   - page URI
    #   hostPageURIKeywords   - URI with '/', '_', '-' and '.' replaced by spaces
    #   rank                  - declared below as an unsigned integer attribute
    #   host name
    #   host URL (scheme://name[:port]) and its CRC32
    #   page URL (host URL + uri) and its CRC32
    #   mime                  - reduced to the part before the first '/'
    #   title / description / keywords - concatenated from `hostPageDescription`
    #
    # NOTE(review): the host URL / page URL columns and their CRC32 values carry
    # no alias and are not declared as attributes - confirm whether that is intended.
    #
    # The mime pattern uses an explicit [A-Za-z-] class (the former [A-z-] range
    # also matched '[', '\', ']', '^', '_' and '`') and no longer requires the
    # subtype to start with a letter, so values such as "video/3gpp" collapse
    # to "video" as well.
    #
    # The final line of the query intentionally has no trailing backslash, so the
    # statement does not depend on a following blank line to terminate.
    sql_query = \
        SELECT `hostPage`.`hostPageId`, \
               `hostPage`.`uri`, \
               REPLACE(REPLACE(REPLACE(REPLACE(`hostPage`.`uri`, '/', ' '), '_', ' '), '-', ' '), '.', ' ') AS `hostPageURIKeywords`, \
               `hostPage`.`rank`, \
               `host`.`name`, \
               IF (`host`.`port` IS NOT NULL, \
                   CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`), \
                   CONCAT(`host`.`scheme`, '://', `host`.`name`)), \
               CRC32 (IF (`host`.`port` IS NOT NULL, \
                          CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`), \
                          CONCAT(`host`.`scheme`, '://', `host`.`name`))), \
               IF (`host`.`port` IS NOT NULL, \
                   CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`, `hostPage`.`uri`), \
                   CONCAT(`host`.`scheme`, '://', `host`.`name`, `hostPage`.`uri`)), \
               CRC32 (IF (`host`.`port` IS NOT NULL, \
                          CONCAT(`host`.`scheme`, '://', `host`.`name`, ':', `host`.`port`, `hostPage`.`uri`), \
                          CONCAT(`host`.`scheme`, '://', `host`.`name`, `hostPage`.`uri`))), \
               REGEXP_REPLACE(`hostPage`.`mime`, '^([A-Za-z-]+)/.*', '$1') AS `mime`, \
               (SELECT GROUP_CONCAT(`hostPageDescription`.`title`) \
                  FROM `hostPageDescription` \
                 WHERE `hostPageDescription`.`hostPageId` = `hostPage`.`hostPageId`) AS `title`, \
               (SELECT GROUP_CONCAT(`hostPageDescription`.`description`) \
                  FROM `hostPageDescription` \
                 WHERE `hostPageDescription`.`hostPageId` = `hostPage`.`hostPageId`) AS `description`, \
               (SELECT GROUP_CONCAT(`hostPageDescription`.`keywords`) \
                  FROM `hostPageDescription` \
                 WHERE `hostPageDescription`.`hostPageId` = `hostPage`.`hostPageId`) AS `keywords` \
          FROM `hostPage` \
          JOIN `host` ON (`host`.`hostId` = `hostPage`.`hostId`) \
         WHERE `hostPage`.`httpCode` != 0 AND `hostPage`.`httpCode` IS NOT NULL AND `hostPage`.`mime` IS NOT NULL

    # Columns stored as attributes rather than full-text fields.
    sql_attr_uint   = rank
    sql_attr_string = mime
}
|
|
|
|
|
2023-04-07 01:04:24 +00:00
|
|
|
# Index definition fed by the "hostPage" source.
index hostPage
{
    source            = hostPage
    path              = /var/lib/sphinxsearch/data/hostPage

    # Morphology processors applied to indexed words, in the listed order.
    morphology        = stem_cz, stem_ar, lemmatize_de_all, lemmatize_ru_all, lemmatize_en_all # stem_enru

    # Minimum word length and minimum prefix length to index.
    min_word_len      = 2
    min_prefix_len    = 2

    # Strip HTML markup from the incoming text.
    html_strip        = 1

    # Index the original word forms alongside the morphology-processed ones.
    index_exact_words = 1
}
|
|
|
|
|
|
|
|
# Settings for the indexer program.
indexer
{
    # Memory limits for indexing and for the lemmatizer cache.
    mem_limit        = 256M
    lemmatizer_cache = 256M
}
|
|
|
|
|
|
|
|
# Settings shared by the Sphinx programs.
common
{
    # Directory containing the lemmatizer dictionaries used by the lemmatize_* morphology.
    lemmatizer_base = /var/lib/sphinxsearch/dicts # http://sphinxsearch.com/downloads/dicts
}
|