Micro Web Crawler in PHP & Manticore
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

306 lines
8.7 KiB

{
"manticore":
{
"server":
{
"host":"127.0.0.1",
"port":9308
},
"index":
{
"document":{
"name":"yo_document",
"settings":
{
"morphology":"stem_cz,stem_enru",
"index_exact_words":1,
"html_strip":1,
"min_word_len":3,
"min_prefix_len":3
}
}
}
},
"webui":
{
"pagination":
{
"limit":20
},
"search":
{
"index":
{
"request":
{
"url":{
"enabled":false,
"regex":"/.*/ui"
}
}
},
"extended":
{
"enabled":false
}
},
"footer":
{
"links":
[
{
"text":"0200::/7",
"attributes":
{
"title":"Search in 0200::/7 IPv6",
"href":"http://[201:23b4:991a:634d:8359:4521:5576:15b7]/yo/"
},
"index":
[
"http://[201:23b4:991a:634d:8359:4521:5576:15b7]/yo/index.sql"
]
},
{
"text":"yo.ygg",
"attributes":
{
"title":"Search in .ygg zone",
"href":"http://yo.ygg"
},
"index":
[
"http://yo.ygg/index.sql"
]
},
{
"text":"ygg.yo.index",
"attributes":
{
"title":"Search in .ygg zone",
"href":"http://ygg.yo.index"
},
"index":
[
"http://ygg.yo.index/index.sql"
]
},
{
"text":"GitHub",
"attributes":
{
"title":"Source code",
"href":"https://github.com/YGGverse/Yo"
},
"index":[]
}
]
}
},
"cli":
{
"document":
{
"crawl":
{
"debug":
{
"level":
{
"notice":true,
"warning":true,
"error":true
}
},
"curl":
{
"connection":
{
"timeout":3
},
"download":
{
"size":
{
"max":10000024
}
}
},
"queue":
{
"limit":1,
"delay":1
},
"selector":
{
"a:not([rel=nofollow])":
{
"attribute":"href",
"external":false,
"regex":"/.*/ui"
},
"img":
{
"attribute":"src",
"external":false,
"regex":"/.*/ui"
},
"audio":
{
"attribute":"src",
"external":false,
"regex":"/.*/ui"
},
"video":
{
"attribute":"src",
"external":false,
"regex":"/.*/ui"
},
"script":
{
"attribute":"src",
"external":false,
"regex":"/.*/ui"
}
},
"skip":
{
"stripos":
{
"url":
[
"#",
"javascript:",
"mailto:",
"magnet:",
"xmpp:",
"служебная:",
"commit",
"diff",
"print",
"raw",
"cache",
"download",
"share",
"explore",
"register",
"login",
"password",
"forgot",
"restore",
"account",
"reply",
"compose",
"comment",
"edit",
"quote",
"report",
"block",
"transaction",
"search",
"tag",
"page",
"sort",
"order",
"filter",
"limit"
]
}
},
"snap":
{
"enabled":true
}
}
}
},
"snap":
{
"storage":
{
"tmp":{
"directory":"storage/tmp/snap"
},
"local":{
"enabled":false,
"directory":"storage/snap",
"size":
{
"max":10000024
},
"mime":
{
"stripos":
[
"application/xhtml+xml",
"application/javascript",
"text/html",
"text/plain",
"text/css",
"image/webp",
"image/png",
"image/gif",
"image/ico"
]
},
"url":
{
"stripos":
[
"http"
]
}
},
"remote":
{
"ftp":
[
{
"enabled":false,
"connection":
{
"port":21,
"host":"",
"username":"",
"password":"",
"directory":"/snap/yo",
"timeout":30,
"passive":true,
"attempts":
{
"limit":0,
"delay":60
}
},
"size":
{
"max":10000024
},
"mime":
{
"stripos":
[
"application/xhtml+xml",
"application/javascript",
"text/html",
"text/plain",
"text/css",
"image/webp",
"image/png",
"image/gif",
"image/ico"
]
},
"url":
{
"stripos":
[
"http"
]
}
}
]
}
}
}
}