Micro Web Crawler in PHP & Manticore
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

354 lines
10 KiB

{
"manticore":
{
"server":
{
"host":"127.0.0.1",
"port":9308
},
"index":
{
"document":{
"name":"yo_document",
"settings":
{
"morphology":"stem_cz,stem_enru",
"index_exact_words":1,
"html_strip":1,
"min_word_len":3,
"min_prefix_len":3
}
}
}
},
"webui":
{
"pagination":
{
"limit":20
},
"search":
{
"index":
{
"request":
{
"url":{
"enabled":false,
"regex":"/.*/ui"
}
}
},
"extended":
{
"enabled":false
},
"options":
{
"field_weights":
{
"url":100,
"title":200,
"description":300,
"keywords":400,
"body":500
}
},
"highlight":
{
"fields":
[
"url",
"title",
"description",
"keywords",
"body"
],
"options":
{
"around":5,
"limit":140
}
}
},
"footer":
{
"links":
[
{
"text":"0200::/7",
"attributes":
{
"title":"Search in 0200::/7 IPv6",
"href":"http://[201:23b4:991a:634d:8359:4521:5576:15b7]/yo/"
},
"index":
[
"http://[201:23b4:991a:634d:8359:4521:5576:15b7]/yo/index.sql"
]
},
{
"text":"yo.ygg",
"attributes":
{
"title":"Search in .ygg zone",
"href":"http://yo.ygg"
},
"index":
[
"http://yo.ygg/index.sql"
]
},
{
"text":"ygg.yo.index",
"attributes":
{
"title":"Search in .ygg zone",
"href":"http://ygg.yo.index"
},
"index":
[
"http://ygg.yo.index/index.sql"
]
},
{
"text":"GitHub",
"attributes":
{
"title":"Source code",
"href":"https://github.com/YGGverse/Yo"
},
"index":[]
}
]
},
"index":
{
"enabled":true
}
},
"cli":
{
"document":
{
"crawl":
{
"debug":
{
"level":
{
"notice":true,
"warning":true,
"error":true
}
},
"curl":
{
"connection":
{
"timeout":3
},
"download":
{
"size":
{
"max":10000024
}
}
},
"queue":
{
"limit":1,
"delay":1
},
"selector":
{
"a:not([rel=nofollow])":
{
"attribute":"href",
"external":false,
"regex":"/.*/ui"
},
"image":
{
"attribute":"src",
"external":false,
"regex":"/.*/ui"
},
"audio":
{
"attribute":"src",
"external":false,
"regex":"/.*/ui"
},
"video":
{
"attribute":"src",
"external":false,
"regex":"/.*/ui"
},
"script":
{
"attribute":"href",
"external":false,
"regex":"/.*/ui"
}
},
"skip":
{
"stripos":
{
"url":
[
"#",
"?",
"javascript:",
"mailto:",
"magnet:",
"xmpp:",
"/commit",
"/diff",
"/print",
"/raw",
"/cache",
"/download",
"/share",
"/explore",
"/register",
"/login",
"/password",
"/forgot",
"/restore",
"/account",
"/reply",
"/read",
"/compose",
"/comment",
"/add",
"/edit",
"/delete",
"/quote",
"/report",
"/export",
"/import",
"/mobile",
"/mwiki",
"/branch",
"/block",
"/transaction",
"/search",
"/tag",
"/page",
"/sort",
"/order",
"/pdf",
"/fb2",
"/mobi",
"/epub",
"/djvu",
"/_detail",
"/_media",
"/t/",
"/q/",
"/s/"
]
}
},
"snap":
{
"enabled":true,
"timeout":2592000
}
}
}
},
"snap":
{
"storage":
{
"tmp":{
"directory":"storage/tmp/snap"
},
"local":{
"enabled":false,
"directory":"storage/snap",
"size":
{
"max":10000024
},
"mime":
{
"stripos":
[
"application/xhtml+xml",
"application/javascript",
"text/html",
"text/plain",
"text/css",
"image/webp",
"image/png",
"image/gif",
"image/ico"
]
},
"url":
{
"stripos":
[
"http"
]
}
},
"remote":
{
"ftp":
[
{
"enabled":false,
"connection":
{
"port":21,
"host":"",
"username":"",
"password":"",
"directory":"/snap/yo",
"timeout":30,
"passive":true,
"attempts":
{
"limit":0,
"delay":60
}
},
"size":
{
"max":10000024
},
"mime":
{
"stripos":
[
"application/xhtml+xml",
"application/javascript",
"text/html",
"text/plain",
"text/css",
"image/webp",
"image/png",
"image/gif",
"image/ico"
]
},
"url":
{
"stripos":
[
"http"
]
}
}
]
}
}
}
}