mirror of
https://github.com/twisterarmy/twister-core.git
synced 2025-01-22 20:44:56 +00:00
improve search
This commit is contained in:
parent
c500409517
commit
080ac30468
310
src/twister.cpp
310
src/twister.cpp
@ -2347,33 +2347,201 @@ Value torrentstatus(const Array& params, bool fHelp)
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
class TextSearch
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
enum search_mode {
|
||||||
|
TEXTSEARCH_EXACT,
|
||||||
|
TEXTSEARCH_ALL,
|
||||||
|
TEXTSEARCH_ANY
|
||||||
|
};
|
||||||
|
|
||||||
|
TextSearch(std::string const &keyword, libtorrent::entry const ¶ms);
|
||||||
|
|
||||||
|
bool matchText(std::string msg);
|
||||||
|
libtorrent::lazy_entry const* matchRawMessage(std::string const &rawMessage);
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::vector<std::string> keywords;
|
||||||
|
search_mode mode;
|
||||||
|
bool caseInsensitive;
|
||||||
|
int64_t timeMin, timeMax;
|
||||||
|
std::string username;
|
||||||
|
};
|
||||||
|
|
||||||
|
TextSearch::TextSearch(string const &keyword, entry const ¶ms) :
|
||||||
|
mode(TEXTSEARCH_EXACT),
|
||||||
|
caseInsensitive(false),
|
||||||
|
timeMin(0),
|
||||||
|
timeMax(numeric_limits<int64_t>::max())
|
||||||
|
{
|
||||||
|
entry const *pMode = params.find_key("mode");
|
||||||
|
if( pMode && pMode->type() == entry::string_t ) {
|
||||||
|
string strMode = pMode->string();
|
||||||
|
if( strMode == "all" ) {
|
||||||
|
mode = TEXTSEARCH_ALL;
|
||||||
|
} else if( strMode == "any" ) {
|
||||||
|
mode = TEXTSEARCH_ANY;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
entry const *pCase = params.find_key("case");
|
||||||
|
if( pCase && pCase->type() == entry::string_t && pCase->string() == "insensitive" ) {
|
||||||
|
caseInsensitive = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t now = GetAdjustedTime();
|
||||||
|
|
||||||
|
entry const *pAgeMin = params.find_key("agemin");
|
||||||
|
if( pAgeMin && pAgeMin->type() == entry::int_t ) {
|
||||||
|
timeMax = now - pAgeMin->integer() * 24*60*60;
|
||||||
|
}
|
||||||
|
|
||||||
|
entry const *pAgeMax = params.find_key("agemax");
|
||||||
|
if( pAgeMax && pAgeMax->type() == entry::int_t ) {
|
||||||
|
timeMin = now - pAgeMax->integer() * 24*60*60;
|
||||||
|
}
|
||||||
|
|
||||||
|
entry const *pUsername = params.find_key("username");
|
||||||
|
if( pUsername && pUsername->type() == entry::string_t ) {
|
||||||
|
username = pUsername->string();
|
||||||
|
}
|
||||||
|
|
||||||
|
if( mode == TEXTSEARCH_EXACT ) {
|
||||||
|
keywords.push_back( keyword );
|
||||||
|
} else {
|
||||||
|
stringstream stream( keyword );
|
||||||
|
string word;
|
||||||
|
while( getline(stream, word, ' ') ) {
|
||||||
|
if( !word.empty() ) {
|
||||||
|
keywords.push_back( word );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if( caseInsensitive ) {
|
||||||
|
for( vector<string>::iterator it=keywords.begin(); it != keywords.end(); ++it ) {
|
||||||
|
#ifdef HAVE_BOOST_LOCALE
|
||||||
|
*it = boost::locale::to_lower(*it);
|
||||||
|
#else
|
||||||
|
boost::algorithm::to_lower(*it);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool TextSearch::matchText(string msg)
|
||||||
|
{
|
||||||
|
if( keywords.size() == 0 ) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( caseInsensitive ) {
|
||||||
|
#ifdef HAVE_BOOST_LOCALE
|
||||||
|
msg = boost::locale::to_lower(msg);
|
||||||
|
#else
|
||||||
|
boost::algorithm::to_lower(msg);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
switch( mode ) {
|
||||||
|
case TEXTSEARCH_EXACT:
|
||||||
|
return msg.find(keywords[0]) != string::npos;
|
||||||
|
case TEXTSEARCH_ALL:
|
||||||
|
for( vector<string>::const_iterator it=keywords.begin(); it != keywords.end(); ++it ) {
|
||||||
|
if( msg.find(*it) == string::npos ) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
case TEXTSEARCH_ANY:
|
||||||
|
for( vector<string>::const_iterator it=keywords.begin(); it != keywords.end(); ++it ) {
|
||||||
|
if( msg.find(*it) != string::npos ) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
lazy_entry const* TextSearch::matchRawMessage(string const &rawMessage)
|
||||||
|
{
|
||||||
|
if( keywords.size() == 0 ) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
// fast check
|
||||||
|
if( mode != TEXTSEARCH_ANY && rawMessage.find(keywords[0]) == string::npos ) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
lazy_entry v;
|
||||||
|
int pos;
|
||||||
|
libtorrent::error_code ec;
|
||||||
|
if (lazy_bdecode(rawMessage.data(), rawMessage.data()+rawMessage.size(), v, ec, &pos) == 0) {
|
||||||
|
lazy_entry const* vv = v.dict_find_dict("v");
|
||||||
|
lazy_entry const* post = vv ? vv->dict_find_dict("userpost") : v.dict_find_dict("userpost");
|
||||||
|
if( post ) {
|
||||||
|
lazy_entry const* rt = post->dict_find_dict("rt");
|
||||||
|
lazy_entry const* p = rt ? rt : post;
|
||||||
|
|
||||||
|
if( username.length() ) {
|
||||||
|
string user = p->dict_find_string_value("n");
|
||||||
|
if( user != username ) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t time = p->dict_find_int_value("time");
|
||||||
|
if( time < timeMin || time > timeMax ) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
string msg = p->dict_find_string_value("msg");
|
||||||
|
return matchText( msg ) ? p : 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
Value search(const Array& params, bool fHelp)
|
Value search(const Array& params, bool fHelp)
|
||||||
{
|
{
|
||||||
if (fHelp || params.size() < 3 || params.size() > 4)
|
if (fHelp || params.size() < 3 || params.size() > 4)
|
||||||
throw runtime_error(
|
throw runtime_error(
|
||||||
"search <scope> <text> <count> [<username>]\n"
|
"search <scope> <text> <count> ['{\"username\":username,\"mode\":\"exact\"|\"all\"|\"any\",\"case\":\"sensitive\"\"insensitive\",\"agemin\":agemin,\"agemax\":agemin}']\n"
|
||||||
"search text in known data\n"
|
"search text in available data\n"
|
||||||
"<scope> is data area: messages, directmsgs, profiles, users, hashtags\n"
|
"<scope> is data area: messages, directmsgs, profiles, users, hashtags\n"
|
||||||
"<text> is a phrase to search\n"
|
"<text> is a phrase to search\n"
|
||||||
"up to <count> entries are returned\n"
|
"up to <count> entries are returned\n"
|
||||||
"<username> in messages scope is optional and allows to search in username's messages only\n"
|
"<username> in messages scope is optional and allows to search in username's messages only\n"
|
||||||
"<username> in directmsgs scope is required and sets whose conversation to search");
|
"<username> in directmsgs scope is required and sets whose conversation to search\n"
|
||||||
|
"\"mode\" and \"case\" are search mode options\n"
|
||||||
|
"\"agemin\" and \"agemax\" (days) are message filters\n"
|
||||||
|
"\"mode\", \"case\", \"agemin\", and \"agemax\" are optional");
|
||||||
|
|
||||||
string scope = params[0].get_str();
|
string scope = params[0].get_str();
|
||||||
string keyword = params[1].get_str();
|
string keyword = params[1].get_str();
|
||||||
int count = params[2].get_int();
|
int count = params[2].get_int();
|
||||||
string username = params.size()==4 ? params[3].get_str() : string();
|
entry options = params.size()==4 ? jsonToEntry(params[3].get_obj()) : entry();
|
||||||
|
string username;
|
||||||
|
|
||||||
if( keyword.size() == 0 ) {
|
if( keyword.size() == 0 ) {
|
||||||
throw runtime_error("Empty <text> parameter");
|
throw runtime_error("Empty <text> parameter");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
entry const *pUsername = options.find_key("username");
|
||||||
|
if( pUsername && pUsername->type() == entry::string_t ) {
|
||||||
|
username = pUsername->string();
|
||||||
|
}
|
||||||
|
|
||||||
Array ret;
|
Array ret;
|
||||||
|
|
||||||
if( scope == "messages" ) {
|
if( scope == "messages" ) {
|
||||||
// search public messages
|
// search public messages
|
||||||
std::map< pair<std::string,int>, pair<int64,entry> > posts;
|
std::map< pair<std::string,int>, pair<int64,entry> > posts;
|
||||||
|
|
||||||
|
TextSearch searcher(keyword, options);
|
||||||
|
|
||||||
// search public messages in torrents
|
// search public messages in torrents
|
||||||
{
|
{
|
||||||
LOCK(cs_twister);
|
LOCK(cs_twister);
|
||||||
@ -2391,29 +2559,17 @@ Value search(const Array& params, bool fHelp)
|
|||||||
item.second.get_pieces(pieces, std::numeric_limits<int>::max(), std::numeric_limits<int>::max(), -1, ~USERPOST_FLAG_DM);
|
item.second.get_pieces(pieces, std::numeric_limits<int>::max(), std::numeric_limits<int>::max(), -1, ~USERPOST_FLAG_DM);
|
||||||
|
|
||||||
BOOST_FOREACH(string const& piece, pieces) {
|
BOOST_FOREACH(string const& piece, pieces) {
|
||||||
if( piece.find(keyword) != string::npos ) {
|
lazy_entry const* p = searcher.matchRawMessage(piece);
|
||||||
lazy_entry v;
|
if( p ) {
|
||||||
int pos;
|
string n = p->dict_find_string_value("n");
|
||||||
libtorrent::error_code ec;
|
int k = p->dict_find_int_value("k");
|
||||||
if (lazy_bdecode(piece.data(), piece.data()+piece.size(), v, ec, &pos) == 0) {
|
int64 time = p->dict_find_int_value("time",-1);
|
||||||
lazy_entry const* post = v.dict_find_dict("userpost");
|
|
||||||
if( post ) {
|
|
||||||
lazy_entry const* rt = post->dict_find_dict("rt");
|
|
||||||
lazy_entry const* p = rt ? rt : post;
|
|
||||||
string msg = p->dict_find_string_value("msg");
|
|
||||||
if( msg.find(keyword) != string::npos ) {
|
|
||||||
string n = p->dict_find_string_value("n");
|
|
||||||
int k = p->dict_find_int_value("k");
|
|
||||||
int64 time = p->dict_find_int_value("time",-1);
|
|
||||||
|
|
||||||
entry vEntry;
|
entry vEntry;
|
||||||
vEntry = *p;
|
vEntry = *p;
|
||||||
hexcapePost(vEntry);
|
hexcapePost(vEntry);
|
||||||
|
|
||||||
posts[pair<std::string,int>(n,k)] = pair<int64,entry>(time,vEntry);
|
posts[pair<std::string,int>(n,k)] = pair<int64,entry>(time,vEntry);
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2430,39 +2586,19 @@ Value search(const Array& params, bool fHelp)
|
|||||||
continue;
|
continue;
|
||||||
for (entry::list_type::const_iterator j = i->second.list().begin(); j != i->second.list().end(); ++j) {
|
for (entry::list_type::const_iterator j = i->second.list().begin(); j != i->second.list().end(); ++j) {
|
||||||
string str_p = j->find_key("p")->string();
|
string str_p = j->find_key("p")->string();
|
||||||
if( str_p.find(keyword) != string::npos ) {
|
lazy_entry const* p = searcher.matchRawMessage(str_p);
|
||||||
lazy_entry p;
|
if( p ) {
|
||||||
int pos;
|
string n = p->dict_find_string_value("n");
|
||||||
libtorrent::error_code err;
|
int k = p->dict_find_int_value("k");
|
||||||
int ret = lazy_bdecode(str_p.data(), str_p.data() + str_p.size(), p, err, &pos);
|
pair<std::string,int> post_id(n,k);
|
||||||
|
if( posts.count(post_id) == 0 ) {
|
||||||
|
int64 time = p->dict_find_int_value("time",-1);
|
||||||
|
|
||||||
lazy_entry const* v = p.dict_find_dict("v");
|
entry vEntry;
|
||||||
if( v ) {
|
vEntry = *p;
|
||||||
lazy_entry const* post = v->dict_find_dict("userpost");
|
hexcapePost(vEntry);
|
||||||
if( post ) {
|
|
||||||
// post, mention, status
|
|
||||||
lazy_entry const* rt = post->dict_find_dict("rt");
|
|
||||||
lazy_entry const* p = rt ? rt : post;
|
|
||||||
string msg = p->dict_find_string_value("msg");
|
|
||||||
if( msg.find(keyword) != string::npos ) {
|
|
||||||
string n = p->dict_find_string_value("n");
|
|
||||||
|
|
||||||
if( username.size() == 0 || n == username ) {
|
posts[post_id] = pair<int64,entry>(time,vEntry);
|
||||||
int k = p->dict_find_int_value("k");
|
|
||||||
|
|
||||||
pair<std::string,int> post_id(n,k);
|
|
||||||
if( posts.count(post_id) == 0 ) {
|
|
||||||
int64 time = p->dict_find_int_value("time",-1);
|
|
||||||
|
|
||||||
entry vEntry;
|
|
||||||
vEntry = *p;
|
|
||||||
hexcapePost(vEntry);
|
|
||||||
|
|
||||||
posts[post_id] = pair<int64,entry>(time,vEntry);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2481,16 +2617,18 @@ Value search(const Array& params, bool fHelp)
|
|||||||
|
|
||||||
} else if( scope == "directmsgs" ) {
|
} else if( scope == "directmsgs" ) {
|
||||||
// search direct messages
|
// search direct messages
|
||||||
if( m_users.count(username) ){
|
if( m_users.count(username) ) {
|
||||||
std::multimap<int64,entry> postsByTime;
|
std::multimap<int64,entry> postsByTime;
|
||||||
|
|
||||||
|
TextSearch searcher(keyword, options);
|
||||||
|
|
||||||
{
|
{
|
||||||
LOCK(cs_twister);
|
LOCK(cs_twister);
|
||||||
|
|
||||||
BOOST_FOREACH(const PAIRTYPE(std::string,std::vector<StoredDirectMsg>)& list, m_users[username].m_directmsg) {
|
BOOST_FOREACH(const PAIRTYPE(std::string,std::vector<StoredDirectMsg>)& list, m_users[username].m_directmsg) {
|
||||||
string remoteUser = list.first;
|
string remoteUser = list.first;
|
||||||
BOOST_FOREACH(const StoredDirectMsg& item, list.second) {
|
BOOST_FOREACH(const StoredDirectMsg& item, list.second) {
|
||||||
if( item.m_text.find(keyword) != string::npos ) {
|
if( searcher.matchText(item.m_text) ) {
|
||||||
int64 time = item.m_utcTime;
|
int64 time = item.m_utcTime;
|
||||||
entry vEntry;
|
entry vEntry;
|
||||||
vEntry["remoteUser"] = remoteUser;
|
vEntry["remoteUser"] = remoteUser;
|
||||||
@ -2518,38 +2656,38 @@ Value search(const Array& params, bool fHelp)
|
|||||||
entry data = ses->dht_getLocalData();
|
entry data = ses->dht_getLocalData();
|
||||||
std::map<string,entry> users;
|
std::map<string,entry> users;
|
||||||
|
|
||||||
|
TextSearch searcher(keyword, options);
|
||||||
|
|
||||||
for (entry::dictionary_type::const_iterator i = data.dict().begin(); i != data.dict().end(); ++i) {
|
for (entry::dictionary_type::const_iterator i = data.dict().begin(); i != data.dict().end(); ++i) {
|
||||||
if ( i->second.type() != entry::list_t )
|
if ( i->second.type() != entry::list_t )
|
||||||
continue;
|
continue;
|
||||||
for (entry::list_type::const_iterator j = i->second.list().begin(); j != i->second.list().end(); ++j) {
|
for (entry::list_type::const_iterator j = i->second.list().begin(); j != i->second.list().end(); ++j) {
|
||||||
string str_p = j->find_key("p")->string();
|
string str_p = j->find_key("p")->string();
|
||||||
if( str_p.find(keyword) != string::npos ) {
|
lazy_entry p;
|
||||||
lazy_entry p;
|
int pos;
|
||||||
int pos;
|
libtorrent::error_code err;
|
||||||
libtorrent::error_code err;
|
int ret = lazy_bdecode(str_p.data(), str_p.data() + str_p.size(), p, err, &pos);
|
||||||
int ret = lazy_bdecode(str_p.data(), str_p.data() + str_p.size(), p, err, &pos);
|
|
||||||
|
|
||||||
lazy_entry const* target = p.dict_find_dict("target");
|
lazy_entry const* target = p.dict_find_dict("target");
|
||||||
if( target ) {
|
if( target ) {
|
||||||
string resource = target->dict_find_string_value("r");
|
string resource = target->dict_find_string_value("r");
|
||||||
if( resource == "profile" ) {
|
if( resource == "profile" ) {
|
||||||
lazy_entry const* v = p.dict_find_dict("v");
|
lazy_entry const* v = p.dict_find_dict("v");
|
||||||
if( v ) {
|
if( v ) {
|
||||||
string bio = v->dict_find_string_value("bio");
|
string bio = v->dict_find_string_value("bio");
|
||||||
string fullname = v->dict_find_string_value("fullname");
|
string fullname = v->dict_find_string_value("fullname");
|
||||||
string location = v->dict_find_string_value("location");
|
string location = v->dict_find_string_value("location");
|
||||||
string url = v->dict_find_string_value("url");
|
string url = v->dict_find_string_value("url");
|
||||||
|
|
||||||
if( bio.find(keyword) != string::npos ||
|
if( searcher.matchText(bio) ||
|
||||||
fullname.find(keyword) != string::npos ||
|
searcher.matchText(fullname) ||
|
||||||
location.find(keyword) != string::npos ||
|
searcher.matchText(location) ||
|
||||||
url.find(keyword) != string::npos ) {
|
searcher.matchText(url) ) {
|
||||||
|
|
||||||
string n = target->dict_find_string_value("n");
|
string n = target->dict_find_string_value("n");
|
||||||
entry vEntry;
|
entry vEntry;
|
||||||
vEntry = *v;
|
vEntry = *v;
|
||||||
users.insert(pair<string,entry>(n,vEntry));
|
users.insert(pair<string,entry>(n,vEntry));
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2571,6 +2709,8 @@ Value search(const Array& params, bool fHelp)
|
|||||||
// @todo: there should be a faster way
|
// @todo: there should be a faster way
|
||||||
std::multimap<string::size_type,std::string> usernamesByLength;
|
std::multimap<string::size_type,std::string> usernamesByLength;
|
||||||
|
|
||||||
|
boost::algorithm::to_lower(keyword);
|
||||||
|
|
||||||
string allowed = "abcdefghijklmnopqrstuvwxyz0123456789_";
|
string allowed = "abcdefghijklmnopqrstuvwxyz0123456789_";
|
||||||
for( int i = 0; i < allowed.size(); ++i ) {
|
for( int i = 0; i < allowed.size(); ++i ) {
|
||||||
set<string> usernames;
|
set<string> usernames;
|
||||||
@ -2593,6 +2733,12 @@ Value search(const Array& params, bool fHelp)
|
|||||||
// search hashtags
|
// search hashtags
|
||||||
std::multimap<string::size_type,std::string> hashtagsByLength;
|
std::multimap<string::size_type,std::string> hashtagsByLength;
|
||||||
|
|
||||||
|
#ifdef HAVE_BOOST_LOCALE
|
||||||
|
keyword = boost::locale::to_lower(keyword);
|
||||||
|
#else
|
||||||
|
boost::algorithm::to_lower(keyword);
|
||||||
|
#endif
|
||||||
|
|
||||||
{
|
{
|
||||||
LOCK(cs_seenHashtags);
|
LOCK(cs_seenHashtags);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user