mirror of
https://github.com/twisterarmy/twister-core.git
synced 2025-01-22 20:44:56 +00:00
improve search
This commit is contained in:
parent
c500409517
commit
080ac30468
310
src/twister.cpp
310
src/twister.cpp
@ -2347,33 +2347,201 @@ Value torrentstatus(const Array& params, bool fHelp)
|
||||
return result;
|
||||
}
|
||||
|
||||
class TextSearch
|
||||
{
|
||||
public:
|
||||
enum search_mode {
|
||||
TEXTSEARCH_EXACT,
|
||||
TEXTSEARCH_ALL,
|
||||
TEXTSEARCH_ANY
|
||||
};
|
||||
|
||||
TextSearch(std::string const &keyword, libtorrent::entry const ¶ms);
|
||||
|
||||
bool matchText(std::string msg);
|
||||
libtorrent::lazy_entry const* matchRawMessage(std::string const &rawMessage);
|
||||
|
||||
private:
|
||||
std::vector<std::string> keywords;
|
||||
search_mode mode;
|
||||
bool caseInsensitive;
|
||||
int64_t timeMin, timeMax;
|
||||
std::string username;
|
||||
};
|
||||
|
||||
TextSearch::TextSearch(string const &keyword, entry const ¶ms) :
|
||||
mode(TEXTSEARCH_EXACT),
|
||||
caseInsensitive(false),
|
||||
timeMin(0),
|
||||
timeMax(numeric_limits<int64_t>::max())
|
||||
{
|
||||
entry const *pMode = params.find_key("mode");
|
||||
if( pMode && pMode->type() == entry::string_t ) {
|
||||
string strMode = pMode->string();
|
||||
if( strMode == "all" ) {
|
||||
mode = TEXTSEARCH_ALL;
|
||||
} else if( strMode == "any" ) {
|
||||
mode = TEXTSEARCH_ANY;
|
||||
}
|
||||
}
|
||||
|
||||
entry const *pCase = params.find_key("case");
|
||||
if( pCase && pCase->type() == entry::string_t && pCase->string() == "insensitive" ) {
|
||||
caseInsensitive = true;
|
||||
}
|
||||
|
||||
int64_t now = GetAdjustedTime();
|
||||
|
||||
entry const *pAgeMin = params.find_key("agemin");
|
||||
if( pAgeMin && pAgeMin->type() == entry::int_t ) {
|
||||
timeMax = now - pAgeMin->integer() * 24*60*60;
|
||||
}
|
||||
|
||||
entry const *pAgeMax = params.find_key("agemax");
|
||||
if( pAgeMax && pAgeMax->type() == entry::int_t ) {
|
||||
timeMin = now - pAgeMax->integer() * 24*60*60;
|
||||
}
|
||||
|
||||
entry const *pUsername = params.find_key("username");
|
||||
if( pUsername && pUsername->type() == entry::string_t ) {
|
||||
username = pUsername->string();
|
||||
}
|
||||
|
||||
if( mode == TEXTSEARCH_EXACT ) {
|
||||
keywords.push_back( keyword );
|
||||
} else {
|
||||
stringstream stream( keyword );
|
||||
string word;
|
||||
while( getline(stream, word, ' ') ) {
|
||||
if( !word.empty() ) {
|
||||
keywords.push_back( word );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( caseInsensitive ) {
|
||||
for( vector<string>::iterator it=keywords.begin(); it != keywords.end(); ++it ) {
|
||||
#ifdef HAVE_BOOST_LOCALE
|
||||
*it = boost::locale::to_lower(*it);
|
||||
#else
|
||||
boost::algorithm::to_lower(*it);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool TextSearch::matchText(string msg)
|
||||
{
|
||||
if( keywords.size() == 0 ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if( caseInsensitive ) {
|
||||
#ifdef HAVE_BOOST_LOCALE
|
||||
msg = boost::locale::to_lower(msg);
|
||||
#else
|
||||
boost::algorithm::to_lower(msg);
|
||||
#endif
|
||||
}
|
||||
|
||||
switch( mode ) {
|
||||
case TEXTSEARCH_EXACT:
|
||||
return msg.find(keywords[0]) != string::npos;
|
||||
case TEXTSEARCH_ALL:
|
||||
for( vector<string>::const_iterator it=keywords.begin(); it != keywords.end(); ++it ) {
|
||||
if( msg.find(*it) == string::npos ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
case TEXTSEARCH_ANY:
|
||||
for( vector<string>::const_iterator it=keywords.begin(); it != keywords.end(); ++it ) {
|
||||
if( msg.find(*it) != string::npos ) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
lazy_entry const* TextSearch::matchRawMessage(string const &rawMessage)
|
||||
{
|
||||
if( keywords.size() == 0 ) {
|
||||
return 0;
|
||||
}
|
||||
// fast check
|
||||
if( mode != TEXTSEARCH_ANY && rawMessage.find(keywords[0]) == string::npos ) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
lazy_entry v;
|
||||
int pos;
|
||||
libtorrent::error_code ec;
|
||||
if (lazy_bdecode(rawMessage.data(), rawMessage.data()+rawMessage.size(), v, ec, &pos) == 0) {
|
||||
lazy_entry const* vv = v.dict_find_dict("v");
|
||||
lazy_entry const* post = vv ? vv->dict_find_dict("userpost") : v.dict_find_dict("userpost");
|
||||
if( post ) {
|
||||
lazy_entry const* rt = post->dict_find_dict("rt");
|
||||
lazy_entry const* p = rt ? rt : post;
|
||||
|
||||
if( username.length() ) {
|
||||
string user = p->dict_find_string_value("n");
|
||||
if( user != username ) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int64_t time = p->dict_find_int_value("time");
|
||||
if( time < timeMin || time > timeMax ) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
string msg = p->dict_find_string_value("msg");
|
||||
return matchText( msg ) ? p : 0;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
Value search(const Array& params, bool fHelp)
|
||||
{
|
||||
if (fHelp || params.size() < 3 || params.size() > 4)
|
||||
throw runtime_error(
|
||||
"search <scope> <text> <count> [<username>]\n"
|
||||
"search text in known data\n"
|
||||
"search <scope> <text> <count> ['{\"username\":username,\"mode\":\"exact\"|\"all\"|\"any\",\"case\":\"sensitive\"\"insensitive\",\"agemin\":agemin,\"agemax\":agemin}']\n"
|
||||
"search text in available data\n"
|
||||
"<scope> is data area: messages, directmsgs, profiles, users, hashtags\n"
|
||||
"<text> is a phrase to search\n"
|
||||
"up to <count> entries are returned\n"
|
||||
"<username> in messages scope is optional and allows to search in username's messages only\n"
|
||||
"<username> in directmsgs scope is required and sets whose conversation to search");
|
||||
"<username> in directmsgs scope is required and sets whose conversation to search\n"
|
||||
"\"mode\" and \"case\" are search mode options\n"
|
||||
"\"agemin\" and \"agemax\" (days) are message filters\n"
|
||||
"\"mode\", \"case\", \"agemin\", and \"agemax\" are optional");
|
||||
|
||||
string scope = params[0].get_str();
|
||||
string keyword = params[1].get_str();
|
||||
int count = params[2].get_int();
|
||||
string username = params.size()==4 ? params[3].get_str() : string();
|
||||
entry options = params.size()==4 ? jsonToEntry(params[3].get_obj()) : entry();
|
||||
string username;
|
||||
|
||||
if( keyword.size() == 0 ) {
|
||||
throw runtime_error("Empty <text> parameter");
|
||||
}
|
||||
|
||||
entry const *pUsername = options.find_key("username");
|
||||
if( pUsername && pUsername->type() == entry::string_t ) {
|
||||
username = pUsername->string();
|
||||
}
|
||||
|
||||
Array ret;
|
||||
|
||||
if( scope == "messages" ) {
|
||||
// search public messages
|
||||
std::map< pair<std::string,int>, pair<int64,entry> > posts;
|
||||
|
||||
TextSearch searcher(keyword, options);
|
||||
|
||||
// search public messages in torrents
|
||||
{
|
||||
LOCK(cs_twister);
|
||||
@ -2391,29 +2559,17 @@ Value search(const Array& params, bool fHelp)
|
||||
item.second.get_pieces(pieces, std::numeric_limits<int>::max(), std::numeric_limits<int>::max(), -1, ~USERPOST_FLAG_DM);
|
||||
|
||||
BOOST_FOREACH(string const& piece, pieces) {
|
||||
if( piece.find(keyword) != string::npos ) {
|
||||
lazy_entry v;
|
||||
int pos;
|
||||
libtorrent::error_code ec;
|
||||
if (lazy_bdecode(piece.data(), piece.data()+piece.size(), v, ec, &pos) == 0) {
|
||||
lazy_entry const* post = v.dict_find_dict("userpost");
|
||||
if( post ) {
|
||||
lazy_entry const* rt = post->dict_find_dict("rt");
|
||||
lazy_entry const* p = rt ? rt : post;
|
||||
string msg = p->dict_find_string_value("msg");
|
||||
if( msg.find(keyword) != string::npos ) {
|
||||
string n = p->dict_find_string_value("n");
|
||||
int k = p->dict_find_int_value("k");
|
||||
int64 time = p->dict_find_int_value("time",-1);
|
||||
lazy_entry const* p = searcher.matchRawMessage(piece);
|
||||
if( p ) {
|
||||
string n = p->dict_find_string_value("n");
|
||||
int k = p->dict_find_int_value("k");
|
||||
int64 time = p->dict_find_int_value("time",-1);
|
||||
|
||||
entry vEntry;
|
||||
vEntry = *p;
|
||||
hexcapePost(vEntry);
|
||||
entry vEntry;
|
||||
vEntry = *p;
|
||||
hexcapePost(vEntry);
|
||||
|
||||
posts[pair<std::string,int>(n,k)] = pair<int64,entry>(time,vEntry);
|
||||
}
|
||||
}
|
||||
}
|
||||
posts[pair<std::string,int>(n,k)] = pair<int64,entry>(time,vEntry);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2430,39 +2586,19 @@ Value search(const Array& params, bool fHelp)
|
||||
continue;
|
||||
for (entry::list_type::const_iterator j = i->second.list().begin(); j != i->second.list().end(); ++j) {
|
||||
string str_p = j->find_key("p")->string();
|
||||
if( str_p.find(keyword) != string::npos ) {
|
||||
lazy_entry p;
|
||||
int pos;
|
||||
libtorrent::error_code err;
|
||||
int ret = lazy_bdecode(str_p.data(), str_p.data() + str_p.size(), p, err, &pos);
|
||||
lazy_entry const* p = searcher.matchRawMessage(str_p);
|
||||
if( p ) {
|
||||
string n = p->dict_find_string_value("n");
|
||||
int k = p->dict_find_int_value("k");
|
||||
pair<std::string,int> post_id(n,k);
|
||||
if( posts.count(post_id) == 0 ) {
|
||||
int64 time = p->dict_find_int_value("time",-1);
|
||||
|
||||
lazy_entry const* v = p.dict_find_dict("v");
|
||||
if( v ) {
|
||||
lazy_entry const* post = v->dict_find_dict("userpost");
|
||||
if( post ) {
|
||||
// post, mention, status
|
||||
lazy_entry const* rt = post->dict_find_dict("rt");
|
||||
lazy_entry const* p = rt ? rt : post;
|
||||
string msg = p->dict_find_string_value("msg");
|
||||
if( msg.find(keyword) != string::npos ) {
|
||||
string n = p->dict_find_string_value("n");
|
||||
entry vEntry;
|
||||
vEntry = *p;
|
||||
hexcapePost(vEntry);
|
||||
|
||||
if( username.size() == 0 || n == username ) {
|
||||
int k = p->dict_find_int_value("k");
|
||||
|
||||
pair<std::string,int> post_id(n,k);
|
||||
if( posts.count(post_id) == 0 ) {
|
||||
int64 time = p->dict_find_int_value("time",-1);
|
||||
|
||||
entry vEntry;
|
||||
vEntry = *p;
|
||||
hexcapePost(vEntry);
|
||||
|
||||
posts[post_id] = pair<int64,entry>(time,vEntry);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
posts[post_id] = pair<int64,entry>(time,vEntry);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2481,16 +2617,18 @@ Value search(const Array& params, bool fHelp)
|
||||
|
||||
} else if( scope == "directmsgs" ) {
|
||||
// search direct messages
|
||||
if( m_users.count(username) ){
|
||||
if( m_users.count(username) ) {
|
||||
std::multimap<int64,entry> postsByTime;
|
||||
|
||||
TextSearch searcher(keyword, options);
|
||||
|
||||
{
|
||||
LOCK(cs_twister);
|
||||
|
||||
BOOST_FOREACH(const PAIRTYPE(std::string,std::vector<StoredDirectMsg>)& list, m_users[username].m_directmsg) {
|
||||
string remoteUser = list.first;
|
||||
BOOST_FOREACH(const StoredDirectMsg& item, list.second) {
|
||||
if( item.m_text.find(keyword) != string::npos ) {
|
||||
if( searcher.matchText(item.m_text) ) {
|
||||
int64 time = item.m_utcTime;
|
||||
entry vEntry;
|
||||
vEntry["remoteUser"] = remoteUser;
|
||||
@ -2518,38 +2656,38 @@ Value search(const Array& params, bool fHelp)
|
||||
entry data = ses->dht_getLocalData();
|
||||
std::map<string,entry> users;
|
||||
|
||||
TextSearch searcher(keyword, options);
|
||||
|
||||
for (entry::dictionary_type::const_iterator i = data.dict().begin(); i != data.dict().end(); ++i) {
|
||||
if ( i->second.type() != entry::list_t )
|
||||
continue;
|
||||
for (entry::list_type::const_iterator j = i->second.list().begin(); j != i->second.list().end(); ++j) {
|
||||
string str_p = j->find_key("p")->string();
|
||||
if( str_p.find(keyword) != string::npos ) {
|
||||
lazy_entry p;
|
||||
int pos;
|
||||
libtorrent::error_code err;
|
||||
int ret = lazy_bdecode(str_p.data(), str_p.data() + str_p.size(), p, err, &pos);
|
||||
lazy_entry p;
|
||||
int pos;
|
||||
libtorrent::error_code err;
|
||||
int ret = lazy_bdecode(str_p.data(), str_p.data() + str_p.size(), p, err, &pos);
|
||||
|
||||
lazy_entry const* target = p.dict_find_dict("target");
|
||||
if( target ) {
|
||||
string resource = target->dict_find_string_value("r");
|
||||
if( resource == "profile" ) {
|
||||
lazy_entry const* v = p.dict_find_dict("v");
|
||||
if( v ) {
|
||||
string bio = v->dict_find_string_value("bio");
|
||||
string fullname = v->dict_find_string_value("fullname");
|
||||
string location = v->dict_find_string_value("location");
|
||||
string url = v->dict_find_string_value("url");
|
||||
lazy_entry const* target = p.dict_find_dict("target");
|
||||
if( target ) {
|
||||
string resource = target->dict_find_string_value("r");
|
||||
if( resource == "profile" ) {
|
||||
lazy_entry const* v = p.dict_find_dict("v");
|
||||
if( v ) {
|
||||
string bio = v->dict_find_string_value("bio");
|
||||
string fullname = v->dict_find_string_value("fullname");
|
||||
string location = v->dict_find_string_value("location");
|
||||
string url = v->dict_find_string_value("url");
|
||||
|
||||
if( bio.find(keyword) != string::npos ||
|
||||
fullname.find(keyword) != string::npos ||
|
||||
location.find(keyword) != string::npos ||
|
||||
url.find(keyword) != string::npos ) {
|
||||
if( searcher.matchText(bio) ||
|
||||
searcher.matchText(fullname) ||
|
||||
searcher.matchText(location) ||
|
||||
searcher.matchText(url) ) {
|
||||
|
||||
string n = target->dict_find_string_value("n");
|
||||
entry vEntry;
|
||||
vEntry = *v;
|
||||
users.insert(pair<string,entry>(n,vEntry));
|
||||
}
|
||||
string n = target->dict_find_string_value("n");
|
||||
entry vEntry;
|
||||
vEntry = *v;
|
||||
users.insert(pair<string,entry>(n,vEntry));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2571,6 +2709,8 @@ Value search(const Array& params, bool fHelp)
|
||||
// @todo: there should be a faster way
|
||||
std::multimap<string::size_type,std::string> usernamesByLength;
|
||||
|
||||
boost::algorithm::to_lower(keyword);
|
||||
|
||||
string allowed = "abcdefghijklmnopqrstuvwxyz0123456789_";
|
||||
for( int i = 0; i < allowed.size(); ++i ) {
|
||||
set<string> usernames;
|
||||
@ -2593,6 +2733,12 @@ Value search(const Array& params, bool fHelp)
|
||||
// search hashtags
|
||||
std::multimap<string::size_type,std::string> hashtagsByLength;
|
||||
|
||||
#ifdef HAVE_BOOST_LOCALE
|
||||
keyword = boost::locale::to_lower(keyword);
|
||||
#else
|
||||
boost::algorithm::to_lower(keyword);
|
||||
#endif
|
||||
|
||||
{
|
||||
LOCK(cs_seenHashtags);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user