From 672b56a86584722d3c3f57a837152cbf01a3778f Mon Sep 17 00:00:00 2001
From: Miguel Freitas
Date: Sat, 15 Feb 2014 20:41:54 -0200
Subject: [PATCH] experimental top trending hashtags

Collect hashtags from user posts passing through DHT storage
(refresh_storage/load_storage), weight each sighting by the age of the
post, and expose the ranking through a new "gettrendinghashtags" RPC.

---
Review notes (not part of the commit message):
 - gettrendinghashtags: rank with std::multimap; a std::map keyed by score
   kept only one hashtag per distinct score.
 - node.cpp: hashtags are only extracted from dictionary entries, and
   load_storage() skips items whose lazy_bdecode() failed.
 - updateSeenHashtags: '#' is tested before copying/lower-casing the token;
   behaviour is unchanged.
 - Template arguments that had been stripped from the added lines are
   restored. Pre-existing context lines whose "<...>" text was lost
   (m_users, RPCConvertValues) are left as found and must be re-synced
   against the tree before applying; the index/commit hashes still refer
   to the original revision.

 libtorrent/src/kademlia/node.cpp | 37 ++++++++++++++-
 src/bitcoinrpc.cpp               |  2 +
 src/bitcoinrpc.h                 |  1 +
 src/clientversion.h              |  2 +-
 src/twister.cpp                  | 72 ++++++++++++++++++++++++++++++++
 src/twister.h                    |  2 +
 6 files changed, 114 insertions(+), 2 deletions(-)

diff --git a/libtorrent/src/kademlia/node.cpp b/libtorrent/src/kademlia/node.cpp
index 0bea1ad6..81ddb686 100644
--- a/libtorrent/src/kademlia/node.cpp
+++ b/libtorrent/src/kademlia/node.cpp
@@ -489,6 +489,30 @@ void node_impl::tick()
     }
 }
 
+// Feeds the hashtags of a stored DHT item into the trending statistics.
+// Only items that carry a "v"/"userpost" dictionary are considered, and
+// items whose target type "t" is "m" (multi) are skipped.
+static void processEntryForHashtags(lazy_entry &p)
+{
+    // dict_find_* must only be called on a dictionary; p may be something
+    // else, e.g. when the preceding bdecode did not succeed.
+    if( p.type() != lazy_entry::dict_t )
+        return;
+
+    const lazy_entry *target = p.dict_find_dict("target");
+    bool multi = (target && target->dict_find_string_value("t") == "m");
+
+    const lazy_entry *v = p.dict_find_dict("v");
+    if( v && !multi ) {
+        const lazy_entry *userpost = v->dict_find_dict("userpost");
+        if( userpost ) {
+            int64_t time = p.dict_find_int_value("time");
+            std::string msg = userpost->dict_find_string_value("msg");
+            updateSeenHashtags(msg,time);
+        }
+    }
+}
+
 bool node_impl::refresh_storage() {
     bool did_something = false;
     bool refresh_next_item = false;
@@ -524,7 +548,7 @@ bool node_impl::refresh_storage() {
             if( height > getBestHeight() ) {
                 continue; // how?
             }
-            
+
             const lazy_entry *target = p.dict_find_dict("target");
             std::string username = target->dict_find_string_value("n");
             std::string resource = target->dict_find_string_value("r");
@@ -544,6 +568,8 @@ bool node_impl::refresh_storage() {
                    target->dict_find_string_value("t").c_str());
 #endif
 
+            processEntryForHashtags(p);
+
             entry entryP;
             entryP = p; // lazy to non-lazy
 
@@ -689,6 +715,15 @@ void node_impl::load_storage(entry const* e) {
 #ifdef ENABLE_DHT_ITEM_EXPIRE
             if( !expired ) {
 #endif
+                lazy_entry p;
+                int pos;
+                error_code err;
+                // FIXME: optimize to avoid bdecode (store seq separated, etc)
+                int ret = lazy_bdecode(item.p.data(), item.p.data() + item.p.size(), p, err, &pos, 10, 500);
+                // only look for hashtags in items that decoded successfully
+                if( ret == 0 )
+                    processEntryForHashtags(p);
+
                 to_add.push_back(item);
 #ifdef ENABLE_DHT_ITEM_EXPIRE
             }
diff --git a/src/bitcoinrpc.cpp b/src/bitcoinrpc.cpp
index 44bb8169..c1348414 100644
--- a/src/bitcoinrpc.cpp
+++ b/src/bitcoinrpc.cpp
@@ -260,6 +260,7 @@ static const CRPCCommand vRPCCommands[] =
     { "listusernamespartial", &listusernamespartial, false, true },
     { "rescandirectmsgs", &rescandirectmsgs, false, true },
     { "recheckusertorrent", &recheckusertorrent, false, true },
+    { "gettrendinghashtags", &gettrendinghashtags, false, true },
 };
 
 CRPCTable::CRPCTable()
@@ -1281,6 +1282,7 @@ Array RPCConvertValues(const std::string &strMethod, const std::vector
  1) ConvertTo(params[1]);
     if (strMethod == "listusernamespartial" && n > 1) ConvertTo(params[1]);
     if (strMethod == "listusernamespartial" && n > 2) ConvertTo(params[2]);
+    if (strMethod == "gettrendinghashtags" && n > 0) ConvertTo<boost::int64_t>(params[0]);
 
     return params;
 }
diff --git a/src/bitcoinrpc.h b/src/bitcoinrpc.h
index ec85f48d..07d3dcb8 100644
--- a/src/bitcoinrpc.h
+++ b/src/bitcoinrpc.h
@@ -212,5 +212,6 @@ extern json_spirit::Value getnumpieces(const json_spirit::Array& params, bool fH
 extern json_spirit::Value listusernamespartial(const json_spirit::Array& params, bool fHelp);
 extern json_spirit::Value rescandirectmsgs(const json_spirit::Array& params, bool fHelp);
 extern json_spirit::Value recheckusertorrent(const json_spirit::Array& params, bool fHelp);
+extern json_spirit::Value gettrendinghashtags(const json_spirit::Array& params, bool fHelp);
 
 #endif
diff --git a/src/clientversion.h b/src/clientversion.h
index 4fc9ac66..57abfaf7 100644
--- a/src/clientversion.h
+++ b/src/clientversion.h
@@ -8,7 +8,7 @@
 // These need to be macros, as version.cpp's and bitcoin-qt.rc's voodoo requires it
 #define CLIENT_VERSION_MAJOR 0
 #define CLIENT_VERSION_MINOR 9
-#define CLIENT_VERSION_REVISION 13
+#define CLIENT_VERSION_REVISION 14
 #define CLIENT_VERSION_BUILD 0
 
 // Set to true for release, false for prerelease or test build
diff --git a/src/twister.cpp b/src/twister.cpp
index 4831e2b9..24650183 100644
--- a/src/twister.cpp
+++ b/src/twister.cpp
@@ -58,6 +58,10 @@ static std::string m_receivedSpamUserStr;
 static int64 m_lastSpamTime = 0;
 static std::map m_users;
 
+// lower-cased hashtag -> accumulated vote; accessed under cs_seenHashtags
+static CCriticalSection cs_seenHashtags;
+static std::map<std::string,double> m_seenHashtags;
+
 #define USER_DATA_FILE "user_data"
 #define GLOBAL_DATA_FILE "global_data"
 
@@ -1174,6 +1178,45 @@ void receivedSpamMessage(std::string const &message, std::string const &user)
     }
 }
 
+// Scans a post for "#hashtag" tokens and adds one vote per distinct
+// (lower-cased) hashtag to m_seenHashtags. A post no older than 2*3600
+// (presumably seconds, i.e. two hours) votes 1.0; an older post votes
+// 7200/age. A msgTime in the future is treated as the current time.
+void updateSeenHashtags(std::string &message, int64_t msgTime)
+{
+    boost::int64_t curTime = GetAdjustedTime();
+    if( msgTime > curTime ) msgTime = curTime;
+
+    double vote = 1.0;
+    if( msgTime + (2*3600) < curTime ) {
+        double timeDiff = (curTime - msgTime);
+        timeDiff /= (2*3600);
+        vote /= timeDiff;
+    }
+
+    // split and look for hashtags; the set makes a tag that is repeated
+    // within one message count only once
+    vector<string> tokens;
+    set<string> hashtags;
+    boost::algorithm::split(tokens,message,boost::algorithm::is_any_of(" \n\t.,:/?!"),
+                            boost::algorithm::token_compress_on);
+    BOOST_FOREACH(string const& token, tokens) {
+        // '#' must be followed by at least one more character
+        if( token.length() >= 2 && token.at(0) == '#' ) {
+            string word = token.substr(1);
+            boost::algorithm::to_lower(word);
+            hashtags.insert(word);
+        }
+    }
+
+    if( hashtags.size() ) {
+        LOCK(cs_seenHashtags);
+        BOOST_FOREACH(string const& word, hashtags) {
+            // operator[] value-initialises a missing entry to 0.0
+            m_seenHashtags[word] += vote;
+        }
+    }
+}
 
 Value dhtput(const Array& params, bool fHelp)
 {
@@ -1902,4 +1945,33 @@ Value recheckusertorrent(const Array& params, bool fHelp)
 
     return Value();
 }
 
+// RPC: returns up to <count> hashtags, highest accumulated vote first.
+Value gettrendinghashtags(const Array& params, bool fHelp)
+{
+    if (fHelp || (params.size() != 1))
+        throw runtime_error(
+            "gettrendinghashtags <count>\n"
+            "obtain list of trending hashtags");
+
+    size_t count = params[0].get_int();
+
+    // multimap, not map: several hashtags routinely share the same score
+    // (e.g. 1.0 for a tag seen once) and must not overwrite each other.
+    std::multimap<double,std::string> sortedHashtags;
+    {
+        LOCK(cs_seenHashtags);
+        BOOST_FOREACH(const PAIRTYPE(std::string,double)& item, m_seenHashtags) {
+            sortedHashtags.insert(std::make_pair(item.second, item.first));
+        }
+    }
+
+    Array ret;
+    BOOST_REVERSE_FOREACH(const PAIRTYPE(double, std::string)& item, sortedHashtags) {
+        if( ret.size() >= count )
+            break;
+        ret.push_back(item.second);
+    }
+
+    return ret;
+}
diff --git a/src/twister.h b/src/twister.h
index 00b602b9..d8738cfc 100644
--- a/src/twister.h
+++ b/src/twister.h
@@ -39,4 +39,6 @@ bool shouldDhtResourceExpire(std::string resource, bool multi, int height);
 
 int getDhtNodes(boost::int64_t *dht_global_nodes = NULL);
 
+void updateSeenHashtags(std::string &message, int64_t msgTime);
+
 #endif // TWISTER_H