From 1fd35637021e40072ce94ca04a2fd7cdc71c8317 Mon Sep 17 00:00:00 2001 From: Lyndsay Roger Date: Thu, 1 Oct 2015 09:34:47 +1300 Subject: [PATCH] Improve performance by scanning list once --- main.go | 35 +++++++++++++++++++++--- node.go | 16 ----------- seeder.go | 81 +++++++++++++++++++------------------------------------ 3 files changed, 58 insertions(+), 74 deletions(-) diff --git a/main.go b/main.go index f7ec5f2..7f69273 100644 --- a/main.go +++ b/main.go @@ -144,14 +144,41 @@ func main() { // updateNodeCounts runs in a goroutine and updates the global stats with the latest // counts from a startCrawlers run -func updateNodeCounts(s *dnsseeder, status, total, started uint32) { - // update the stats counters +func updateNodeCounts(s *dnsseeder, tcount uint32, started, totals []uint32) { s.counts.mtx.Lock() - s.counts.NdStatus[status] = total - s.counts.NdStarts[status] = started + + for st := range []int{statusRG, statusCG, statusWG, statusNG} { + if config.stats { + log.Printf("%s: started crawler: %s total: %v started: %v\n", s.name, status2str(uint32(st)), totals[st], started[st]) + } + + // update the stats counters + s.counts.NdStatus[st] = totals[st] + s.counts.NdStarts[st] = started[st] + } + + if config.stats { + log.Printf("%s: crawlers started. 
total nodes: %d\n", s.name, tcount) + } s.counts.mtx.Unlock() } +// status2str will return the string description of the status +func status2str(status uint32) string { + switch status { + case statusRG: + return "statusRG" + case statusCG: + return "statusCG" + case statusWG: + return "statusWG" + case statusNG: + return "statusNG" + default: + return "Unknown" + } +} + // updateDNSCounts runs in a goroutine and updates the global stats for the number of DNS requests func updateDNSCounts(name, qtype string) { var ndType uint32 diff --git a/node.go b/node.go index e54507d..ba5e086 100644 --- a/node.go +++ b/node.go @@ -27,22 +27,6 @@ type node struct { crawlActive bool // are we currently crawling this client } -// status2str will return the string description of the status -func (nd node) status2str() string { - switch nd.status { - case statusRG: - return "statusRG" - case statusCG: - return "statusCG" - case statusWG: - return "statusWG" - case statusNG: - return "statusNG" - default: - return "Unknown" - } -} - // dns2str will return the string description of the dns type func (nd node) dns2str() string { switch nd.dnsType { diff --git a/seeder.go b/seeder.go index 9c56d73..4d93801 100644 --- a/seeder.go +++ b/seeder.go @@ -62,7 +62,6 @@ type dnsseeder struct { maxStart []uint32 // max number of goroutines to start each run for each status type delay []int64 // number of seconds to wait before we connect to a known client for each status counts NodeCounts // structure to hold stats for this seeder - shutdown bool // seeder is shutting down } type result struct { @@ -167,7 +166,10 @@ func (s *dnsseeder) runSeeder(done <-chan struct{}, wg *sync.WaitGroup) { // goroutines if there are spare goroutine slots available func (s *dnsseeder) startCrawlers(resultsChan chan *result) { - tcount := len(s.theList) + s.mtx.RLock() + defer s.mtx.RUnlock() + + tcount := uint32(len(s.theList)) if tcount == 0 { if config.debug { log.Printf("%s - debug - startCrawlers fail: no 
node ailable\n", s.name) @@ -175,69 +177,40 @@ func (s *dnsseeder) startCrawlers(resultsChan chan *result) { return } - // struct to hold config options for each status - var crawlers = []struct { - desc string - status uint32 - maxCount uint32 // max goroutines to start for this status type - totalCount uint32 // stats count of this type - started uint32 // count of goroutines started for this type - delay int64 // number of second since last try - }{ - {"statusRG", statusRG, s.maxStart[statusRG], 0, 0, s.delay[statusRG]}, - {"statusCG", statusCG, s.maxStart[statusCG], 0, 0, s.delay[statusCG]}, - {"statusWG", statusWG, s.maxStart[statusWG], 0, 0, s.delay[statusWG]}, - {"statusNG", statusNG, s.maxStart[statusNG], 0, 0, s.delay[statusNG]}, - } - - s.mtx.RLock() - defer s.mtx.RUnlock() - - // step through each of the status types RG, CG, WG, NG - for _, c := range crawlers { - - // range on a map will not return items in the same order each time - // so this is a random'ish selection - for _, nd := range s.theList { - - if nd.status != c.status { - continue - } + started := make([]uint32, maxStatusTypes) + totals := make([]uint32, maxStatusTypes) - // stats count - c.totalCount++ + // range on a map will not return items in the same order each time + // so this is a random'ish selection + for _, nd := range s.theList { - if nd.crawlActive == true { - continue - } + totals[nd.status]++ - if c.started >= c.maxCount { - continue - } + if nd.crawlActive == true { + continue + } - if (time.Now().Unix() - c.delay) <= nd.lastTry.Unix() { - continue - } + // capture the node status + ns := nd.status - nd.crawlActive = true - nd.crawlStart = time.Now() - // all looks good so start a go routine to crawl the remote node - go crawlNode(resultsChan, s, nd) - c.started++ + // do we already have enough started at this status + if started[ns] >= s.maxStart[ns] { + continue } - if config.stats { - log.Printf("%s: started crawler: %s total: %v started: %v\n", s.name, c.desc, 
c.totalCount, c.started) + // don't crawl a node too quickly + if (time.Now().Unix() - s.delay[ns]) <= nd.lastTry.Unix() { + continue } - // update the global stats in another goroutine to free the main goroutine - // for other work - go updateNodeCounts(s, c.status, c.totalCount, c.started) + // all looks good so start a go routine to crawl the remote node + go crawlNode(resultsChan, s, nd) + started[ns]++ } - if config.stats { - log.Printf("%s: crawlers started. total clients: %d\n", s.name, tcount) - } + // update the global stats in another goroutine to free the main goroutine + // for other work + go updateNodeCounts(s, tcount, started, totals) // returns and read lock released }