Skip to content

Commit

Permalink
make the worker.Run() func acquire a lock by itself and make the worker stop channel buffered for 1
Browse files Browse the repository at this point in the history
  • Loading branch information
equals215 committed Jul 15, 2024
1 parent fcd3d9d commit e49838a
Showing 1 changed file with 7 additions and 8 deletions.
15 changes: 7 additions & 8 deletions internal/pkg/crawl/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,31 +66,30 @@ func (w *Worker) Run() {
w.crawlParameters.Log.Info("Worker stopped", "worker", w.id)
return
case item := <-w.crawlParameters.Frontier.PullChan:
locked := w.TryLock()
if !locked {
// Can it happen? I don't think so but let's be safe
if item == nil {
continue
}
w.Lock()

// If the crawl is paused, we wait until it's resumed
for w.crawlParameters.Paused.Get() || w.crawlParameters.Frontier.Paused.Get() {
time.Sleep(time.Second)
}

// If the host of the item is in the host exclusion list, we skip it
if item != nil && (utils.StringInSlice(item.Host, w.crawlParameters.ExcludedHosts) || !w.crawlParameters.checkIncludedHosts(item.Host)) {
if utils.StringInSlice(item.Host, w.crawlParameters.ExcludedHosts) || !w.crawlParameters.checkIncludedHosts(item.Host) {
if w.crawlParameters.UseHQ {
// If we are using the HQ, we want to mark the item as done
w.crawlParameters.HQFinishedChannel <- item
}

w.Unlock()
continue
}

// Launches the capture of the given item
w.unsafeCapture(item)
if locked {
w.Unlock()
}
w.Unlock()
}
}
}
Expand Down Expand Up @@ -138,7 +137,7 @@ func newWorker(crawlParameters *Crawl, id uint) *Worker {
currentItem: nil,
lastError: nil,
},
doneSignal: make(chan bool),
doneSignal: make(chan bool, 1),
crawlParameters: crawlParameters,
}
}

0 comments on commit e49838a

Please sign in to comment.