Created
March 4, 2019 07:40
-
-
Save krry/ddc640bbd469a11e8348432ac55269a2 to your computer and use it in GitHub Desktop.
An Answer to A Tour of Go Exercise: Web Crawler Concurrency
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// only touching the Crawl func, as instructed | |
// https://tour.golang.org/concurrency/10 | |
// Crawl uses fetcher to recursively crawl | |
// pages starting with url, to a maximum of depth. | |
func Crawl(url string, depth int, fetcher Fetcher) { | |
m := map[string]bool{url: true} | |
var mx sync.Mutex | |
var wg sync.WaitGroup | |
var subcrawl func(string, int) | |
subcrawl = func(url string, depth int) { | |
defer wg.Done() | |
if depth <= 0 { // quit digging at the bottom | |
return | |
} | |
body, urls, err := fetcher.Fetch(url) // go fetch the url | |
if err != nil { | |
fmt.Println(err) // log errors | |
return | |
} | |
fmt.Printf("found: %s %q\n", url, body) // log results | |
mx.Lock() | |
for _, u := range urls { | |
if !m[u] { | |
m[u] = true | |
wg.Add(1) | |
go subcrawl(u, depth-1) | |
} | |
} | |
mx.Unlock() | |
} | |
wg.Add(1) | |
subcrawl(url, depth) | |
wg.Wait() | |
} | |
/* returns:
found: https://golang.org/ "The Go Programming Language"
not found: https://golang.org/cmd/
found: https://golang.org/pkg/ "Packages"
found: https://golang.org/pkg/fmt/ "Package fmt"
found: https://golang.org/pkg/os/ "Package os"
*/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment