https://github.com/gocolly/colly
爬虫
package mainimport ("fmt""log""strconv""time""github.com/gocolly/colly")func main() {t := time.Now()c := colly.NewCollector()// Limit the number of threads started by colly to two// when visiting links which domains' matches "*httpbin.*" globc.Limit(&colly.LimitRule{DomainGlob: "*wufazhuce.*",Parallelism: 2,RandomDelay: 5 * time.Second,})// On every a element which has href attribute call callbackc.OnHTML("div.tab-content", func(e *colly.HTMLElement) {// 插画地址imageURL := e.ChildAttr("img", "src")fmt.Printf("imageURL: %s \n", imageURL)// 引言citation := e.ChildText("div.one-cita")fmt.Printf("citation: %s \n", citation)// 标号vol := e.ChildText("div.one-titulo")fmt.Printf("vol: %s \n", vol)// 发布日publishDate := e.ChildText("p.dom") + " " + e.ChildText("p.may")fmt.Printf("publishDate: %s \n", publishDate)})// Before making a request print "Visiting ..."c.OnRequest(func(r *colly.Request) {fmt.Println("Visiting", r.URL.String())})c.OnScraped(func(r *colly.Response) {fmt.Println("Finished", r.Request.URL)})c.OnError(func(_ *colly.Response, err error) {log.Println("Something went wrong:", err)})for i := 1; i < 2819; i++ {c.Visit("http://wufazhuce.com/one/" + strconv.Itoa(i))}c.Wait()fmt.Printf("花费时间:%s", time.Since(t))}
