Skip to content

Commit

Permalink
updated README.md
Browse files Browse the repository at this point in the history
  • Loading branch information
tikazyq committed Aug 11, 2020
1 parent 4591cc1 commit 37a3196
Showing 1 changed file with 63 additions and 0 deletions.
63 changes: 63 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,67 @@ Crawlab Go SDK supports Golang-based spiders integration with Crawlab. It contai

## Basic Usage

```go
package main

import (
"github.com/crawlab-team/crawlab-go-sdk"
"github.com/crawlab-team/crawlab-go-sdk/entity"
)

// main builds a single item with a URL and a title and passes it to
// crawlab.SaveItem.
func main() {
	item := entity.Item{}
	item["url"] = "http://example.com"
	item["title"] = "hello world"

	// Do not discard the save error: if the item could not be saved, fail
	// loudly so the run is not mistaken for a successful one.
	if err := crawlab.SaveItem(item); err != nil {
		panic(err)
	}
}

```

## Example Using Colly

```go
package main

import (
"fmt"
"github.com/apex/log"
"github.com/crawlab-team/crawlab-go-sdk"
"github.com/crawlab-team/crawlab-go-sdk/entity"
"github.com/gocolly/colly/v2"
"runtime/debug"
)

// main visits a Baidu search-results page with Colly and passes the title and
// link of every matched result container to crawlab.SaveItem.
func main() {
	startURL := "https://www.baidu.com/s?wd=crawlab"

	// The collector is limited to the search host and runs asynchronously;
	// c.Wait() at the end of main blocks until outstanding requests finish.
	c := colly.NewCollector(
		colly.AllowedDomains("www.baidu.com"),
		colly.Async(true),
		colly.UserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"),
	)

	// One item is built per element matching the result-container selector.
	c.OnHTML("#content_left > .c-container", func(e *colly.HTMLElement) {
		item := entity.Item{}
		item["title"] = e.ChildText("h3.t > a")
		item["url"] = e.ChildAttr("h3.t > a", "href")
		if err := crawlab.SaveItem(item); err != nil {
			// The error is passed as an argument rather than concatenated into
			// the format string, so a '%' in its text is printed literally.
			log.Errorf("save item error: %v", err)
			debug.PrintStack()
		}
	})

	c.OnRequest(func(r *colly.Request) {
		// URLs commonly carry percent-escapes (e.g. %20). Formatting exactly
		// once keeps them from being re-read as format verbs.
		log.Debugf("Visiting %s", r.URL.String())
	})

	if err := c.Visit(startURL); err != nil {
		log.Errorf("visit error: %v", err)
		debug.PrintStack()
		panic(fmt.Sprintf("Unable to visit %s", startURL))
	}

	c.Wait()
}
```

0 comments on commit 37a3196

Please sign in to comment.