-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrapper.go
58 lines (44 loc) · 1.38 KB
/
scrapper.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
package main
import (
"encoding/json"
"fmt"
"os"
"strings"
"github.com/gocolly/colly"
)
// item is a single story collected from the listing page. Its JSON tags
// define the keys used for each entry in the output file.
type item struct {
	// Title is the text of the story's title link.
	Title string `json:"title"`
	// Author is the text of the button inside the profile preview card.
	Author string `json:"author"`
	// Date is the value of the datetime attribute on the story's <time> element.
	Date string `json:"date"`
	// ProfileUrl is the src attribute of the avatar image.
	ProfileUrl string `json:"profile_url"`
	// UserPageUrl is the avatar link's href, resolved to an absolute URL.
	UserPageUrl string `json:"user_page_url"`
	// Tags holds the story's tag names, one per element.
	Tags []string `json:"tags"`
	// PageUrl is the title link's href, resolved to an absolute URL.
	PageUrl string `json:"page_url"`
}
// main scrapes the dev.to weekly top listing and writes the collected stories
// to output.json. On any failure the error is printed to stderr and the
// process exits with status 1.
func main() {
	if err := run(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// run visits the listing page, collects one item per story element, and
// writes the result as JSON. It returns the first error encountered while
// visiting the page, encoding the items, or writing the file.
func run() error {
	const (
		startURL   = "https://dev.to/top/week"
		outputPath = "output.json"
	)

	c := colly.NewCollector(
		colly.AllowedDomains("dev.to"),
	)

	// Start from an empty, non-nil slice so that a page with no matching
	// stories is encoded as "[]" rather than "null".
	items := []item{}

	c.OnHTML("div.crayons-story__body", func(h *colly.HTMLElement) {
		// The tag links' text is returned as one string; splitting on '#'
		// leaves a leading element that is dropped with [1:].
		// NOTE(review): presumably every tag's text starts with '#' — confirm
		// against the page markup.
		tagText := h.ChildText("div.crayons-story__tags a.crayons-tag")

		// Named "story" rather than "item" so the local does not shadow the
		// item type inside this callback.
		story := item{
			Title:       h.ChildText("div.crayons-story__indention h2.crayons-story__title a[href]"),
			Author:      h.ChildText("div.profile-preview-card button[id]"),
			ProfileUrl:  h.ChildAttr("a.crayons-avatar img", "src"),
			UserPageUrl: h.Request.AbsoluteURL(h.ChildAttr("a.crayons-avatar", "href")),
			Date:        h.ChildAttr("time", "datetime"),
			Tags:        strings.Split(tagText, "#")[1:],
			PageUrl:     h.Request.AbsoluteURL(h.ChildAttr("div.crayons-story__indention h2.crayons-story__title a", "href")),
		}
		items = append(items, story)
	})

	// Print each URL as it is requested.
	c.OnRequest(func(r *colly.Request) {
		fmt.Println(r.URL.String())
	})

	if err := c.Visit(startURL); err != nil {
		return fmt.Errorf("visiting %s: %w", startURL, err)
	}

	content, err := json.Marshal(items)
	if err != nil {
		return fmt.Errorf("encoding items: %w", err)
	}

	if err := os.WriteFile(outputPath, content, 0644); err != nil {
		return fmt.Errorf("writing %s: %w", outputPath, err)
	}
	return nil
}