Skip to content

Commit

Permalink
GetFavicon() + GetLinks() improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
devnyxie committed Jul 23, 2024
1 parent 8809b4a commit 3c281b0
Show file tree
Hide file tree
Showing 15 changed files with 172 additions and 305 deletions.
14 changes: 5 additions & 9 deletions GetDescription_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,18 +103,14 @@ func TestGetDescription(t *testing.T) {
var result string
var err error

f := NewFetcher(&FetcherProps{Timeout: 3000, CacheCap: 10})
defer f.ClearCache()
mockServer := MockServer(t, tt.responseBody)
defer mockServer.Close()

if tt.mockupServerNeed {
f := NewFetcher(&FetcherProps{Timeout: 3000, CacheCap: 10})
defer f.ClearCache()
mockServer := MockServer(t, tt.responseBody)
defer mockServer.Close()
result, err = f.GetDescription(mockServer.URL)
} else {
f := NewFetcher(&FetcherProps{Timeout: 3000, CacheCap: 10})
defer f.ClearCache()
htmlTemplate := tt.responseBody
mockServer := MockServer(t, htmlTemplate)
defer mockServer.Close()
result, err = f.GetDescription(tt.url)
}

Expand Down
43 changes: 32 additions & 11 deletions GetFavicons.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ package katsuragi

import (
"fmt"
"net/http"
Url "net/url"
"strings"

"golang.org/x/net/html"
)
Expand All @@ -15,7 +15,11 @@ func (f *Fetcher) GetFavicons(url string) ([]string, error) {
}
favicons, found := traverseAndExtractFavicons(htmlDoc, url)
if !found {
return nil, fmt.Errorf("GetFavicon failed to find any favicon in HTML")
getRootFaviconIco(&favicons, url)
}
// if not found, return error
if !found && len(favicons) == 0 {
return nil, fmt.Errorf("GetFavicon failed to find any favicons")
}
return favicons, nil
}
Expand All @@ -24,22 +28,44 @@ func (f *Fetcher) GetFavicons(url string) ([]string, error) {
var validRel = map[string]bool{
"icon": true,
"apple-touch-icon": true,
"shortcut icon": true,

// to be reviewed:
// "shortcut icon": true,
// "fluid-icon": true,
// "mask-icon": true,
// "alternate icon": true,
}
var validMeta = map[string]bool{
"og:image": true,

// to be implemented:
// to be reviewed:
//"twitter:image:src": true,
//"twitter:image": true,
}


// getRootFaviconIco probes for a favicon.ico file at the root of the site that
// url belongs to and, when the server answers 200 OK, appends its absolute URL
// to *existingFavicons.
//
// Many websites ("https://docs.microsoft.com", for example) declare no favicon
// tag in their HTML but still have a favicon.ico file in the root directory,
// which is fetched by browsers.
//
// No request is made when the root favicon URL is already present in the list.
// A response other than 200 OK is not treated as an error: the list is left
// unchanged and nil is returned. An error is returned when existingFavicons is
// nil, when url cannot be parsed or has no scheme or host, or when the HTTP
// request itself fails.
func getRootFaviconIco(existingFavicons *[]string, url string) error {
	if existingFavicons == nil {
		return fmt.Errorf("favicon list is nil")
	}

	// Url.Parse returns a nil *URL on failure, so the error must be checked
	// before any field of the result is read.
	parsed, err := Url.Parse(url)
	if err != nil {
		return fmt.Errorf("parsing %q: %w", url, err)
	}
	if parsed.Scheme == "" || parsed.Host == "" {
		return fmt.Errorf("url %q has no scheme or host", url)
	}

	faviconURL := parsed.Scheme + "://" + parsed.Host + "/favicon.ico"
	if contains(*existingFavicons, faviconURL) {
		return nil
	}

	// test: mockup server, 200 "/", 404 "/favicon.ico"
	// NOTE(review): http.Get goes through http.DefaultClient, which sets no
	// timeout; consider routing this request through the Fetcher's client.
	resp, err := http.Get(faviconURL)
	if err != nil {
		return fmt.Errorf("failed to fetch favicon.ico: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusOK {
		*existingFavicons = append(*existingFavicons, faviconURL)
	}
	return nil
}

// traverseAndExtractFavicons traverses the HTML node tree and extracts favicon URLs
func traverseAndExtractFavicons(n *html.Node, url string) ([]string, bool) {
Expand Down Expand Up @@ -80,13 +106,8 @@ func traverseAndExtractFavicons(n *html.Node, url string) ([]string, bool) {
if len(favicons) > 0 {
// If the favicon URL is a relative path, we should prepend the scheme and host of the URL
for i, faviconURL := range favicons {
if !strings.HasPrefix(faviconURL, "http") {
uri, _ := Url.Parse(url)
if !strings.HasPrefix(faviconURL, "/") {
faviconURL = "/" + faviconURL
}
favicons[i] = uri.Scheme + "://" + uri.Host + faviconURL
}
favicons[i] = ensureAbsoluteURL(faviconURL, url)

}
return favicons, true
}
Expand Down
152 changes: 3 additions & 149 deletions GetFavicons_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,152 +4,6 @@ import (
"testing"
)

// TestGetFavicons_InvalidURL checks that GetFavicons reports an error when it
// is handed "255.255.255.0", a string with no scheme. A mock server serving an
// empty body is started, but its URL is not the one passed to GetFavicons.
func TestGetFavicons_InvalidURL(t *testing.T) {
	fetcher := NewFetcher(&FetcherProps{Timeout: 3000, CacheCap: 10})
	defer fetcher.ClearCache()

	emptyBody := ``
	srv := MockServer(t, emptyBody)
	defer srv.Close()

	if _, err := fetcher.GetFavicons("255.255.255.0"); err == nil {
		t.Fatal("Expected an error, got none")
	}
}

// TestGetFavicons_NoFavicons serves a page whose <head> holds only a <title>
// and expects GetFavicons to return an error.
func TestGetFavicons_NoFavicons(t *testing.T) {
	page := `<html><head><title>No Favicons Here</title></head><body></body></html>`
	srv := MockServer(t, page)
	defer srv.Close()

	fetcher := NewFetcher(&FetcherProps{Timeout: 3000, CacheCap: 10})
	defer fetcher.ClearCache()

	// An error is the expected outcome here.
	if _, err := fetcher.GetFavicons(srv.URL); err == nil {
		t.Fatal("Expected an error, got none")
	}
}

// TestGetFavicons_MultipleFavicons serves a page with two rel="icon" links and
// one rel="apple-touch-icon" link and expects GetFavicons to return all three
// without an error.
func TestGetFavicons_MultipleFavicons(t *testing.T) {
	page := `<html><head>
<link rel="icon" href="favicon.ico" sizes="16x16">
<link rel="icon" href="favicon-32.png" sizes="32x32">
<link rel="apple-touch-icon" href="apple-touch-icon.png" sizes="180x180">
</head><body></body></html>`
	srv := MockServer(t, page)
	defer srv.Close()

	fetcher := NewFetcher(&FetcherProps{Timeout: 3000, CacheCap: 10})
	defer fetcher.ClearCache()

	icons, err := fetcher.GetFavicons(srv.URL)
	if err != nil {
		t.Fatalf("Expected no error, got: %v", err)
	}
	if got := len(icons); got != 3 {
		t.Fatalf("Expected to find 3 favicons, found %d", got)
	}
}

// TestGetFavicons_IconTag serves a page with a single rel="icon" link and
// expects GetFavicons to return exactly one favicon without an error.
func TestGetFavicons_IconTag(t *testing.T) {
	page := `<html><head><link rel="icon" href="/favicon.ico"></head><body></body></html>`
	srv := MockServer(t, page)
	defer srv.Close()

	fetcher := NewFetcher(&FetcherProps{Timeout: 3000, CacheCap: 10})
	defer fetcher.ClearCache()

	icons, err := fetcher.GetFavicons(srv.URL)
	if err != nil {
		t.Fatalf("Expected no error, got: %v", err)
	}
	if got := len(icons); got != 1 {
		t.Fatalf("Expected to find 1 favicon, found %d", got)
	}
}

// TestGetFavicons_AppleTouchIconTag serves a page with a single
// rel="apple-touch-icon" link and expects GetFavicons to return exactly one
// favicon without an error.
func TestGetFavicons_AppleTouchIconTag(t *testing.T) {
	page := `<html><head><link rel="apple-touch-icon" href="/apple-touch-icon.png"></head><body></body></html>`
	srv := MockServer(t, page)
	defer srv.Close()

	fetcher := NewFetcher(&FetcherProps{Timeout: 3000, CacheCap: 10})
	defer fetcher.ClearCache()

	icons, err := fetcher.GetFavicons(srv.URL)
	if err != nil {
		t.Fatalf("Expected no error, got: %v", err)
	}
	if got := len(icons); got != 1 {
		t.Fatalf("Expected to find 1 favicon, found %d", got)
	}
}

// TestGetFavicons_OgImageTag_NoSizeSpecified serves a page whose only candidate
// is an og:image meta tag with no width/height metadata and expects
// GetFavicons to return an error and no favicons.
func TestGetFavicons_OgImageTag_NoSizeSpecified(t *testing.T) {
	page := `<html><head><meta property="og:image" content="og-image.png"></head><body></body></html>`
	srv := MockServer(t, page)
	defer srv.Close()

	fetcher := NewFetcher(&FetcherProps{Timeout: 3000, CacheCap: 10})
	defer fetcher.ClearCache()

	icons, err := fetcher.GetFavicons(srv.URL)
	if err == nil {
		t.Fatal("Expected an error, got none")
	}
	if got := len(icons); got != 0 {
		t.Fatalf("Expected to find 0 favicons, found %d", got)
	}
}

// TestGetFavicons_OgImageTag_NonSquare serves a page whose og:image is declared
// as 1200x630 and expects GetFavicons to return an error and no favicons.
func TestGetFavicons_OgImageTag_NonSquare(t *testing.T) {
	page := `<html><head><meta property="og:image" content="og-image.png"><meta property="og:image:type" content="image/png"><meta property="og:image:width" content="1200"><meta property="og:image:height" content="630"></head><body></body></html>`
	srv := MockServer(t, page)
	defer srv.Close()

	fetcher := NewFetcher(&FetcherProps{Timeout: 3000, CacheCap: 10})
	defer fetcher.ClearCache()

	icons, err := fetcher.GetFavicons(srv.URL)
	if err == nil {
		t.Fatal("Expected an error due to non-square aspect ratio, got none")
	}
	if got := len(icons); got != 0 {
		t.Fatalf("Expected to find 0 favicons, found %d", got)
	}
}

// TestGetFavicons_OgImageTag_Square serves a page whose og:image is declared as
// 1200x1200 and expects GetFavicons to return exactly one favicon without an
// error.
func TestGetFavicons_OgImageTag_Square(t *testing.T) {
	page := `<html><head><meta property="og:image" content="og-image.png"><meta property="og:image:type" content="image/png"><meta property="og:image:width" content="1200"><meta property="og:image:height" content="1200"></head><body></body></html>`
	srv := MockServer(t, page)
	defer srv.Close()

	fetcher := NewFetcher(&FetcherProps{Timeout: 3000, CacheCap: 10})
	defer fetcher.ClearCache()

	icons, err := fetcher.GetFavicons(srv.URL)
	if err != nil {
		t.Fatalf("Expected no error, got: %v", err)
	}
	if got := len(icons); got != 1 {
		t.Fatalf("Expected to find 1 favicon, found %d", got)
	}
}

// all in one
func TestGetFavicons_AllInOne(t *testing.T) {
tests := []struct {
Expand All @@ -172,7 +26,7 @@ func TestGetFavicons_AllInOne(t *testing.T) {
url: "",
mockupServerNeed: true,
responseBody: `<html><head></head><body></body></html>`,
expectedErr: "no <head> element found",
expectedErr: "GetFavicon failed to find any favicons",
expectedResLength: 0,
},
{
Expand Down Expand Up @@ -205,15 +59,15 @@ func TestGetFavicons_AllInOne(t *testing.T) {
url: "",
mockupServerNeed: true,
responseBody: `<html><head><meta property="og:image" content="og-image.png"></head><body></body></html>`,
expectedErr: "GetFavicon failed to find any favicon in HTML",
expectedErr: "GetFavicon failed to find any favicons",
expectedResLength: 0,
},
{
name: "OG Image Tag - Non 1:1 Aspect Ratio",
url: "",
mockupServerNeed: true,
responseBody: `<html><head><meta property="og:image" content="og-image.png"><meta property="og:image:type" content="image/png"><meta property="og:image:width" content="1200"><meta property="og:image:height" content="630"></head><body></body></html>`,
expectedErr: "GetFavicon failed to find any favicon in HTML",
expectedErr: "GetFavicon failed to find any favicons",
expectedResLength: 0,
},
{
Expand Down
Loading

0 comments on commit 3c281b0

Please sign in to comment.