Skip to content

Commit

Permalink
naive approach
Browse files Browse the repository at this point in the history
  • Loading branch information
meadowingc committed Feb 12, 2024
1 parent f26c295 commit 05e6e7f
Show file tree
Hide file tree
Showing 14 changed files with 350 additions and 4 deletions.
13 changes: 9 additions & 4 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/go
// README at: https://github.com/devcontainers/templates/tree/main/src/ubuntu
{
"name": "Go - Fido",
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
"image": "mcr.microsoft.com/devcontainers/go:1-1.21-bullseye",
"postCpost_create.shreateCommand": "bash .devcontainer/",
"image": "mcr.microsoft.com/devcontainers/base:jammy",
"postCreateCommand": "bash .devcontainer/post_create.sh",
"features": {
"ghcr.io/devcontainers/features/go:1": {
"version": "latest"
}
}

// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},
Expand All @@ -13,7 +18,7 @@
// "forwardPorts": [],

// Use 'postCreateCommand' to run commands after the container is created.
// "postCreateCommand": "go version",
// "postCreateCommand": "uname -a",

// Configure tool-specific properties.
// "customizations": {},
Expand Down
3 changes: 3 additions & 0 deletions .devcontainer/post_create.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,6 @@

go install github.com/go-task/task/v3/cmd/task@latest
go install github.com/cosmtrek/air@latest

sudo apt update && sudo apt install -y python3 python3-pip
pip3 install linkchecker
12 changes: 12 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for more information:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
# https://containers.dev/guide/dependabot

version: 2
updates:
- package-ecosystem: "devcontainers"
directory: "/"
schedule:
interval: weekly
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,6 @@
# Go workspace file
go.work

tmp

__debug_bin*
15 changes: 15 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Launch Package",
"type": "go",
"request": "launch",
"mode": "auto",
"program": "${workspaceFolder}"
}
]
}
5 changes: 5 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
module codeberg.org/meadowingc/fido

go 1.22.0

require github.com/google/uuid v1.6.0
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
111 changes: 111 additions & 0 deletions linkchecker/linkchecker.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
package linkchecker

import (
"bytes"
"errors"
"log"
"os/exec"
"strings"
)

// LinkCheckResult is the parsed outcome of a single linkchecker run, as
// returned by CheckLink.
type LinkCheckResult struct {
// Summary is the one-line summary text taken from linkchecker's report.
Summary string
// FoundErrors holds the link entries that CheckLink extracted from the report.
FoundErrors []LinkCheckError
}

// LinkCheckError is one link entry parsed out of linkchecker's text report.
// Every field is the raw text that followed the corresponding label in the
// report ("Name", "Parent URL", "Real URL", "Check time", "Warning",
// "Result").
type LinkCheckError struct {
	// Which link this is: its text, the page it was found on, and its target.
	Name, ParentURL, RealURL string

	// What linkchecker reported about it.
	CheckTime, Warning, CheckingResult string
}

// getAnalysisValueForKey returns the value part of a "<key> <value>" line:
// everything after the first occurrence of key, with surrounding whitespace
// removed. It returns "" when key does not occur in line.
//
// Only the first occurrence of key is treated as the label, so a value that
// happens to contain the key text again (for example a URL with "URL" in its
// path) is returned intact instead of having those occurrences replaced by
// spaces.
func getAnalysisValueForKey(key string, line string) string {
	_, value, found := strings.Cut(line, key)
	if !found {
		return ""
	}
	return strings.TrimSpace(value)
}

// CheckLink runs the external `linkchecker` program against link and parses
// its plain-text report into a LinkCheckResult.
//
// An error is returned when:
//   - link starts with "-" (it would be read by linkchecker as an option),
//   - the command could not be run at all,
//   - linkchecker exits with code 2, or
//   - the report does not contain the expected "Start checking at" and
//     "That's it. " markers.
//
// An exit code of 1 is not an error: it is expected when issues were found.
func CheckLink(link string) (LinkCheckResult, error) {
	// link originates from user input and is handed to the command as a plain
	// argument; refuse anything that would be parsed as a command-line flag.
	if strings.HasPrefix(link, "-") {
		return LinkCheckResult{}, errors.New("invalid link: must not start with '-'")
	}

	// linkchecker https://meadow.bearblog.dev -o text --no-status --no-warnings --timeout 1200
	cmd := exec.Command("linkchecker", link, "-o", "text", "--no-status", "--no-warnings", "--timeout", "1200")

	var out bytes.Buffer
	cmd.Stdout = &out

	if err := cmd.Run(); err != nil {
		exitError, ok := err.(*exec.ExitError)
		if !ok {
			// The process did not run to an exit status (e.g. it could not be started).
			return LinkCheckResult{}, errors.New("there was an UNKNOWN error while checking for links")
		}
		if exitError.ExitCode() == 2 {
			return LinkCheckResult{}, errors.New("there was an error while checking for links")
		}
		// exit code of 1 is expected IF there were any issues; fall through
		// and parse whatever report was written.
	}

	return parseLinkCheckerOutput(out.String())
}

// parseLinkCheckerOutput extracts the summary line and the per-link entries
// from a complete linkchecker text report.
func parseLinkCheckerOutput(report string) (LinkCheckResult, error) {
	// Everything of interest sits between the start and stop banners.
	_, body, ok := strings.Cut(report, "Start checking at")
	if !ok {
		return LinkCheckResult{}, errors.New("unexpected linkchecker output: missing \"Start checking at\" marker")
	}
	body, _, _ = strings.Cut(body, "Stopped checking at")

	_, summary, ok := strings.Cut(body, "That's it. ")
	if !ok {
		return LinkCheckResult{}, errors.New("unexpected linkchecker output: missing summary line")
	}
	summary = strings.TrimSpace(summary)

	// Link entries come before the statistics section, separated by blank lines.
	entries, _, _ := strings.Cut(body, "Statistics:")
	entries = strings.TrimSpace(entries)

	found := []LinkCheckError{}
	for _, block := range strings.Split(entries, "\n\n") {
		entry := parseLinkCheckEntry(block)
		// Blocks without a "Name" line (such as the remainder of the start
		// banner) are skipped.
		// NOTE(review): this also drops reported links that have no name —
		// confirm whether that is intended.
		if entry.Name != "" {
			found = append(found, entry)
		}
	}

	return LinkCheckResult{
		Summary:     summary,
		FoundErrors: found,
	}, nil
}

// parseLinkCheckEntry turns one blank-line-delimited block of "<label> <value>"
// lines into a LinkCheckError. Unrecognised lines are logged and ignored.
func parseLinkCheckEntry(block string) LinkCheckError {
	var entry LinkCheckError

	for _, line := range strings.Split(block, "\n") {
		switch {
		case strings.HasPrefix(line, "Real URL"):
			entry.RealURL = getAnalysisValueForKey("Real URL", line)
		case strings.HasPrefix(line, "Name"):
			// The name is wrapped as `name' in the report; strip the quoting.
			name := getAnalysisValueForKey("Name", line)
			name = strings.TrimLeft(name, "`")
			entry.Name = strings.TrimRight(name, "'")
		case strings.HasPrefix(line, "Parent URL"):
			// Keep only the URL, dropping anything after the first comma.
			parent := getAnalysisValueForKey("Parent URL", line)
			entry.ParentURL, _, _ = strings.Cut(parent, ",")
		case strings.HasPrefix(line, "Check time"):
			entry.CheckTime = getAnalysisValueForKey("Check time", line)
		case strings.HasPrefix(line, "Warning"):
			entry.Warning = getAnalysisValueForKey("Warning", line)
		case strings.HasPrefix(line, " "):
			// Indented lines are treated as a continuation of the warning text.
			entry.Warning += " " + strings.TrimSpace(line)
		case strings.HasPrefix(line, "Result"):
			entry.CheckingResult = getAnalysisValueForKey("Result", line)
		default:
			log.Println("Unknown line: ", line)
		}
	}

	return entry
}
70 changes: 70 additions & 0 deletions linkchecker/scheduler.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package linkchecker

import (
"log"
"sync"
"time"

"github.com/google/uuid"
)

// CheckLinkRequest pairs a request UUID with the URL of the page to check.
// NOTE(review): nothing in the visible code uses this type yet — confirm
// whether it is still needed.
type CheckLinkRequest struct {
	UUID, PageURL string
}

// SchedulerResult is the tracked state of one submitted link check.
type SchedulerResult struct {
	// UUID identifies the submission, URL is the link that was submitted, and
	// Status is "PENDING" after submission, "COMPLETED" after a successful
	// check, or the error text when the check failed.
	UUID, URL, Status string

	// Result is set once the check has completed successfully.
	Result *LinkCheckResult
}

// Package-level store of submitted checks, keyed by the UUID handed back to
// the submitter. mapLock is the lock used to guard uidToResultMap.
var (
	mapLock        = new(sync.RWMutex)
	uidToResultMap = map[string]*SchedulerResult{}
)

// GetResultForUUID returns the current state of the check submitted under
// uid, or nil when no such check is known.
//
// The returned value is a snapshot copied while holding the read lock, so the
// caller can read its fields without racing against the worker goroutine that
// updates the stored entry.
func GetResultForUUID(uid string) *SchedulerResult {
	mapLock.RLock()
	defer mapLock.RUnlock()

	stored, ok := uidToResultMap[uid]
	if !ok || stored == nil {
		return nil
	}

	snapshot := *stored
	return &snapshot
}

// SubmitLinkForCheck registers link for checking, starts the check in a
// background goroutine and returns the UUID under which its state can be
// fetched with GetResultForUUID.
//
// The entry starts with Status "PENDING". When the check finishes it is
// replaced by an entry whose Status is "COMPLETED" (with Result set) or the
// error text (with Result nil). Finished entries, successful or not, are
// removed 24 hours after completion.
func SubmitLinkForCheck(link string) string {
	// TODO better for this to be the sha256 of the link
	uid := uuid.New().String()

	// The map is shared with concurrent readers and other submissions, so
	// even the initial insert must hold the write lock.
	mapLock.Lock()
	uidToResultMap[uid] = &SchedulerResult{
		UUID:   uid,
		URL:    link,
		Status: "PENDING",
	}
	mapLock.Unlock()

	log.Printf("Submitted link %s for check with UUID %s", link, uid)

	go func() {
		result, err := CheckLink(link)

		// Build a fresh entry instead of mutating the stored one: pointers
		// already handed out to readers then never change underneath them.
		finished := &SchedulerResult{UUID: uid, URL: link}
		if err != nil {
			log.Printf("Link check for %s failed: %v", link, err)
			finished.Status = err.Error()
		} else {
			log.Printf("Link check for %s completed with %d errors", link, len(result.FoundErrors))
			finished.Status = "COMPLETED"
			finished.Result = &result
		}

		mapLock.Lock()
		uidToResultMap[uid] = finished
		mapLock.Unlock()

		// keep the results around for 24 hours and then remove from the map
		time.AfterFunc(24*time.Hour, func() {
			mapLock.Lock()
			delete(uidToResultMap, uid)
			mapLock.Unlock()
		})
	}()

	return uid
}
60 changes: 60 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package main

import (
	"fmt"
	"html/template"
	"log"
	"net/http"

	"codeberg.org/meadowingc/fido/linkchecker"
)

// main registers the HTTP routes for Fido and serves them on port 8080.
//
// Routes:
//   - "/" renders the home page with the submission form.
//   - "POST /submit-eval-request" queues a link check and redirects to its
//     result page.
//   - "/result/{operation_id}" renders the current state of a queued check.
//   - "/contact" is a placeholder page.
//
// The process exits with a logged error if the server cannot start or stops.
func main() {
	templates := template.Must(template.ParseGlob("templates/*.tmpl.html"))

	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		err := templates.ExecuteTemplate(w, "home.tmpl.html", nil)
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
		}
	})

	http.HandleFunc("POST /submit-eval-request", func(w http.ResponseWriter, r *http.Request) {
		// get submitted link from request and evaluate all links on that page
		// for broken links
		submittedLink := r.FormValue("link")
		if submittedLink == "" {
			http.Error(w, "Link is required", http.StatusBadRequest)
			return
		}

		resultUUID := linkchecker.SubmitLinkForCheck(submittedLink)

		// redirect to results page
		http.Redirect(w, r, fmt.Sprintf("/result/%s", resultUUID), http.StatusSeeOther)
	})

	http.HandleFunc("/result/{operation_id}", func(w http.ResponseWriter, r *http.Request) {
		opID := r.PathValue("operation_id")

		// get result for operation ID
		result := linkchecker.GetResultForUUID(opID)
		if result == nil {
			http.Error(w, "No result found for operation ID", http.StatusNotFound)
			return
		}

		// TODO remove this
		// Templates are re-parsed on every request so edits show up without a
		// restart. Parse into a request-local variable: handlers run
		// concurrently, so reassigning the shared set would be a data race,
		// and a broken template must not panic the server.
		reloaded, err := template.ParseGlob("templates/*.tmpl.html")
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}

		if err := reloaded.ExecuteTemplate(w, "result.tmpl.html", result); err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
		}
	})

	http.HandleFunc("/contact", func(w http.ResponseWriter, r *http.Request) {
		fmt.Fprintf(w, "Welcome to the contact page!")
	})

	fmt.Println("Server is running on http://localhost:8080")
	// ListenAndServe only returns on failure (e.g. the port is already in
	// use); surface that instead of exiting silently.
	log.Fatal(http.ListenAndServe(":8080", nil))
}
4 changes: 4 additions & 0 deletions templates/footer.tmpl.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{{define "footer"}}
</body>
</html>
{{end}}
8 changes: 8 additions & 0 deletions templates/header.tmpl.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{{define "header"}}
<!DOCTYPE html>
<html>
<head>
<title>Fido 🐶</title>
</head>
<body>
{{end}}
11 changes: 11 additions & 0 deletions templates/home.tmpl.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{{template "header"}}

<h1>Fido 🐶!</h1>

<form action="/submit-eval-request" method="post">
<label for="link">Link:</label><br>
<input type="text" id="link" name="link"><br>
<input type="submit" value="Submit">
</form>

{{template "footer"}}
37 changes: 37 additions & 0 deletions templates/result.tmpl.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{{template "header"}}

<h1>Results</h1>

<b>Status:</b> {{.Status}}

{{- /* .Result is only set once a check has completed successfully; it is nil while the check is pending and when it failed, so guard on it rather than on the status text. */ -}}
{{- if .Result -}}

{{$result := .Result}}
<p><b>Summary:</b> {{$result.Summary}}</p>

{{- if eq 0 (len $result.FoundErrors) -}}
<p>No broken links found! 🎉</p>
{{- end -}}

{{- range $i, $v := $result.FoundErrors -}}
<b>Link:</b> {{$i}}<br>
<ul>
<li><b>ParentURL:</b> <a target="_blank" href="{{$v.ParentURL}}">{{$v.ParentURL}}</a></li>
<li><b>Link Text:</b> <code>{{$v.Name}}</code></li>
<li><b>Broken URL:</b> <code>{{$v.RealURL}}</code></li>
<li><b>Error:</b> <code>{{$v.CheckingResult}}</code></li>
</ul>
{{- end -}}

{{- end -}}

{{- if eq .Status "PENDING" -}}
<script type="text/javascript">
// Reload the page every 5 seconds
setTimeout(function(){
window.location.reload(1);
}, 5000);
</script>
{{- end -}}

{{template "footer"}}

0 comments on commit 05e6e7f

Please sign in to comment.