Skip to content

Commit

Permalink
Initial support for auto-tagging
Browse files Browse the repository at this point in the history
  • Loading branch information
icereed committed Sep 23, 2024
1 parent 47275e2 commit 186598a
Show file tree
Hide file tree
Showing 2 changed files with 172 additions and 8 deletions.
130 changes: 125 additions & 5 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,12 @@ type GetDocumentsApiResponse struct {
}

// Document is the JSON representation of a document exchanged by this
// service. SuggestedTitle and SuggestedTags hold generated suggestions and
// are left out of the encoded JSON when they are empty (omitempty).
type Document struct {
ID int `json:"id"`
Title string `json:"title"`
Content string `json:"content"`
Tags []int `json:"tags"`
SuggestedTitle string `json:"suggested_title,omitempty"`
ID int `json:"id"`
Title string `json:"title"`
Content string `json:"content"`
Tags []int `json:"tags"`
SuggestedTitle string `json:"suggested_title,omitempty"`
// SuggestedTags holds tag names (not IDs); names are resolved to IDs when
// the document is updated.
SuggestedTags []string `json:"suggested_tags,omitempty"`
}

var (
Expand Down Expand Up @@ -130,6 +131,46 @@ func createLLM() (llms.Model, error) {
}
}

// getAllTags fetches every tag known to paperless-ngx and returns a mapping
// from tag name to tag ID.
//
// The tags endpoint answers with a paginated list ("results" plus a "next"
// URL). Reading only the first page silently drops every tag beyond the
// server's page size, so this function keeps following "next" until the
// server reports that there are no further pages.
//
// baseURL is the paperless-ngx root URL without a trailing slash and apiToken
// is sent as "Authorization: Token <apiToken>" on every page request. Any
// transport, status or decoding failure aborts the whole fetch and returns a
// nil map together with the error.
func getAllTags(ctx context.Context, baseURL, apiToken string) (map[string]int, error) {
	client := &http.Client{}
	tagIDMapping := make(map[string]int)

	pageURL := fmt.Sprintf("%s/api/tags/", baseURL)
	for pageURL != "" {
		next, err := fetchTagsPage(ctx, client, pageURL, apiToken, tagIDMapping)
		if err != nil {
			return nil, err
		}
		// Guard against a server that keeps pointing at the same page, which
		// would otherwise loop until the context is cancelled.
		if next == pageURL {
			return nil, fmt.Errorf("Error fetching tags: pagination did not advance past %s", pageURL)
		}
		pageURL = next
	}

	return tagIDMapping, nil
}

// fetchTagsPage requests a single page of the tag list, stores its entries in
// into (name -> ID) and returns the URL of the next page, or "" when this was
// the last one. It lives in its own function so the deferred Body.Close runs
// once per page rather than piling up until getAllTags returns.
func fetchTagsPage(ctx context.Context, client *http.Client, pageURL, apiToken string, into map[string]int) (string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, pageURL, nil)
	if err != nil {
		return "", err
	}
	req.Header.Set("Authorization", fmt.Sprintf("Token %s", apiToken))

	resp, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is only used to enrich the error message.
		bodyBytes, _ := io.ReadAll(resp.Body)
		return "", fmt.Errorf("Error fetching tags: %d, %s", resp.StatusCode, string(bodyBytes))
	}

	var page struct {
		// Next stays "" when the JSON value is null or absent.
		Next    string `json:"next"`
		Results []struct {
			ID   int    `json:"id"`
			Name string `json:"name"`
		} `json:"results"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&page); err != nil {
		return "", err
	}

	for _, tag := range page.Results {
		into[tag.Name] = tag.ID
	}
	return page.Next, nil
}

// documentsHandler returns documents with the specific tag
func documentsHandler(c *gin.Context) {
ctx := c.Request.Context()
Expand Down Expand Up @@ -290,6 +331,18 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er
return nil, fmt.Errorf("failed to create LLM client: %v", err)
}

// Fetch all available tags from paperless-ngx
availableTags, err := getAllTags(ctx, paperlessBaseURL, paperlessAPIToken)
if err != nil {
return nil, fmt.Errorf("failed to fetch available tags: %v", err)
}

// Prepare a list of tag names
availableTagNames := make([]string, 0, len(availableTags))
for tagName := range availableTags {
availableTagNames = append(availableTagNames, tagName)
}

var wg sync.WaitGroup
var mu sync.Mutex
errors := make([]error, 0)
Expand All @@ -315,8 +368,18 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er
return
}

suggestedTags, err := getSuggestedTags(ctx, llm, content, suggestedTitle, availableTagNames)
if err != nil {
mu.Lock()
errors = append(errors, fmt.Errorf("Document %d: %v", documentID, err))
mu.Unlock()
log.Printf("Error generating tags for document %d: %v", documentID, err)
return
}

mu.Lock()
doc.SuggestedTitle = suggestedTitle
doc.SuggestedTags = suggestedTags
mu.Unlock()
log.Printf("Document %d processed successfully.", documentID)
}(&documents[i])
Expand All @@ -331,6 +394,47 @@ func processDocuments(ctx context.Context, documents []Document) ([]Document, er
return documents, nil
}

// getSuggestedTags asks the LLM to pick tags for a document from
// availableTags and returns the chosen tag names.
//
// The prompt contains the available tag names, the suggested title and the
// document content, and tells the model to answer with a comma-separated
// list. The expected document language is read from the LLM_LANGUAGE
// environment variable and defaults to "English" when it is unset or empty.
//
// The reply is split on commas and each entry is trimmed; empty entries
// (for example from an empty reply or a trailing comma) are dropped, so the
// result may be an empty slice. The names are not checked against
// availableTags here.
//
// An error is returned when the LLM call fails or when the response carries
// no choices.
func getSuggestedTags(ctx context.Context, llm llms.Model, content string, suggestedTitle string, availableTags []string) ([]string, error) {
	likelyLanguage := os.Getenv("LLM_LANGUAGE")
	if likelyLanguage == "" {
		likelyLanguage = "English"
	}

	prompt := fmt.Sprintf(`I will provide you with the content and suggested title of a document. Your task is to select appropriate tags for the document from the list of available tags I will provide. Only select tags from the provided list. Respond only with the selected tags as a comma-separated list, without any additional information. The content is likely in %s.
Available Tags:
%s
Suggested Title:
%s
Content:
%s
`, likelyLanguage, strings.Join(availableTags, ", "), suggestedTitle, content)

	completion, err := llm.GenerateContent(ctx, []llms.MessageContent{
		{
			Parts: []llms.ContentPart{
				llms.TextContent{
					Text: prompt,
				},
			},
			Role: llms.ChatMessageTypeHuman,
		},
	})
	if err != nil {
		return nil, fmt.Errorf("Error getting response from LLM: %w", err)
	}
	// Indexing Choices[0] on an empty response would panic inside the
	// per-document goroutine, so report it as an ordinary error instead.
	if completion == nil || len(completion.Choices) == 0 {
		return nil, fmt.Errorf("Error getting response from LLM: no choices returned")
	}

	parts := strings.Split(completion.Choices[0].Content, ",")
	suggestedTags := make([]string, 0, len(parts))
	for _, part := range parts {
		// Skip blanks so callers never see an empty tag name.
		if tag := strings.TrimSpace(part); tag != "" {
			suggestedTags = append(suggestedTags, tag)
		}
	}

	return suggestedTags, nil
}

func getSuggestedTitle(ctx context.Context, llm llms.Model, content string) (string, error) {
likelyLanguage, ok := os.LookupEnv("LLM_LANGUAGE")
if !ok {
Expand Down Expand Up @@ -366,6 +470,13 @@ Content:
func updateDocuments(ctx context.Context, baseURL, apiToken string, documents []Document, paperlessGptTagID int) error {
client := &http.Client{}

// Fetch all available tags
availableTags, err := getAllTags(ctx, baseURL, apiToken)
if err != nil {
log.Printf("Error fetching available tags: %v", err)
return err
}

for _, document := range documents {
documentID := document.ID

Expand All @@ -378,6 +489,15 @@ func updateDocuments(ctx context.Context, baseURL, apiToken string, documents []
}
}

// Map suggested tag names to IDs
for _, tagName := range document.SuggestedTags {
if tagID, exists := availableTags[tagName]; exists {
newTags = append(newTags, tagID)
} else {
log.Printf("Tag '%s' does not exist in paperless-ngx, skipping.", tagName)
}
}

updatedFields["tags"] = newTags

suggestedTitle := document.SuggestedTitle
Expand Down
50 changes: 47 additions & 3 deletions web-app/src/components/DocumentProcessor.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ interface Document {
title: string;
content: string;
suggested_title?: string;
suggested_tags?: string[];
}

const DocumentProcessor: React.FC = () => {
Expand All @@ -30,7 +31,7 @@ const DocumentProcessor: React.FC = () => {
} catch (error) {
console.error("Error fetching filter tag:", error);
}
}
};

const fetchDocuments = async () => {
try {
Expand Down Expand Up @@ -90,6 +91,24 @@ const DocumentProcessor: React.FC = () => {
}
}, [documents]);

// Removes the suggested tag at index `i` from the document whose id is
// `docId` and stores the updated document list in state. All other documents
// are left unchanged.
//
// `suggested_tags` is optional on Document, so a missing list is treated as
// empty instead of calling `.filter` on `undefined`.
const handleDelete = (i: number, docId: Document["id"]) => {
  const updatedDocuments = documents.map((d) =>
    d.id === docId
      ? {
          ...d,
          suggested_tags: (d.suggested_tags ?? []).filter(
            (_tag, index) => index !== i
          ),
        }
      : d
  );
  setDocuments(updatedDocuments);
};

// Appends `tag` to the suggested tags of the document whose id is `docId`
// and stores the updated document list in state. All other documents are
// left unchanged.
//
// `suggested_tags` is optional on Document, so a missing list is treated as
// empty instead of spreading `undefined`, which would throw.
const handleAddition = (tag: string, docId: Document["id"]) => {
  const updatedDocuments = documents.map((d) =>
    d.id === docId
      ? { ...d, suggested_tags: [...(d.suggested_tags ?? []), tag] }
      : d
  );
  setDocuments(updatedDocuments);
};

if (loading) {
return (
<div className="flex items-center justify-center h-screen">
Expand All @@ -109,8 +128,10 @@ const DocumentProcessor: React.FC = () => {
<div className="flex items-center justify-center h-screen">
<div className="text-xl font-semibold">
No documents found with filter tag{" "}
<span className="bg-blue-100 text-blue-800 text-sm font-medium me-2 px-2.5 py-0.5 rounded dark:bg-blue-900 dark:text-blue-300bg-blue-100 text-blue-800 text-xs font-medium me-2 px-2.5 py-0.5 rounded-full dark:bg-blue-900 dark:text-blue-300">{filterTag}</span>
{" "}found. Try{" "}
<span className="bg-blue-100 text-blue-800 text-sm font-medium me-2 px-2.5 py-0.5 rounded dark:bg-blue-900 dark:text-blue-300bg-blue-100 text-blue-800 text-xs font-medium me-2 px-2.5 py-0.5 rounded-full dark:bg-blue-900 dark:text-blue-300">
{filterTag}
</span>{" "}
found. Try{" "}
<button
onClick={() => {
setDocuments([]);
Expand Down Expand Up @@ -199,6 +220,9 @@ const DocumentProcessor: React.FC = () => {
<th className="px-4 py-2 text-left text-sm font-medium text-gray-500">
Suggested Title
</th>
<th className="px-4 py-2 text-left text-sm font-medium text-gray-500">
Suggested Tags
</th>
</tr>
</thead>
<tbody className="bg-white divide-y divide-gray-200">
Expand Down Expand Up @@ -227,6 +251,26 @@ const DocumentProcessor: React.FC = () => {
className="w-full border border-gray-300 rounded px-2 py-1 focus:outline-none focus:ring-2 focus:ring-blue-500"
/>
</td>
<td className="px-4 py-3 text-sm text-gray-900">
<input
type="text"
value={doc.suggested_tags?.join(", ")}
onChange={(e) => {
const updatedDocuments = documents.map((d) =>
d.id === doc.id
? {
...d,
suggested_tags: e.target.value
.split(",")
.map((tag) => tag.trim()),
}
: d
);
setDocuments(updatedDocuments);
}}
className="w-full border border-gray-300 rounded px-2 py-1 focus:outline-none focus:ring-2 focus:ring-blue-500"
/>
</td>
</tr>
)
)}
Expand Down

0 comments on commit 186598a

Please sign in to comment.