-
-
Notifications
You must be signed in to change notification settings - Fork 1
/
paperless.go
245 lines (207 loc) · 6.48 KB
/
paperless.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
package main
import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
	"net/url"
	"strings"
)
// PaperlessClient holds the connection settings used to talk to the
// Paperless-NGX REST API.
type PaperlessClient struct {
	// BaseURL is the root URL of the Paperless-NGX instance; request paths
	// are appended to it after a single "/" separator.
	BaseURL string

	// APIToken is sent with every request in the Authorization header as
	// "Token <APIToken>".
	APIToken string

	// HTTPClient is the client that actually executes the requests.
	HTTPClient *http.Client
}
// NewPaperlessClient builds a PaperlessClient for the given instance URL and
// API token. Trailing slashes are stripped from baseURL, and a fresh
// zero-value http.Client is attached for issuing requests.
func NewPaperlessClient(baseURL, apiToken string) *PaperlessClient {
	client := new(PaperlessClient)
	client.BaseURL = strings.TrimRight(baseURL, "/")
	client.APIToken = apiToken
	client.HTTPClient = &http.Client{}
	return client
}
// Do issues a single authenticated request against the Paperless-NGX API.
//
// path is resolved relative to c.BaseURL (leading slashes on path are
// ignored). The API token is always sent in the Authorization header, and a
// JSON Content-Type header is added whenever a body is supplied. The caller
// owns the returned response and is responsible for closing its body.
func (c *PaperlessClient) Do(ctx context.Context, method, path string, body io.Reader) (*http.Response, error) {
	endpoint := c.BaseURL + "/" + strings.TrimLeft(path, "/")

	req, err := http.NewRequestWithContext(ctx, method, endpoint, body)
	if err != nil {
		return nil, err
	}

	req.Header.Set("Authorization", "Token "+c.APIToken)
	if body != nil {
		// Every request body sent by this client is JSON.
		req.Header.Set("Content-Type", "application/json")
	}

	return c.HTTPClient.Do(req)
}
// GetAllTags retrieves every tag known to Paperless-NGX and returns a mapping
// from tag name to tag ID. It follows the paginated "next" links of the
// api/tags/ endpoint until the API reports no further page.
//
// An error is returned if any request fails, any page answers with a status
// other than 200, a page cannot be decoded, or a "next" link cannot be parsed.
func (c *PaperlessClient) GetAllTags(ctx context.Context) (map[string]int, error) {
	tagIDMapping := make(map[string]int)

	path := "api/tags/"
	for path != "" {
		next, err := c.fetchTagsPage(ctx, path, tagIDMapping)
		if err != nil {
			return nil, err
		}
		path, err = relativeNextPath(c.BaseURL, next)
		if err != nil {
			return nil, err
		}
	}

	return tagIDMapping, nil
}

// fetchTagsPage requests one page of tags, records its entries in into, and
// returns the raw "next" link from the response (empty on the last page).
// Keeping each page in its own call lets the deferred Close run per page
// instead of piling up until GetAllTags returns.
func (c *PaperlessClient) fetchTagsPage(ctx context.Context, path string, into map[string]int) (string, error) {
	resp, err := c.Do(ctx, "GET", path, nil)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is only used to enrich the error message.
		bodyBytes, _ := io.ReadAll(resp.Body)
		return "", fmt.Errorf("error fetching tags: %d, %s", resp.StatusCode, string(bodyBytes))
	}

	var page struct {
		Results []struct {
			ID   int    `json:"id"`
			Name string `json:"name"`
		} `json:"results"`
		Next string `json:"next"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&page); err != nil {
		return "", err
	}

	for _, tag := range page.Results {
		into[tag.Name] = tag.ID
	}
	return page.Next, nil
}

// relativeNextPath turns the "next" link of a paginated response into a path
// (including its query string) that can be handed to Do, which prepends
// baseURL again. An empty next yields an empty path.
//
// When next starts with baseURL the remainder is used directly. Otherwise
// (for example when the server reports a different scheme or host than the
// one configured) only the path and query of next are kept, with the path
// component of baseURL removed so it is not duplicated.
func relativeNextPath(baseURL, next string) (string, error) {
	if next == "" {
		return "", nil
	}
	if strings.HasPrefix(next, baseURL+"/") {
		return strings.TrimPrefix(next, baseURL+"/"), nil
	}

	parsed, err := url.Parse(next)
	if err != nil {
		return "", fmt.Errorf("parsing next page URL %q: %w", next, err)
	}
	rel := parsed.RequestURI()

	if base, err := url.Parse(baseURL); err == nil {
		prefix := strings.TrimRight(base.EscapedPath(), "/")
		if prefix != "" && strings.HasPrefix(rel, prefix+"/") {
			rel = strings.TrimPrefix(rel, prefix)
		}
	}

	return strings.TrimLeft(rel, "/"), nil
}
// GetDocumentsByTags searches Paperless-NGX for documents carrying all of the
// given tags and returns them with their tag IDs resolved to tag names.
//
// The search is expressed as a space-separated list of "tag:<name>" terms
// passed in the "query" parameter of api/documents/. Only the results
// contained in that single response are returned. A tag ID that is not
// present in the tag list fetched from the API is reported as an empty name.
//
// An error is returned if either request fails, the search answers with a
// status other than 200, or the response cannot be decoded.
func (c *PaperlessClient) GetDocumentsByTags(ctx context.Context, tags []string) ([]Document, error) {
	tagQueries := make([]string, len(tags))
	for i, tag := range tags {
		tagQueries[i] = "tag:" + tag
	}
	searchQuery := strings.Join(tagQueries, " ")
	path := fmt.Sprintf("api/documents/?query=%s", urlEncode(searchQuery))

	resp, err := c.Do(ctx, "GET", path, nil)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body is only used to enrich the error message.
		bodyBytes, _ := io.ReadAll(resp.Body)
		return nil, fmt.Errorf("error searching documents: %d, %s", resp.StatusCode, string(bodyBytes))
	}

	var documentsResponse GetDocumentsApiResponse
	if err := json.NewDecoder(resp.Body).Decode(&documentsResponse); err != nil {
		return nil, err
	}

	allTags, err := c.GetAllTags(ctx)
	if err != nil {
		return nil, err
	}

	// Invert the name->ID mapping once so each tag ID is resolved with a
	// single lookup instead of a scan over every known tag.
	tagNameByID := make(map[int]string, len(allTags))
	for name, id := range allTags {
		tagNameByID[id] = name
	}

	documents := make([]Document, 0, len(documentsResponse.Results))
	for _, result := range documentsResponse.Results {
		tagNames := make([]string, len(result.Tags))
		for i, tagID := range result.Tags {
			// Unknown IDs yield the zero value "", leaving an empty slot.
			tagNames[i] = tagNameByID[tagID]
		}
		documents = append(documents, Document{
			ID:      result.ID,
			Title:   result.Title,
			Content: result.Content,
			Tags:    tagNames,
		})
	}

	return documents, nil
}
// DownloadPDF fetches the file behind api/documents/<id>/download/ for the
// given document and returns its raw bytes. Any status other than 200 is
// turned into an error that carries the status code and the response body.
func (c *PaperlessClient) DownloadPDF(ctx context.Context, document Document) ([]byte, error) {
	resp, err := c.Do(ctx, http.MethodGet, fmt.Sprintf("api/documents/%d/download/", document.ID), nil)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusOK {
		return io.ReadAll(resp.Body)
	}

	// The body is read only to enrich the error message.
	failure, _ := io.ReadAll(resp.Body)
	return nil, fmt.Errorf("error downloading document %d: %d, %s", document.ID, resp.StatusCode, string(failure))
}
// UpdateDocuments applies the suggested tags and title of each entry in
// documents to Paperless-NGX with one PATCH request per document.
//
// For every document the suggested tags are used, falling back to the
// original document's tags when no suggestion is present. autoTag and
// manualTag are never written back, and tag names unknown to Paperless-NGX
// are logged and skipped. The title is only sent when a non-empty suggestion
// exists; it is shortened to at most 128 characters first.
//
// Processing stops at the first failure (tag lookup, JSON encoding, request
// error, or a status other than 200) and that error is returned; documents
// updated before the failure stay updated.
func (c *PaperlessClient) UpdateDocuments(ctx context.Context, documents []DocumentSuggestion) error {
	// Title cap carried over from the original implementation; presumably it
	// mirrors the title length limit of Paperless-NGX - TODO confirm.
	const maxTitleRunes = 128

	// Fetch all available tags once; they are needed to map names to IDs.
	availableTags, err := c.GetAllTags(ctx)
	if err != nil {
		log.Printf("Error fetching available tags: %v", err)
		return err
	}

	for _, document := range documents {
		documentID := document.ID

		tags := document.SuggestedTags
		if len(tags) == 0 {
			tags = document.OriginalDocument.Tags
		}
		// Remove autoTag to prevent an infinite loop (even if it is in the
		// original tags).
		tags = removeTagFromList(tags, autoTag)

		// Must stay a non-nil slice so that "no tags" is encoded as [] and
		// not as null.
		newTags := []int{}
		for _, tagName := range tags {
			tagID, exists := availableTags[tagName]
			if !exists {
				log.Printf("Tag '%s' does not exist in paperless-ngx, skipping.", tagName)
				continue
			}
			// Skip the tag that we are filtering on.
			if tagName == manualTag {
				continue
			}
			newTags = append(newTags, tagID)
		}

		updatedFields := map[string]interface{}{"tags": newTags}

		suggestedTitle := truncateToRunes(document.SuggestedTitle, maxTitleRunes)
		if suggestedTitle != "" {
			updatedFields["title"] = suggestedTitle
		} else {
			log.Printf("No valid title found for document %d, skipping.", documentID)
		}

		jsonData, err := json.Marshal(updatedFields)
		if err != nil {
			log.Printf("Error marshalling JSON for document %d: %v", documentID, err)
			return err
		}

		path := fmt.Sprintf("api/documents/%d/", documentID)
		status, errBody, err := c.patchJSON(ctx, path, jsonData)
		if err != nil {
			log.Printf("Error updating document %d: %v", documentID, err)
			return err
		}
		if status != http.StatusOK {
			log.Printf("Error updating document %d: %d, %s", documentID, status, string(errBody))
			return fmt.Errorf("error updating document %d: %d, %s", documentID, status, string(errBody))
		}

		log.Printf("Document %d updated successfully.", documentID)
	}

	return nil
}

// patchJSON sends payload as a PATCH request to path and returns the response
// status code. For any status other than 200 the response body is returned as
// well so the caller can report it. The response body is always closed before
// returning, so calling this in a loop does not accumulate open bodies.
func (c *PaperlessClient) patchJSON(ctx context.Context, path string, payload []byte) (int, []byte, error) {
	resp, err := c.Do(ctx, "PATCH", path, bytes.NewBuffer(payload))
	if err != nil {
		return 0, nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusOK {
		// Drain the body so the underlying connection can be reused; a
		// failure here does not affect the outcome of the update.
		_, _ = io.Copy(io.Discard, resp.Body)
		return resp.StatusCode, nil, nil
	}

	// Best effort: the body is only used for diagnostics.
	body, _ := io.ReadAll(resp.Body)
	return resp.StatusCode, body, nil
}

// truncateToRunes returns s shortened to at most limit characters. It cuts on
// rune boundaries so a multi-byte UTF-8 character is never split in half.
func truncateToRunes(s string, limit int) string {
	if len(s) <= limit {
		// At most limit bytes means at most limit runes; nothing to do.
		return s
	}
	runes := []rune(s)
	if len(runes) <= limit {
		return s
	}
	return string(runes[:limit])
}
// urlEncode escapes s so it can be placed safely inside a URL query value.
// Spaces become "+", and every other character that has a special meaning in
// a query string (such as "&", "=", "#", "%", "+" or ":") is percent-encoded,
// so arbitrary text cannot break or alter the surrounding query.
func urlEncode(s string) string {
	return url.QueryEscape(s)
}