forked from SebastiaanKlippert/go-wkhtmltopdf
-
Notifications
You must be signed in to change notification settings - Fork 0
/
wkhtmltopdf.go
402 lines (351 loc) · 10.5 KB
/
wkhtmltopdf.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
// Package wkhtmltopdf contains wrappers around the wkhtmltopdf command-line tool.
package wkhtmltopdf
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
)
// stringStore is a string guarded by a mutex. The package uses one instance
// to cache the wkhtmltopdf path found by findPath() or set through SetPath().
type stringStore struct {
	val string
	sync.Mutex
}

// Get returns the stored string while holding the lock.
func (ss *stringStore) Get() string {
	ss.Lock()
	current := ss.val
	ss.Unlock()
	return current
}

// Set replaces the stored string with s while holding the lock.
func (ss *stringStore) Set(s string) {
	ss.Lock()
	defer ss.Unlock()
	ss.val = s
}
// binPath is the package-wide cache of the wkhtmltopdf executable location.
// It is written by SetPath and by findPath after a successful lookup, and read by GetPath.
var binPath stringStore
// SetPath stores path in the package-level cache as the location of the
// wkhtmltopdf executable. findPath returns without searching when the cached
// value is non-empty. The value is stored as given; it is not checked here.
func SetPath(path string) {
binPath.Set(path)
}
// GetPath returns the cached location of the wkhtmltopdf executable.
// The result is the empty string until SetPath stores a value or findPath
// caches a successful lookup.
func GetPath() string {
return binPath.Get()
}
// Page is the input struct for each page that is identified by a string
// (NewPage describes it as a file path or URL).
type Page struct {
// Input is the string returned by InputFile and placed on the command line after "page".
Input string
// PageOptions supplies the per-page arguments returned by Args.
PageOptions
}
// InputFile returns the Input string unchanged and is part of the PageProvider interface.
func (p *Page) InputFile() string {
return p.Input
}
// Args returns the argument slice of the embedded PageOptions and is part of
// the PageProvider interface.
func (p *Page) Args() []string {
return p.PageOptions.Args()
}
// Reader is part of the PageProvider interface. A Page has no in-memory
// source, so it always returns nil; run only wires stdin for providers whose
// Reader is non-nil.
func (p *Page) Reader() io.Reader {
return nil
}
// NewPage builds a Page for a local or web resource (file path or URL).
// The page starts out with the options produced by NewPageOptions.
func NewPage(input string) *Page {
	page := new(Page)
	page.Input = input
	page.PageOptions = NewPageOptions()
	return page
}
// PageReader is one input page (a HTML document) that is read from an io.Reader.
// You can add only one Page from a reader: run connects the first non-nil
// reader it finds among the pages to the command's stdin and ignores any others.
type PageReader struct {
// Input is the reader returned by Reader.
Input io.Reader
// PageOptions supplies the per-page arguments returned by Args.
PageOptions
}
// InputFile is part of the PageProvider interface. It always returns "-",
// which is what ends up on the command line as this page's input; the actual
// content is supplied through the reader that run attaches to stdin.
func (pr *PageReader) InputFile() string {
return "-"
}
// Args returns the argument slice of the embedded PageOptions and is part of
// the PageProvider interface.
func (pr *PageReader) Args() []string {
return pr.PageOptions.Args()
}
// Reader returns the Input reader and is part of the PageProvider interface.
// The result is nil when Input was never set.
func (pr *PageReader) Reader() io.Reader {
return pr.Input
}
// NewPageReader wraps input in a PageReader. The page starts out with the
// options produced by NewPageOptions.
func NewPageReader(input io.Reader) *PageReader {
	reader := new(PageReader)
	reader.Input = input
	reader.PageOptions = NewPageOptions()
	return reader
}
// PageProvider is the interface which provides a single input page.
// Implemented by Page and PageReader.
type PageProvider interface {
// Args returns the page-specific command-line arguments.
Args() []string
// InputFile returns the input placed on the command line after "page".
InputFile() string
// Reader returns the reader to use as stdin, or nil when the page is not read from memory.
Reader() io.Reader
}
// PageOptions are options for each input page. It combines the embedded
// pageOptions and headerAndFooterOptions, both declared elsewhere in the package.
type PageOptions struct {
pageOptions
headerAndFooterOptions
}
// Args returns the page arguments followed by the header and footer
// arguments in a freshly allocated slice.
func (po *PageOptions) Args() []string {
	args := []string{}
	args = append(args, po.pageOptions.Args()...)
	args = append(args, po.headerAndFooterOptions.Args()...)
	return args
}
// NewPageOptions returns a PageOptions value whose page options and header
// and footer options come from their respective constructors.
func NewPageOptions() PageOptions {
	var opts PageOptions
	opts.pageOptions = newPageOptions()
	opts.headerAndFooterOptions = newHeaderAndFooterOptions()
	return opts
}
// cover describes the optional cover page. PDFGenerator.Args emits the
// "cover" arguments only when Input is non-empty.
type cover struct {
// Input is placed on the command line directly after "cover".
Input string
// pageOptions supplies the arguments appended after the cover input.
pageOptions
}
// toc describes the optional table of contents.
type toc struct {
// Include controls whether PDFGenerator.Args emits the "toc" arguments at all.
Include bool
// allTocOptions bundles every option group that is emitted after "toc".
allTocOptions
}
// allTocOptions groups the option sets that apply to the table of contents.
// PDFGenerator.Args emits them in the order page options, TOC options,
// header and footer options.
type allTocOptions struct {
pageOptions
tocOptions
headerAndFooterOptions
}
// PDFGenerator is the main wkhtmltopdf struct, always use NewPDFGenerator to obtain a new PDFGenerator struct
type PDFGenerator struct {
// globalOptions and outlineOptions are emitted first on the command line, in that order.
globalOptions
outlineOptions
// Cover is the optional cover page; it is used when Cover.Input is non-empty.
Cover cover
// TOC is the optional table of contents; it is used when TOC.Include is true.
TOC toc
OutputFile string // filename to write to, default empty ("-" is passed and the PDF is read from stdout)
// binPath is the executable path used by run; findPath fills it in.
binPath string
// outbuf receives the command's stdout when no writer was set with SetOutput.
outbuf bytes.Buffer
// outWriter, when non-nil, receives the command's stdout instead of outbuf.
outWriter io.Writer
// stdErr, when non-nil, receives the command's stderr instead of a private buffer.
stdErr io.Writer
// pages are the input pages in the order they are emitted on the command line.
pages []PageProvider
}
// Args assembles the full wkhtmltopdf argument list as a string slice.
// The order is: global options, outline options, the cover (only when
// Cover.Input is set), the table of contents (only when TOC.Include is true),
// every page in insertion order, and finally the output target, which is
// OutputFile or "-" when OutputFile is empty.
func (pdfg *PDFGenerator) Args() []string {
	args := []string{}
	args = append(args, pdfg.globalOptions.Args()...)
	args = append(args, pdfg.outlineOptions.Args()...)

	if pdfg.Cover.Input != "" {
		args = append(args, "cover", pdfg.Cover.Input)
		args = append(args, pdfg.Cover.pageOptions.Args()...)
	}

	if pdfg.TOC.Include {
		args = append(args, "toc")
		args = append(args, pdfg.TOC.pageOptions.Args()...)
		args = append(args, pdfg.TOC.tocOptions.Args()...)
		args = append(args, pdfg.TOC.headerAndFooterOptions.Args()...)
	}

	for i := range pdfg.pages {
		provider := pdfg.pages[i]
		args = append(args, "page", provider.InputFile())
		args = append(args, provider.Args()...)
	}

	// the last argument is the output target; "-" stands in for an empty OutputFile
	target := pdfg.OutputFile
	if target == "" {
		target = "-"
	}
	return append(args, target)
}
// ArgString returns Args as a single string with the arguments separated by
// one space. Arguments are joined as they are, without quoting or escaping.
func (pdfg *PDFGenerator) ArgString() string {
return strings.Join(pdfg.Args(), " ")
}
// AddPage appends a new input page to the document; pages are emitted on the
// command line in the order they were added.
// A page is an input HTML page, it can span multiple pages in the output document.
// It is a Page when read from file or URL or a PageReader when read from memory.
func (pdfg *PDFGenerator) AddPage(p PageProvider) {
pdfg.pages = append(pdfg.pages, p)
}
// SetPages replaces all pages with p, discarding any pages added before.
// The slice is stored as passed in, without copying.
func (pdfg *PDFGenerator) SetPages(p []PageProvider) {
pdfg.pages = p
}
// ResetPages drops all pages previously added by AddPage or SetPages.
// This allows reuse of the current instance of PDFGenerator with all of its configuration preserved.
func (pdfg *PDFGenerator) ResetPages() {
pdfg.pages = []PageProvider{}
}
// Buffer returns a pointer to the embedded output buffer. run fills this
// buffer with the command's stdout only when no writer was set with SetOutput.
func (pdfg *PDFGenerator) Buffer() *bytes.Buffer {
return &pdfg.outbuf
}
// Bytes returns the current contents of the embedded output buffer as a byte
// slice. run fills this buffer with the command's stdout only when no writer
// was set with SetOutput.
func (pdfg *PDFGenerator) Bytes() []byte {
return pdfg.outbuf.Bytes()
}
// SetOutput sets the writer that receives the command's stdout (the PDF).
// While a non-nil writer is set, run neither resets nor fills the internal
// buffer, so Bytes(), Buffer() and WriteFile() do not reflect the new output.
// Passing nil switches back to the internal buffer.
func (pdfg *PDFGenerator) SetOutput(w io.Writer) {
pdfg.outWriter = w
}
// SetStderr sets the output writer for Stderr when running the wkhtmltopdf command. You only need to call this when you
// want to print the output of wkhtmltopdf (like the progress messages in verbose mode). If not called, or if w is nil, the
// output of Stderr is kept in an internal buffer and returned as error message if there was an error when calling wkhtmltopdf.
// When a writer is set, a failing run returns the error from executing the command instead.
func (pdfg *PDFGenerator) SetStderr(w io.Writer) {
pdfg.stdErr = w
}
// WriteFile writes the current contents of the internal output buffer to
// filename using os.WriteFile with permission bits 0666, and returns the
// error from that call.
func (pdfg *PDFGenerator) WriteFile(filename string) error {
return os.WriteFile(filename, pdfg.Bytes(), 0666)
}
// lookPath is the lookup function used by findPath; it defaults to exec.LookPath.
// NOTE(review): it is presumably a variable so that tests can substitute it - confirm.
var lookPath = exec.LookPath
// findPath locates the wkhtmltopdf executable and stores the result both on
// pdfg and in the package-level cache. Candidates are tried in this order:
//   - the cached path (GetPath), which ends the search when it is non-empty
//   - the directory part of os.Args[0], made absolute
//   - the bare name "wkhtmltopdf" resolved through lookPath
//   - the directory named by the WKHTMLTOPDF_PATH environment variable
//
// An exec.ErrDot error from the second or third lookup is returned as is, see
// https://pkg.go.dev/os/exec@master#hdr-Executables_in_the_current_directory
// Every other failed lookup ends in a "wkhtmltopdf not found" error.
// The path is cached, meaning later calls return the first hit without
// searching again.
func (pdfg *PDFGenerator) findPath() error {
	const exe = "wkhtmltopdf"

	// a cached location short-circuits the whole search
	if pdfg.binPath = GetPath(); pdfg.binPath != "" {
		return nil
	}

	// remember records a hit in the package cache and on this generator
	remember := func(found string) {
		binPath.Set(found)
		pdfg.binPath = found
	}

	exeDir, err := filepath.Abs(filepath.Dir(os.Args[0]))
	if err != nil {
		return err
	}
	if found, lookErr := lookPath(filepath.Join(exeDir, exe)); lookErr == nil && found != "" {
		remember(found)
		return nil
	}

	found, err := lookPath(exe)
	switch {
	case errors.Is(err, exec.ErrDot):
		return err
	case err == nil && found != "":
		remember(found)
		return nil
	}

	if dir := os.Getenv("WKHTMLTOPDF_PATH"); dir != "" {
		found, err = lookPath(filepath.Join(dir, exe))
		switch {
		case errors.Is(err, exec.ErrDot):
			return err
		case err == nil && found != "":
			remember(found)
			return nil
		}
	}

	return fmt.Errorf("%s not found", exe)
}
// checkDuplicateFlags reports an error for the first global argument starting
// with "--" that occurs more than once. Only the global options are
// inspected, because duplicates can currently only occur there.
func (pdfg *PDFGenerator) checkDuplicateFlags() error {
	seen := make(map[string]struct{})
	for _, arg := range pdfg.globalOptions.Args() {
		// this is not ideal, an option value could also start with "--"
		if !strings.HasPrefix(arg, "--") {
			continue
		}
		if _, dup := seen[arg]; dup {
			return fmt.Errorf("duplicate argument: %s", arg)
		}
		seen[arg] = struct{}{}
	}
	return nil
}
// Create creates the PDF document by running wkhtmltopdf with a background
// context. When no error is returned, the command's stdout is in the writer
// set with SetOutput, or in the internal buffer if none was set.
func (pdfg *PDFGenerator) Create() error {
return pdfg.run(context.Background())
}
// CreateContext is Create with a context passed to exec.CommandContext when calling wkhtmltopdf.
// If the command fails and ctx is already done, the context's error is returned.
func (pdfg *PDFGenerator) CreateContext(ctx context.Context) error {
return pdfg.run(ctx)
}
// run executes wkhtmltopdf with the arguments from Args and waits for it to finish.
//
// The executable is the path stored on the generator by findPath. When that
// is empty (a generator obtained without a lookup), the package-level path
// set with SetPath is used instead.
//
// Stdout goes to the writer set with SetOutput, or to the internal buffer,
// which is reset first. Stderr goes to the writer set with SetStderr, or to a
// private buffer. Stdin is connected to the first page that has a non-nil Reader.
//
// Errors are returned in this order of precedence: a duplicate global flag,
// the context error if ctx is done after a failed run, the captured stderr
// text (only when the private buffer was used and it is not blank), and
// finally the error from running the command.
func (pdfg *PDFGenerator) run(ctx context.Context) error {
	// check for duplicate flags
	if err := pdfg.checkDuplicateFlags(); err != nil {
		return err
	}

	// fall back to the package-level path so a generator that never ran
	// findPath still works after SetPath
	bin := pdfg.binPath
	if bin == "" {
		bin = GetPath()
	}

	// create command
	cmd := exec.CommandContext(ctx, bin, pdfg.Args()...)

	// set stderr to the provided writer, or create a new buffer
	var errBuf *bytes.Buffer
	if pdfg.stdErr != nil {
		cmd.Stderr = pdfg.stdErr
	} else {
		errBuf = new(bytes.Buffer)
		cmd.Stderr = errBuf
	}

	// set output to the desired writer or the internal buffer
	if pdfg.outWriter != nil {
		cmd.Stdout = pdfg.outWriter
	} else {
		pdfg.outbuf.Reset() // reset internal buffer when we use it
		cmd.Stdout = &pdfg.outbuf
	}

	// if there is a pageReader page (from Stdin) we set Stdin to that reader;
	// only the first one is used
	for _, page := range pdfg.pages {
		if r := page.Reader(); r != nil {
			cmd.Stdin = r
			break
		}
	}

	// run cmd to create the PDF
	if err := cmd.Run(); err != nil {
		if ctxErr := ctx.Err(); ctxErr != nil {
			return ctxErr
		}
		// on an error, return the contents of Stderr if it was our own buffer
		// if Stderr was set to a custom writer, just return err
		if errBuf != nil {
			if errStr := errBuf.String(); strings.TrimSpace(errStr) != "" {
				return errors.New(errStr)
			}
		}
		return err
	}
	return nil
}
// NewPDFGenerator returns a new PDFGenerator struct with all options created and
// checks if wkhtmltopdf can be found on the system.
// The error is the result of findPath; the generator is returned alongside it
// even when the error is non-nil.
func NewPDFGenerator() (*PDFGenerator, error) {
pdfg := NewPDFPreparer()
return pdfg, pdfg.findPath()
}
// NewPDFPreparer returns a PDFGenerator with all option groups created but
// without looking for the wkhtmltopdf executable file, so the generator has
// no executable path of its own.
// This is useful to prepare the configuration of a PDF that is generated
// elsewhere, for example to save the config as JSON.
func NewPDFPreparer() *PDFGenerator {
	pdfg := new(PDFGenerator)
	pdfg.globalOptions = newGlobalOptions()
	pdfg.outlineOptions = newOutlineOptions()
	pdfg.Cover.pageOptions = newPageOptions()
	pdfg.TOC.tocOptions = newTocOptions()
	pdfg.TOC.pageOptions = newPageOptions()
	pdfg.TOC.headerAndFooterOptions = newHeaderAndFooterOptions()
	return pdfg
}