-
Notifications
You must be signed in to change notification settings - Fork 101
/
Copy pathpdf_extract_images.go
123 lines (103 loc) · 2.78 KB
/
pdf_extract_images.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
/*
* Extract images from a PDF file. Walks every page, scans its content stream and finds instances of both
* XObject Images and inline images. Also handles images referenced within XObject Form content streams.
* The extracted images are re-encoded as JPEG files and saved in a zip archive.
*
* Run as: go run pdf_extract_images.go input.pdf output.zip
*/
package main
import (
"archive/zip"
"fmt"
"image/jpeg"
"os"
"github.com/unidoc/unipdf/v3/common/license"
"github.com/unidoc/unipdf/v3/extractor"
"github.com/unidoc/unipdf/v3/model"
)
// init registers the metered UniDoc license key before main runs.
//
// The key is read from the UNIDOC_LICENSE_API_KEY environment variable; a free
// key can be created at https://cloud.unidoc.io. The program panics if the
// library rejects the key.
func init() {
	apiKey := os.Getenv(`UNIDOC_LICENSE_API_KEY`)
	if err := license.SetMeteredKey(apiKey); err != nil {
		panic(err)
	}
}
// main reads the input PDF path and the output zip path from the command line
// and runs the image extraction. It prints a usage line and exits with status 1
// when fewer than two arguments are supplied, and prints the error and exits
// with status 1 when the extraction fails.
func main() {
	args := os.Args
	if len(args) < 3 {
		fmt.Printf("Syntax: go run pdf_extract_images.go input.pdf output.zip\n")
		os.Exit(1)
	}

	inputPath, outputPath := args[1], args[2]
	fmt.Printf("Input file: %s\n", inputPath)

	if err := extractImagesToArchive(inputPath, outputPath); err != nil {
		fmt.Printf("Error: %v\n", err)
		os.Exit(1)
	}
}
// extractImagesToArchive extracts the images of the PDF at inputPath and stores
// them in a new zip archive created at outputPath.
//
// Every page is visited in order. Each image found on a page is converted to a
// Go image, re-encoded as JPEG (quality 80) and written to the archive under
// the name "p<page>_<index>.jpg", where <page> is 1-based and <index> is the
// 0-based position of the image within that page. Progress information (page
// count, per-image position and size, total) is printed to standard output.
//
// The returned error is wrapped with the step (and page/image) that failed.
// Once the output file has been created, the archive is always finalized and
// the file closed, even when extraction fails part-way, so the file on disk is
// a well-formed zip holding the entries written so far. An error from
// finalizing or closing the output is reported when no earlier error occurred.
func extractImagesToArchive(inputPath, outputPath string) (err error) {
	pdfReader, f, err := model.NewPdfReaderFromFile(inputPath, nil)
	if err != nil {
		return fmt.Errorf("opening PDF %q: %w", inputPath, err)
	}
	// The input is only read, so a close error carries no risk of data loss.
	defer f.Close()

	numPages, err := pdfReader.GetNumPages()
	if err != nil {
		return fmt.Errorf("reading page count of %q: %w", inputPath, err)
	}
	fmt.Printf("PDF Num Pages: %d\n", numPages)

	// Prepare output archive.
	zipf, err := os.Create(outputPath)
	if err != nil {
		return fmt.Errorf("creating archive %q: %w", outputPath, err)
	}
	zipw := zip.NewWriter(zipf)
	defer func() {
		// Close the zip writer first (it writes the central directory), then
		// the underlying file. Close errors on a written file can mean lost
		// data, so surface them unless an earlier error is already returned.
		if cerr := zipw.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("finalizing archive %q: %w", outputPath, cerr)
		}
		if cerr := zipf.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("closing archive %q: %w", outputPath, cerr)
		}
	}()

	totalImages := 0
	for pageNum := 1; pageNum <= numPages; pageNum++ {
		fmt.Printf("-----\nPage %d:\n", pageNum)

		page, err := pdfReader.GetPage(pageNum)
		if err != nil {
			return fmt.Errorf("loading page %d: %w", pageNum, err)
		}

		pextract, err := extractor.New(page)
		if err != nil {
			return fmt.Errorf("creating extractor for page %d: %w", pageNum, err)
		}

		// nil options: presumably selects the extractor's defaults — confirm
		// against the unipdf extractor documentation.
		pimages, err := pextract.ExtractPageImages(nil)
		if err != nil {
			return fmt.Errorf("extracting images from page %d: %w", pageNum, err)
		}

		fmt.Printf("%d Images\n", len(pimages.Images))
		for idx, img := range pimages.Images {
			fmt.Printf("Image %d - X: %.2f Y: %.2f, Width: %.2f, Height: %.2f\n",
				totalImages+idx+1, img.X, img.Y, img.Width, img.Height)

			fname := fmt.Sprintf("p%d_%d.jpg", pageNum, idx)

			gimg, err := img.Image.ToGoImage()
			if err != nil {
				return fmt.Errorf("converting image %d on page %d: %w", idx, pageNum, err)
			}

			imgf, err := zipw.Create(fname)
			if err != nil {
				return fmt.Errorf("adding %q to archive: %w", fname, err)
			}

			if err := jpeg.Encode(imgf, gimg, &jpeg.Options{Quality: 80}); err != nil {
				return fmt.Errorf("encoding %q: %w", fname, err)
			}
		}
		totalImages += len(pimages.Images)
	}
	fmt.Printf("Total: %d images\n", totalImages)

	// The deferred finalizer closes the archive and reports any close error.
	return nil
}