From 223e40d8872ef8780aed7716f573b5dd9835052e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Omar=20Vergara=20P=C3=A9rez?= Date: Wed, 3 Apr 2024 17:47:34 -0600 Subject: [PATCH 1/2] build(gomod): replace unipdf with go-fitz --- go.mod | 16 +++------------- go.sum | 41 ++--------------------------------------- 2 files changed, 5 insertions(+), 52 deletions(-) diff --git a/go.mod b/go.mod index 833396f..65598fe 100644 --- a/go.mod +++ b/go.mod @@ -5,31 +5,21 @@ go 1.21.3 require ( github.com/chai2010/webp v1.1.1 github.com/gabriel-vasile/mimetype v1.4.3 + github.com/gen2brain/go-fitz v1.23.7 github.com/go-chi/chi/v5 v5.0.10 github.com/signintech/gopdf v0.20.0 github.com/stretchr/testify v1.8.4 - github.com/unidoc/unipdf/v3 v3.53.0 golang.org/x/image v0.14.0 golang.org/x/text v0.14.0 ) require ( - github.com/adrg/strutil v0.1.0 // indirect - github.com/adrg/sysfont v0.1.1 // indirect - github.com/adrg/xdg v0.2.1 // indirect github.com/davecgh/go-spew v1.1.1 // indirect - github.com/konsorten/go-windows-terminal-sequences v1.0.2 // indirect + github.com/kr/pretty v0.1.0 // indirect github.com/phpdave11/gofpdi v1.0.14-0.20211212211723-1f10f9844311 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/sirupsen/logrus v1.5.0 // indirect - github.com/unidoc/freetype v0.2.3 // indirect - github.com/unidoc/pkcs7 v0.2.0 // indirect - github.com/unidoc/timestamp v0.0.0-20200412005513-91597fd3793a // indirect - github.com/unidoc/unitype v0.2.1 // indirect - golang.org/x/crypto v0.15.0 // indirect golang.org/x/net v0.18.0 // indirect - golang.org/x/sys v0.14.0 // indirect - golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect + gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 91073f3..8eb6f1b 100644 --- a/go.sum +++ b/go.sum @@ -1,21 +1,13 @@ -github.com/adrg/strutil v0.1.0 h1:IOQnSOAjbE17+7l1lw4rXgX6JuSeJGdZa7BucTMV3Qg= -github.com/adrg/strutil v0.1.0/go.mod h1:pXRr2+IyX5AEPAF5icj/EeTaiflPSD2hvGjnguilZgE= -github.com/adrg/sysfont v0.1.1 h1:l9WKJNHsIpsfOhYIm1oSj+77837r/vls1MH17SH6gp0= -github.com/adrg/sysfont v0.1.1/go.mod h1:19nTHzfIn/HbngFMet+yNAvwSQYtOJYMI7vWexLWyNw= -github.com/adrg/xdg v0.2.1 h1:VSVdnH7cQ7V+B33qSJHTCRlNgra1607Q8PzEmnvb2Ic= -github.com/adrg/xdg v0.2.1/go.mod h1:ZuOshBmzV4Ta+s23hdfFZnBsdzmoR3US0d7ErpqSbTQ= github.com/chai2010/webp v1.1.1 h1:jTRmEccAJ4MGrhFOrPMpNGIJ/eybIgwKpcACsrTEapk= github.com/chai2010/webp v1.1.1/go.mod h1:0XVwvZWdjjdxpUEIf7b9g9VkHFnInUSYujwqTLEuldU= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0= github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk= +github.com/gen2brain/go-fitz v1.23.7 h1:HPhzEVzmOINvCKqQgB/DwMzYh4ArIgy3tMwq1eJTcbg= +github.com/gen2brain/go-fitz v1.23.7/go.mod h1:HU04vc+RisUh/kvEd2pB0LAxmK1oyXdN4ftyshUr9rQ= github.com/go-chi/chi/v5 v5.0.10 h1:rLz5avzKpjqxrYwXNfmjkrYYXOyLJd37pz53UFHC6vk= github.com/go-chi/chi/v5 v5.0.10/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8= -github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/konsorten/go-windows-terminal-sequences v1.0.2 h1:DB17ag19krx9CFsz4o3enTrPXyIXCl+2iCXH/aMAp9s= -github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= @@ -30,45 +22,16 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/signintech/gopdf v0.20.0 h1:a1rArIMmQCAFzjjCqXPgxynTPkytMccPuGZlUU8Jorw= github.com/signintech/gopdf v0.20.0/go.mod h1:wrLtZoWaRNrS4hphED0oflFoa6IWkOu6M3nJjm4VbO4= -github.com/sirupsen/logrus v1.5.0 h1:1N5EYkVAPEywqZRJd7cwnRtCb6xJx7NH3T3WUTF980Q= -github.com/sirupsen/logrus v1.5.0/go.mod h1:+F7Ogzej0PZc/94MaYx/nvG9jOFMD2osvC3s+Squfpo= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/unidoc/freetype v0.2.3 h1:uPqW+AY0vXN6K2tvtg8dMAtHTEvvHTN52b72XpZU+3I= -github.com/unidoc/freetype v0.2.3/go.mod h1:mJ/Q7JnqEoWtajJVrV6S1InbRv0K/fJerPB5SQs32KI= -github.com/unidoc/pkcs7 v0.0.0-20200411230602-d883fd70d1df/go.mod h1:UEzOZUEpJfDpywVJMUT8QiugqEZC29pDq7kdIZhWCr8= -github.com/unidoc/pkcs7 v0.2.0 h1:0Y0RJR5Zu7OuD+/l7bODXARn6b8Ev2G4A8lI4rzy9kg= -github.com/unidoc/pkcs7 v0.2.0/go.mod h1:UEzOZUEpJfDpywVJMUT8QiugqEZC29pDq7kdIZhWCr8= -github.com/unidoc/timestamp v0.0.0-20200412005513-91597fd3793a h1:RLtvUhe4DsUDl66m7MJ8OqBjq8jpWBXPK6/RKtqeTkc= -github.com/unidoc/timestamp v0.0.0-20200412005513-91597fd3793a/go.mod h1:j+qMWZVpZFTvDey3zxUkSgPJZEX33tDgU/QIA0IzCUw= -github.com/unidoc/unipdf/v3 v3.53.0 h1:xUgG/MH2iUHaar554w6OrL0qTKNy4AWzb19z0RyIExs= -github.com/unidoc/unipdf/v3 v3.53.0/go.mod h1:sEXO/chguUL1RAtyPnGos2BbCBGjL9dusmdT5b13eBg= -github.com/unidoc/unitype v0.2.1 h1:x0jMn7pB/tNrjEVjy3Ukpxo++HOBQaTCXcTYFA6BH3w= -github.com/unidoc/unitype v0.2.1/go.mod h1:mafyug7zYmDOusqa7G0dJV45qp4b6TDAN+pHN7ZUIBU= -golang.org/x/crypto v0.15.0 h1:frVn1TEaCEaZcn3Tmd7Y2b5KKPaZ+I32Q2OA3kYp5TA= -golang.org/x/crypto v0.15.0/go.mod h1:4ChreQoLWfG3xLDer1WdlH5NdlQ3+mwnQq1YTKY+72g= -golang.org/x/image v0.0.0-20211028202545-6944b10bf410/go.mod h1:023OzeP/+EPmXeapQh35lcL3II3LrY8Ic+EFFKVhULM= golang.org/x/image v0.14.0 h1:tNgSxAFe3jC4uYqvZdTr84SZoM1KfwdC9SKIFrLjFn4= golang.org/x/image v0.14.0/go.mod h1:HUYqC05R2ZcZ3ejNQsIHQDQiwWM4JBqmm6MKANTp4LE= golang.org/x/net v0.18.0 h1:mIYleuAkSbHh0tCv7RvjL3F6ZVbLjq4+R7zbOn3Kokg= golang.org/x/net v0.18.0/go.mod h1:/czyP5RqHAH4odGYxBJ1qz0+CE5WZ+2j1YgoEo8F2jQ= -golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200413165638-669c56c373c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q= -golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= -golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From cff85701e9146dbfcd249f3ca9af9a899a9077c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Omar=20Vergara=20P=C3=A9rez?= Date: Wed, 3 Apr 2024 17:48:32 -0600 Subject: [PATCH 2/2] refactor(pdf): replace unipdf with go-fitz at the time to convert pages into images --- pkg/files/documents/pdf.go | 78 ++++++++------------------------------ 1 file changed, 16 insertions(+), 62 deletions(-) diff --git a/pkg/files/documents/pdf.go b/pkg/files/documents/pdf.go index f5c3216..d14162c 100644 --- a/pkg/files/documents/pdf.go +++ b/pkg/files/documents/pdf.go @@ -5,7 +5,6 @@ import ( "bytes" "errors" "fmt" - "image" "image/gif" "image/jpeg" "image/png" @@ -18,8 +17,7 @@ import ( "strings" "github.com/chai2010/webp" - "github.com/unidoc/unipdf/v3/model" - "github.com/unidoc/unipdf/v3/render" + "github.com/gen2brain/go-fitz" "golang.org/x/image/bmp" "golang.org/x/image/tiff" @@ -101,20 +99,11 @@ func (p *Pdf) ConvertTo(fileType, subType string, fileBytes []byte) ([]byte, err switch strings.ToLower(fileType) { case imageType: // Creates a PDF Reader based on the pdf file. - pdfReader, err := model.NewPdfReader(bytes.NewReader(fileBytes)) + doc, err := fitz.NewFromMemory(fileBytes) if err != nil { return nil, fmt.Errorf("ConvertTo: error at opening the input pdf: %w", err) } - // Get the number of pages from the pdf file. - pages, err := pdfReader.GetNumPages() - if err != nil { - return nil, fmt.Errorf( - "ConvertTo: error at getting the number of pages from the input pdf: %w", - err, - ) - } - // Parses the file name of the Zip file. zipFileName := filepath.Join("/tmp", fmt.Sprintf( "%s.zip", @@ -133,27 +122,23 @@ func (p *Pdf) ConvertTo(fileType, subType string, fileBytes []byte) ([]byte, err // Creates a Zip Writer to add files later on. zipWriter := zip.NewWriter(archive) - device := render.NewImageDevice() - // Set the image width. The height will be calculated accordingly. - device.OutputWidth = 2048 - - for pageNum := 1; pageNum <= pages; pageNum++ { + for n := 0; n < doc.NumPage(); n++ { // Parses the file name image. imgFileName := fmt.Sprintf( "%s_%d.%s", strings.TrimSuffix(p.filename, filepath.Ext(p.filename)), - pageNum, + n, subType, ) tmpImgFileMame := filepath.Join("/tmp", imgFileName) // Converts the current pdf page to an image.Image. - img, err := convertPDFPageToImage(pdfReader, device, pageNum) + img, err := doc.Image(n) if err != nil { return nil, fmt.Errorf( "ConvertTo: error at converting the pdf page number %d to image: %w", - pageNum, + n, err, ) } @@ -163,7 +148,7 @@ func (p *Pdf) ConvertTo(fileType, subType string, fileBytes []byte) ([]byte, err if err != nil { return nil, fmt.Errorf( "ConvertTo: error at storing the pdf image from the page #%d: %w", - pageNum, + n, err, ) } @@ -176,7 +161,7 @@ func (p *Pdf) ConvertTo(fileType, subType string, fileBytes []byte) ([]byte, err if err != nil { return nil, fmt.Errorf( "ConvertTo: error at encoding the pdf page %d as png: %w", - pageNum, + n, err, ) } @@ -185,7 +170,7 @@ func (p *Pdf) ConvertTo(fileType, subType string, fileBytes []byte) ([]byte, err if err != nil { return nil, fmt.Errorf( "ConvertTo: error at encoding the pdf page %d as jpeg: %w", - pageNum, + n, err, ) } @@ -194,7 +179,7 @@ func (p *Pdf) ConvertTo(fileType, subType string, fileBytes []byte) ([]byte, err if err != nil { return nil, fmt.Errorf( "ConvertTo: error at encoding the pdf page %d as gif: %w", - pageNum, + n, err, ) } @@ -203,7 +188,7 @@ func (p *Pdf) ConvertTo(fileType, subType string, fileBytes []byte) ([]byte, err if err != nil { return nil, fmt.Errorf( "ConvertTo: error at encoding the pdf page %d as webp: %w", - pageNum, + n, err, ) } @@ -212,7 +197,7 @@ func (p *Pdf) ConvertTo(fileType, subType string, fileBytes []byte) ([]byte, err if err != nil { return nil, fmt.Errorf( "ConvertTo: error at encoding the pdf page %d as tiff: %w", - pageNum, + n, err, ) } @@ -221,7 +206,7 @@ func (p *Pdf) ConvertTo(fileType, subType string, fileBytes []byte) ([]byte, err if err != nil { return nil, fmt.Errorf( "ConvertTo: error at encoding the pdf page %d as bmp: %w", - pageNum, + n, err, ) } @@ -234,7 +219,7 @@ func (p *Pdf) ConvertTo(fileType, subType string, fileBytes []byte) ([]byte, err if err != nil { return nil, fmt.Errorf( "ConvertTo: error at storing the pdf image from the page #%d: %w", - pageNum, + n, err, ) } @@ -245,7 +230,7 @@ func (p *Pdf) ConvertTo(fileType, subType string, fileBytes []byte) ([]byte, err if err != nil { return nil, fmt.Errorf( "ConvertTo: error at creating a zip writer to store the page #%d: %w", - pageNum, + n, err, ) } @@ -253,7 +238,7 @@ func (p *Pdf) ConvertTo(fileType, subType string, fileBytes []byte) ([]byte, err if _, err := io.Copy(w1, imgFile); err != nil { return nil, fmt.Errorf( "ConvertTo: error at copying the content of the page #%d to the zipwriter: %w", - pageNum, + n, err, ) } @@ -401,37 +386,6 @@ func (p *Pdf) ConvertTo(fileType, subType string, fileBytes []byte) ([]byte, err return nil, errors.New("not implemented") } -// convertPDFPageToImage converts the pdf page to an image. -// The functions receives the pdf Reader, the Image Device and the page number. -// Returns a image.Image or an error if something goes wrong. -func convertPDFPageToImage( - pdfReader *model.PdfReader, - device *render.ImageDevice, - pageNum int, -) (image.Image, error) { - // Get the page based on the given page number. - page, err := pdfReader.GetPage(pageNum) - if err != nil { - return nil, fmt.Errorf( - "error at getting a page given a page number %d: %w", - pageNum, - err, - ) - } - - // Render returns an image.Image given a page. - img, err := device.Render(page) - if err != nil { - return nil, fmt.Errorf( - "error at converting the pdf page number %d to image: %w", - pageNum, - err, - ) - } - - return img, nil -} - // DocumentType returns the type of ducument of Pdf. func (p *Pdf) DocumentType() string { return PDF