Skip to content

Commit

Permalink
PDFs generated by Google Docs specify CID widths as floats, unlike…
Browse files Browse the repository at this point in the history
… most cases

where integers are used as widths.
  • Loading branch information
sambitdash committed Nov 1, 2019
1 parent 6c6460e commit f98d449
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 12 deletions.
10 changes: 5 additions & 5 deletions src/PDFontMetrics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -179,15 +179,15 @@ end
# Widths for the font named by `basefonts`: the CosName is converted to a
# CDTextString and passed to `read_afm`, whose result is returned unchanged.
get_font_widths(basefonts::CosName) = read_afm(convert(CDTextString, basefonts))

function get_cid_font_widths(cosDoc::CosDoc, font::IDDRef{CosDict})
m = IntervalTree{UInt16, Int}()
m = IntervalTree{UInt16, Float32}()
encoding = cosDocGetObject(cosDoc, font, cn"Encoding")
desc = cosDocGetObject(cosDoc, font, cn"DescendantFonts") |> get
w = cosDocGetObject(cosDoc, desc[1], cn"W")
dw = cosDocGetObject(cosDoc, desc[1], cn"DW")
# If widths are not specified or the font encoding is not Identity-H
# widths cannot be extracted.
if w === CosNull || encoding != cn"Identity-H"
return (dw === CosNull) ? CIDWidth() : CIDWidth(get(dw))
return (dw === CosNull) ? CIDWidth() : CIDWidth(Float32(get(dw)))
end
w = get(w)
next = iterate(w)
Expand All @@ -201,16 +201,16 @@ function get_cid_font_widths(cosDoc::CosDoc, font::IDDRef{CosDict})
if ecid isa Vector
for wdo in ecid
width = get(wdo)
m[Interval(UInt16(ccid), UInt16(ccid))] = width
m[Interval(UInt16(ccid), UInt16(ccid))] = Float32(width)
ccid += 1
end
else
(width, state) = iterate(w, state)
m[Interval(UInt16(bcid), UInt16(ecid))] = get(width)
m[Interval(UInt16(bcid), UInt16(ecid))] = Float32(get(width))
end
next = iterate(w, state)
end
return (dw === CosNull) ? CIDWidth(m) : CIDWidth(m, get(dw))
return (dw === CosNull) ? CIDWidth(m) : CIDWidth(m, Float32(get(dw)))
end

get_character_width(n::CosName, afm::AdobeFontMetrics) =
Expand Down
11 changes: 5 additions & 6 deletions src/PDFonts.jl
Original file line number Diff line number Diff line change
Expand Up @@ -389,14 +389,13 @@ function read_cmap(stm::IO)
end

# Glyph widths for a CID font.
# `imap` maps intervals of UInt16 CIDs to a width; `dw` is the width taken from
# the font's DW entry (the outer constructors fall back to 1000 when it is absent).
# NOTE(review): both the Int-typed and the Float32-typed field declarations are
# visible here; the Float32 ones are the post-commit form.
struct CIDWidth
imap::IntervalTree{UInt16, Int}
dw::Int
CIDWidth(m::IntervalTree{UInt16, Int}, tdw::Int) = new(m, tdw)
imap::IntervalTree{UInt16, Float32}
dw::Float32
end

# Convenience constructors for CIDWidth:
#   - interval map only   -> default width of 1000
#   - default width only  -> empty interval map
#   - no arguments        -> empty interval map and default width of 1000
# NOTE(review): the Int-based and Float32-based variants are both listed; the
# Float32 ones (1000f0) are the post-commit form.
CIDWidth(m::IntervalTree{UInt16, Int}) = CIDWidth(m, 1000)
CIDWidth(tdw::Int) = CIDWidth(IntervalTree{UInt16, Int}(), tdw)
CIDWidth() = CIDWidth(1000)
CIDWidth(m::IntervalTree{UInt16, Float32}) = CIDWidth(m, 1000f0)
CIDWidth(tdw::Float32) = CIDWidth(IntervalTree{UInt16, Float32}(), tdw)
CIDWidth() = CIDWidth(1000f0)

mutable struct PDFont
doc::PDDoc
Expand Down
20 changes: 19 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ using PDFIO.Common: read_pkcs12

include("debugIO.jl")

pdftest_ver = "0.0.6"
pdftest_ver = "0.0.7"
pdftest_link = "https://github.com/sambitdash/PDFTest/archive/v"*pdftest_ver

zipfile = "pdftest-"*pdftest_ver
Expand Down Expand Up @@ -364,6 +364,24 @@ end
end
end

# Regression test for fonts whose CID widths are floating point: extract the
# text of the sample PDF, compare it with the stored template, and confirm that
# no files remain open afterwards.
@testset "Floating point CIDWidth" begin
    @test begin
        pdfname = "sample-google-doc.pdf"
        DEBUG && println(pdfname)
        (resfile, template, pdfpath) = local_testfiles(pdfname)
        doc = pdDocOpen(pdfpath)
        io = util_open(resfile, "w")
        try
            extract_text(io, doc)
        finally
            # Always release the output handle and the document, even when
            # extraction throws, so the open-file check below stays meaningful.
            util_close(io)
            pdDocClose(doc)
        end
        @test files_equal(resfile, template)
        # The value of the enclosing `@test` is the open-file check.
        iszero(length(utilPrintOpenFiles()))
    end
end

@testset "Non-standard CMap" begin
@test begin
filename="16-969_o7jp.pdf"
Expand Down

0 comments on commit f98d449

Please sign in to comment.