diff --git a/openpdf-core/src/main/java/org/openpdf/text/pdf/parser/ParsedText.java b/openpdf-core/src/main/java/org/openpdf/text/pdf/parser/ParsedText.java
index 4f7ef4ae1..46e2589d4 100644
--- a/openpdf-core/src/main/java/org/openpdf/text/pdf/parser/ParsedText.java
+++ b/openpdf-core/src/main/java/org/openpdf/text/pdf/parser/ParsedText.java
@@ -67,6 +67,48 @@ public class ParsedText extends ParsedTextImpl {
      */
     private PdfString pdfText = null;
 
+    /**
+     * Creates a {@link ParsedText} for a string shown with the given graphical state.
+     * <p>
+     * If the font uses Identity-H encoding, the string is first re-wrapped so that its code points don't get split
+     * into multiple characters. The string is then decoded with the current font and its width is accumulated per
+     * decoded character from the glyph width, font size, character spacing, word spacing (space characters only)
+     * and horizontal scaling.
+     *
+     * @param text          a PdfString containing code points for the current font, not actually characters
+     * @param graphicsState graphical state
+     * @param textMatrix    transform from text space to graphics (drawing space)
+     * @return the parsed text item for the (possibly re-wrapped) string
+     */
+    protected static ParsedText create(PdfString text, GraphicsState graphicsState, Matrix textMatrix) {
+        byte[] bytes;
+        if (BaseFont.IDENTITY_H.equals(graphicsState.getFont().getEncoding())) {
+            if (graphicsState.getFont().hasUnicodeCMAP()) {
+                if (graphicsState.getFont().hasTwoByteUnicodeCMAP()) {
+                    text = new PdfString(text.toString(), "IDENTITY_H2");
+                } else {
+                    text = new PdfString(text.toString(), "IDENTITY_H1");
+                }
+            } else {
+                text = new PdfString(new String(text.getBytes(), StandardCharsets.UTF_16));
+            }
+            bytes = text.toString().getBytes(StandardCharsets.UTF_16);
+        } else {
+            // NOTE(review): getBytes() without a charset uses the platform default encoding, so this result can
+            // differ between machines; confirm which charset the font's decode() expects here.
+            bytes = text.toString().getBytes();
+        }
+        String decoded = graphicsState.getFont().decode(bytes, 0, bytes.length);
+        float totalWidth = 0;
+        for (char c : decoded.toCharArray()) {
+            float w = graphicsState.getFont().getWidth(c) / 1000.0f;
+            float wordSpacing = Character.isSpaceChar(c) ? graphicsState.getWordSpacing() : 0f;
+            float blockWidth = (w * graphicsState.getFontSize() + graphicsState.getCharacterSpacing() + wordSpacing)
+                    * graphicsState.getHorizontalScaling();
+            totalWidth += blockWidth;
+        }
+        return new ParsedText(text, totalWidth, graphicsState, textMatrix);
+    }
 
     /**
      * This constructor should only be called when the origin for text display is at (0,0) and the graphical state
@@ -76,8 +118,11 @@ public class ParsedText extends ParsedTextImpl {
      * @param graphicsState graphical state
      * @param textMatrix    transform from text space to graphics (drawing space)
+     * @param unscaledWidth width of the text in text space, i.e. before the text matrix and CTM are applied
      */
-    ParsedText(PdfString text, GraphicsState graphicsState, Matrix textMatrix) {
-        this(text, new GraphicsState(graphicsState), textMatrix.multiply(graphicsState.getCtm()),
+    private ParsedText(PdfString text, float unscaledWidth, GraphicsState graphicsState,
+            Matrix textMatrix) {
+        this(text, unscaledWidth, new GraphicsState(graphicsState),
+                textMatrix.multiply(graphicsState.getCtm()),
                 getUnscaledFontSpaceWidth(graphicsState));
     }
 
@@ -85,33 +130,24 @@ public class ParsedText extends ParsedTextImpl {
      * Internal constructor for a parsed text item. The constructors that call it gather some information from the
      * graphical state first.
      *
-     * @param text          This is a PdfString containing code points for the current font, not actually characters. If
-     *                      the font has multiByte glyphs, (Identity-H encoding) we reparse the string so that the code
-     *                      points don't get split into multiple characters.
-     * @param graphicsState graphical state
-     * @param textMatrix    transform from text space to graphics (drawing space)
-     * @param unscaledWidth width of the space character in the font.
+     * @param text               PdfString containing code points for the current font, not actually characters. It
+     *                           is stored as-is; re-wrapping of Identity-H strings happens in
+     *                           {@link #create(PdfString, GraphicsState, Matrix)}.
+     * @param unscaledWidth      width of the text in text space
+     * @param graphicsState      graphical state
+     * @param textMatrix         transform from text space to graphics (drawing space)
+     * @param unscaledSpaceWidth width of the space character in the font.
      */
-    private ParsedText(PdfString text, GraphicsState graphicsState, Matrix textMatrix, float unscaledWidth) {
+    private ParsedText(PdfString text, float unscaledWidth, GraphicsState graphicsState,
+            Matrix textMatrix,
+            float unscaledSpaceWidth) {
         super(null, pointToUserSpace(0, 0, textMatrix),
-                pointToUserSpace(getStringWidth(text.toString(), graphicsState), 0f, textMatrix),
+                pointToUserSpace(unscaledWidth, 0f, textMatrix),
                 pointToUserSpace(1.0f, 0f, textMatrix),
                 convertHeightToUser(graphicsState.getFontAscentDescriptor(), textMatrix),
                 convertHeightToUser(graphicsState.getFontDescentDescriptor(), textMatrix),
-                convertWidthToUser(unscaledWidth, textMatrix));
-        if (BaseFont.IDENTITY_H.equals(graphicsState.getFont().getEncoding())) {
-            if (graphicsState.getFont().hasUnicodeCMAP()) {
-                if (graphicsState.getFont().hasTwoByteUnicodeCMAP()) {
-                    pdfText = new PdfString(text.toString(), "IDENTITY_H2");
-                } else {
-                    pdfText = new PdfString(text.toString(), "IDENTITY_H1");
-                }
-            } else {
-                pdfText = new PdfString(new String(text.getBytes(), StandardCharsets.UTF_16));
-            }
-        } else {
-            pdfText = text;
-        }
+                convertWidthToUser(unscaledSpaceWidth, textMatrix));
+        pdfText = text;
         textToUserSpaceTransformMatrix = textMatrix;
         this.graphicsState = graphicsState;
     }
@@ -199,22 +235,6 @@ private static float convertHeightToUser(float height,
         return distance(endPos, startPos);
     }
 
-    /**
-     * Decodes a Java String containing glyph ids encoded in the font's encoding, and determine the unicode equivalent
-     *
-     * @param in the String that needs to be decoded
-     * @return the decoded String
-     */
-    // FIXME unreachable block and default encoding
-    protected String decode(String in) {
-        byte[] bytes;
-        if (BaseFont.IDENTITY_H.equals(graphicsState.getFont().getEncoding())) {
-            bytes = in.getBytes(StandardCharsets.UTF_16);
-        }
-        bytes = in.getBytes();
-        return graphicsState.getFont().decode(bytes, 0, bytes.length);
-    }
-
     /**
      * This constructor should only be called when the origin for text display is at (0,0) and the graphical state
      * reflects all transformations of the baseline. This is in text space units.
@@ -258,7 +278,6 @@ public List getAsPartialWords() {
         for (int i = 0; i < chars.length; i++) {
             char c = chars[i];
             float w = font.getWidth(c) / 1000.0f;
-
             if (hasSpace[i]) {
                 if (wordAccum.length() > 0) {
                     result.add(createWord(wordAccum, wordStartOffset, totalWidth, getBaseline(),
@@ -339,14 +358,6 @@ private Word createWord(StringBuffer wordAccum,
                 getSingleSpaceWidth(), wordsAreComplete, currentBreakBefore);
     }
 
-    /**
-     * @param gs graphic state including current transformation to page coordinates from text measurement
-     * @return the unscaled (i.e. in Text space) width of our text
-     */
-    public float getUnscaledTextWidth(GraphicsState gs) {
-        return getStringWidth(getFontCodes(), gs);
-    }
-
     /**
      * {@inheritDoc}
      *
diff --git a/openpdf-core/src/main/java/org/openpdf/text/pdf/parser/PdfContentStreamHandler.java b/openpdf-core/src/main/java/org/openpdf/text/pdf/parser/PdfContentStreamHandler.java
index 8d2a7d16b..8c9eb9f54 100644
--- a/openpdf-core/src/main/java/org/openpdf/text/pdf/parser/PdfContentStreamHandler.java
+++ b/openpdf-core/src/main/java/org/openpdf/text/pdf/parser/PdfContentStreamHandler.java
@@ -288,11 +288,13 @@ public CMapAwareDocumentFont getCurrentFont() {
      * @param string the text to display
      */
     void displayPdfString(PdfString string) {
-        ParsedText renderInfo = new ParsedText(string, graphicsState(), textMatrix);
+        ParsedText renderInfo = ParsedText.create(string, graphicsState(), textMatrix);
         if (contextNames.peek() != null) {
             textFragments.add(renderInfo);
         }
-        textMatrix = new Matrix(renderInfo.getUnscaledTextWidth(graphicsState()), 0)
+        // NOTE(review): the removed getUnscaledTextWidth() was documented as a text-space width; confirm that
+        // getWidth() is also in text space, since this value is multiplied into the text matrix below.
+        textMatrix = new Matrix(renderInfo.getWidth(), 0)
                 .multiply(textMatrix);
     }
 
@@ -966,7 +968,7 @@ public void invoke(List operands, PdfContentStreamHandler handler, Pd
             PdfName subType = stream.getAsName(PdfName.SUBTYPE);
             if (PdfName.FORM.equals(subType)) {
                 PdfDictionary resources2 = stream.getAsDict(PdfName.RESOURCES);
-                if (resources2 == null) {
+                if (resources2 == null) {
                     resources2 = resources;
                 }
 