Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Generate static appearance for fields #1864

Merged
merged 24 commits into from
Jun 11, 2023
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 29 additions & 9 deletions pypdf/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,11 @@
from .constants import CatalogDictionary as CD
from .constants import (
CheckboxRadioButtonAttributes,
FieldDictionaryAttributes,
GoToActionArguments,
)
from .constants import Core as CO
from .constants import DocumentInformationAttributes as DI
from .constants import FieldDictionaryAttributes as FA
from .constants import PageAttributes as PG
from .constants import PagesAttributes as PA
from .constants import TrailerKeys as TK
Expand Down Expand Up @@ -542,7 +542,7 @@ def get_fields(
default, the mapping name is used for keys.
``None`` if form data could not be located.
"""
field_attributes = FieldDictionaryAttributes.attributes_dict()
field_attributes = FA.attributes_dict()
field_attributes.update(CheckboxRadioButtonAttributes.attributes_dict())
if retval is None:
retval = {}
Expand Down Expand Up @@ -626,6 +626,26 @@ def _build_field(
self._write_field(fileobj, field, field_attributes)
fileobj.write("\n")
retval[key] = Field(field)
obj = retval[key].indirect_reference.get_object() # to get the full object
if obj.get(FA.FT, "") == "/Ch":
retval[key][NameObject("/_States_")] = obj[NameObject(FA.Opt)]
if obj.get(FA.FT, "") == "/Btn" and "/AP" in obj:
# Checkbox
retval[key][NameObject("/_States_")] = ArrayObject(
list(obj["/AP"]["/N"].keys())
)
if "/Off" not in retval[key]["/_States_"]:
retval[key][NameObject("/_States_")].append(NameObject("/Off"))
elif obj.get(FA.FT, "") == "/Btn" and obj.get(FA.Ff, 0) & FA.FfBits.Radio != 0:
states = []
for k in obj.get(FA.Kids, {}):
k = k.get_object()
for s in list(k["/AP"]["/N"].keys()):
if s not in states:
states.append(s)
retval[key][NameObject("/_States_")] = ArrayObject(states)
if obj.get(FA.Ff, 0) & FA.FfBits.NoToggleToOff != 0:
del retval[key]["/_States_"][retval[key]["/_States_"].index("/Off")]

def _check_kids(
self, tree: Union[TreeObject, DictionaryObject], retval: Any, fileobj: Any
Expand All @@ -636,20 +656,20 @@ def _check_kids(
self.get_fields(kid.get_object(), retval, fileobj)

def _write_field(self, fileobj: Any, field: Any, field_attributes: Any) -> None:
field_attributes_tuple = FieldDictionaryAttributes.attributes()
field_attributes_tuple = FA.attributes()
field_attributes_tuple = (
field_attributes_tuple + CheckboxRadioButtonAttributes.attributes()
)

for attr in field_attributes_tuple:
if attr in (
FieldDictionaryAttributes.Kids,
FieldDictionaryAttributes.AA,
FA.Kids,
FA.AA,
):
continue
attr_name = field_attributes[attr]
try:
if attr == FieldDictionaryAttributes.FT:
if attr == FA.FT:
# Make the field type value more clear
types = {
"/Btn": "Button",
Expand All @@ -659,12 +679,12 @@ def _write_field(self, fileobj: Any, field: Any, field_attributes: Any) -> None:
}
if field[attr] in types:
fileobj.write(f"{attr_name}: {types[field[attr]]}\n")
elif attr == FieldDictionaryAttributes.Parent:
elif attr == FA.Parent:
# Let's just write the name of the parent
try:
name = field[attr][FieldDictionaryAttributes.TM]
name = field[attr][FA.TM]
except KeyError:
name = field[attr][FieldDictionaryAttributes.T]
name = field[attr][FA.T]
fileobj.write(f"{attr_name}: {name}\n")
else:
fileobj.write(f"{attr_name}: {field[attr]}\n")
Expand Down
210 changes: 154 additions & 56 deletions pypdf/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
import decimal
import enum
import hashlib
import logging
import re
import struct
import uuid
Expand Down Expand Up @@ -72,10 +71,10 @@
deprecation_with_replacement,
logger_warning,
)
from .constants import AnnotationDictionaryAttributes as AA
from .constants import CatalogAttributes as CA
from .constants import (
AnnotationDictionaryAttributes,
CatalogDictionary,
FieldDictionaryAttributes,
FieldFlag,
FileSpecificationDictionaryEntries,
GoToActionArguments,
Expand All @@ -84,9 +83,11 @@
TypFitArguments,
UserAccessPermissions,
)
from .constants import CatalogAttributes as CA
from .constants import Core as CO
from .constants import EncryptionDictAttributes as ED
from .constants import (
FieldDictionaryAttributes as FA,
)
from .constants import PageAttributes as PG
from .constants import PagesAttributes as PA
from .constants import StreamAttributes as SA
Expand Down Expand Up @@ -127,9 +128,6 @@
ZoomArgType,
)

logger = logging.getLogger(__name__)


OPTIONAL_READ_WRITE_FIELD = FieldFlag(0)
ALL_DOCUMENT_PERMISSIONS = UserAccessPermissions((2**31 - 1) - 3)

Expand Down Expand Up @@ -332,7 +330,20 @@ def _add_page(
pages[NameObject(PA.COUNT)] = NumberObject(page_count + 1)
return page

def set_need_appearances_writer(self) -> None:
def set_need_appearances_writer(self, state: bool = True) -> None:
pubpub-zz marked this conversation as resolved.
Show resolved Hide resolved
"""
Sets the "NeedAppearances" flag in the PDF writer.

The "NeedAppearances" flag indicates whether the appearance dictionary
for form fields should be automatically generated by the PDF viewer or
if the embedded appearance should be used.

Args:
state: The actual value of the NeedAppearances flag.

Returns:
None
"""
# See 12.7.2 and 7.7.2 for more information:
# http://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf
try:
Expand All @@ -343,9 +354,13 @@ def set_need_appearances_writer(self) -> None:
] = self._add_object(DictionaryObject())

need_appearances = NameObject(InteractiveFormDictEntries.NeedAppearances)
self._root_object[CatalogDictionary.ACRO_FORM][need_appearances] = BooleanObject(True) # type: ignore
except Exception as exc:
logger.error("set_need_appearances_writer() catch : %s", repr(exc))
cast(DictionaryObject, self._root_object[CatalogDictionary.ACRO_FORM])[
need_appearances
] = BooleanObject(state)
except Exception as exc: # pragma: no cover
logger_warning(
f"set_need_appearances_writer({state}) catch : {exc}", __name__
)

def add_page(
self,
Expand Down Expand Up @@ -801,11 +816,96 @@ def _get_qualified_field_name(self, parent: DictionaryObject) -> Optional[str]:
return qualified_parent + "." + cast(str, parent["/T"])
return cast(str, parent["/T"])

def _update_text_field(self, field: DictionaryObject) -> None:
    """
    Generate the normal appearance stream of a text or choice field.

    The stream is built from the field rectangle, the default appearance
    string and the current value of the field.  It is then stored as the
    ``/N`` entry of the field's appearance dictionary: a new object is
    registered when no ``/N`` entry exists, otherwise the object already
    referenced by ``/N`` is replaced in the writer's object list.

    Args:
        field: The field dictionary to update in place.  It must hold the
            rectangle (``AA.Rect``) and default appearance (``AA.DA``)
            entries.

    Raises:
        KeyError: If the rectangle or default appearance entry is missing
            from ``field``, or if the document root has no ``/AcroForm``.
        ValueError: If the default appearance string has no ``Tf`` operator.
    """
    # Calculate rectangle dimensions (same size, translated to the origin)
    _rct = cast(RectangleObject, field[AA.Rect])
    rct = RectangleObject((0, 0, _rct[2] - _rct[0], _rct[3] - _rct[1]))

    # Extract font information.  ``str.split()`` tokenises on any run of
    # whitespace, so repeated spaces or line breaks in the default
    # appearance string do not produce empty tokens that would shift the
    # operands located just before the "Tf" operator.
    font_properties: Any = cast(str, field[AA.DA]).split()
    tf_index = font_properties.index("Tf")
    font_name = font_properties[tf_index - 2]
    font_height = float(font_properties[tf_index - 1])
    y_offset = rct.height - 1 - font_height

    # Retrieve field text and selected values
    field_flags = field.get(FA.Ff, 0)
    if field.get(FA.FT, "/Tx") == "/Ch" and field_flags & FA.FfBits.Combo == 0:
        # Choice field without the Combo flag: show every option, one per line
        txt = "\n".join(field.get(FA.Opt, {}))
        sel = field.get("/V", [])
        if not isinstance(sel, list):
            sel = [sel]
    else:  # /Tx
        txt = field.get("/V", "")
        sel = []

    # Generate appearance stream; chunks are joined once at the end
    chunks = [
        f"q\n/Tx BMC \nq\n1 1 {rct.width - 1} {rct.height - 1} re\nW\nBT\n{field[AA.DA]}\n".encode()
    ]
    for line_number, line in enumerate(txt.replace("\n", "\r").split("\r")):
        if line in sel:
            # may be improved: filling could not be made to work, so the
            # selected line is marked with a lined box instead
            chunks.append(
                (
                    f"1 {y_offset - (line_number * font_height * 1.4) - 1} {rct.width - 2} {font_height + 2} re\n"
                    f"0.5 0.5 0.5 rg s\n{field[AA.DA]}\n"
                ).encode()
            )
        if line_number == 0:
            chunks.append(f"2 {y_offset} Td\n".encode())
        else:
            # Td is a relative translation
            chunks.append(f"0 {- font_height * 1.4} Td\n".encode())
        # Backslashes and parentheses are delimiters inside a PDF literal
        # string; escape them (backslash first) so the text cannot end the
        # string early or corrupt the content stream.
        escaped = (
            str(line).replace("\\", "\\\\").replace("(", "\\(").replace(")", "\\)")
        )
        chunks.append(b"(" + escaped.encode("UTF-8") + b") Tj\n")
    chunks.append(b"ET\nQ\nEMC\nQ\n")
    ap_stream = b"".join(chunks)

    # Create appearance dictionary
    dct = DecodedStreamObject.initialize_from_dictionary(
        {
            NameObject("/Type"): NameObject("/XObject"),
            NameObject("/Subtype"): NameObject("/Form"),
            NameObject("/BBox"): rct,
            "__streamdata__": ByteStringObject(ap_stream),
            "/Length": 0,
        }
    )

    # Retrieve font information from AcroForm dictionary
    dr: Any = cast(
        dict, cast(DictionaryObject, self._root_object["/AcroForm"]).get("/DR", {})
    )
    if isinstance(dr, IndirectObject):
        dr = dr.get_object()
    dr = dr.get("/Font", {})
    if isinstance(dr, IndirectObject):
        dr = dr.get_object()

    # Update Resources with font information if necessary
    if font_name in dr:
        dct[NameObject("/Resources")] = DictionaryObject(
            {
                NameObject("/Font"): DictionaryObject(
                    {NameObject(font_name): dr[font_name].indirect_reference}
                )
            }
        )
    if AA.AP not in field:
        field[NameObject(AA.AP)] = DictionaryObject(
            {NameObject("/N"): self._add_object(dct)}
        )
    elif "/N" not in cast(DictionaryObject, field[AA.AP]):
        cast(DictionaryObject, field[NameObject(AA.AP)])[
            NameObject("/N")
        ] = self._add_object(dct)
    else:  # [/AP][/N] exists
        # Reuse the existing object number so current references stay valid
        n = field[AA.AP]["/N"].indirect_reference.idnum  # type: ignore
        self._objects[n - 1] = dct
        dct.indirect_reference = IndirectObject(n, 0, self)

def update_page_form_field_values(
self,
page: PageObject,
fields: Dict[str, Any],
flags: FieldFlag = OPTIONAL_READ_WRITE_FIELD,
auto_regen: Optional[bool] = True,
MartinThoma marked this conversation as resolved.
Show resolved Hide resolved
) -> None:
"""
Update the form field values for a given page from a fields dictionary.
Expand All @@ -821,57 +921,57 @@ def update_page_form_field_values(
flags: An integer (0 to 7). The first bit sets ReadOnly, the
second bit sets Required, the third bit sets NoExport. See
PDF Reference Table 8.70 for details.
auto_regen: set/unset the need_appearances flag ;
MartinThoma marked this conversation as resolved.
Show resolved Hide resolved
the flag is unchanged if auto_regen is None
MartinThoma marked this conversation as resolved.
Show resolved Hide resolved
"""
self.set_need_appearances_writer()
if isinstance(auto_regen, bool):
self.set_need_appearances_writer(auto_regen)
MartinThoma marked this conversation as resolved.
Show resolved Hide resolved
# Iterate through pages, update field values
if PG.ANNOTS not in page:
logger_warning("No fields to update on this page", __name__)
return
for j in range(len(page[PG.ANNOTS])): # type: ignore
writer_annot = page[PG.ANNOTS][j].get_object() # type: ignore
for writer_annot in page[PG.ANNOTS]: # type: ignore
writer_annot = cast(DictionaryObject, writer_annot.get_object())
# retrieve parent field values, if present
writer_parent_annot = DictionaryObject() # fallback if it's not there
if PG.PARENT in writer_annot:
writer_parent_annot = writer_annot[PG.PARENT]
for field in fields:
writer_parent_annot = writer_annot.get(
PG.PARENT, DictionaryObject()
).get_object()
for field, value in fields.items():
if (
writer_annot.get(FieldDictionaryAttributes.T) == field
writer_annot.get(FA.T) == field
or self._get_qualified_field_name(writer_annot) == field
):
if writer_annot.get(FieldDictionaryAttributes.FT) == "/Btn":
writer_annot.update(
{
NameObject(
AnnotationDictionaryAttributes.AS
): NameObject(fields[field])
}
)
writer_annot.update(
{
NameObject(FieldDictionaryAttributes.V): TextStringObject(
fields[field]
)
}
)
if isinstance(value, list):
lst = ArrayObject()
for v in value:
lst.append(TextStringObject(v))
writer_annot[NameObject(FA.V)] = lst
else:
writer_annot[NameObject(FA.V)] = TextStringObject(value)
if writer_annot.get(FA.FT) in ("/Btn"):
# case of Checkbox button (no /FT found in Radio widgets)
writer_annot[NameObject(AA.AS)] = NameObject(value)
elif (
writer_annot.get(FA.FT) == "/Tx"
or writer_annot.get(FA.FT) == "/Ch"
):
# textbox
self._update_text_field(writer_annot)
elif writer_annot.get(FA.FT) == "/Sig":
# signature
logger_warning("Signature forms not implemented yet", __name__)
if flags:
writer_annot.update(
{
NameObject(FieldDictionaryAttributes.Ff): NumberObject(
flags
)
}
)
writer_annot[NameObject(FA.Ff)] = NumberObject(flags)
elif (
writer_parent_annot.get(FieldDictionaryAttributes.T) == field
writer_parent_annot.get(FA.T) == field
or self._get_qualified_field_name(writer_parent_annot) == field
):
writer_parent_annot.update(
{
NameObject(FieldDictionaryAttributes.V): TextStringObject(
fields[field]
)
}
)
writer_parent_annot[NameObject(FA.V)] = TextStringObject(value)
for k in writer_parent_annot[NameObject(FA.Kids)]:
k = k.get_object()
k[NameObject(AA.AS)] = NameObject(
value if value in k[AA.AP]["/N"] else "/Off"
)

def updatePageFormFieldValues(
self,
Expand Down Expand Up @@ -2166,14 +2266,12 @@ def add_uri(
lnk = DictionaryObject()
lnk.update(
{
NameObject(AnnotationDictionaryAttributes.Type): NameObject(PG.ANNOTS),
NameObject(AnnotationDictionaryAttributes.Subtype): NameObject("/Link"),
NameObject(AnnotationDictionaryAttributes.P): page_link,
NameObject(AnnotationDictionaryAttributes.Rect): rect,
NameObject(AA.Type): NameObject(PG.ANNOTS),
NameObject(AA.Subtype): NameObject("/Link"),
NameObject(AA.P): page_link,
NameObject(AA.Rect): rect,
NameObject("/H"): NameObject("/I"),
NameObject(AnnotationDictionaryAttributes.Border): ArrayObject(
border_arr
),
NameObject(AA.Border): ArrayObject(border_arr),
NameObject("/A"): lnk2,
}
)
Expand Down
Loading