From c239073d64b3c21b1737f4c15267038c6387710d Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Thu, 6 Jul 2023 08:00:57 +0200 Subject: [PATCH] BUG: Dates conversion not working with Z00'00' (#1946) Closes #1943 --- pypdf/_reader.py | 17 ++++++++++++----- tests/test_reader.py | 17 +++++++++++++++++ 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/pypdf/_reader.py b/pypdf/_reader.py index 2bcd4175e..32105b271 100644 --- a/pypdf/_reader.py +++ b/pypdf/_reader.py @@ -242,14 +242,16 @@ def creation_date(self) -> Optional[datetime]: text = self._get_text(DI.CREATION_DATE) if text is None: return None - return datetime.strptime(text.replace("'", ""), "D:%Y%m%d%H%M%S%z") + return datetime.strptime( + text.replace("Z", "+").replace("'", ""), "D:%Y%m%d%H%M%S%z" + ) @property def creation_date_raw(self) -> Optional[str]: """ The "raw" version of creation date; can return a ``ByteStringObject``. - Typically in the format ``D:YYYYMMDDhhmmss[+-]hh'mm`` where the suffix + Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix is the offset from UTC. """ return self.get(DI.CREATION_DATE) @@ -264,7 +266,9 @@ def modification_date(self) -> Optional[datetime]: text = self._get_text(DI.MOD_DATE) if text is None: return None - return datetime.strptime(text.replace("'", ""), "D:%Y%m%d%H%M%S%z") + return datetime.strptime( + text.replace("Z", "+").replace("'", ""), "D:%Y%m%d%H%M%S%z" + ) @property def modification_date_raw(self) -> Optional[str]: @@ -272,7 +276,7 @@ def modification_date_raw(self) -> Optional[str]: The "raw" version of modification date; can return a ``ByteStringObject``. - Typically in the format ``D:YYYYMMDDhhmmss[+-]hh'mm`` where the suffix + Typically in the format ``D:YYYYMMDDhhmmss[+Z-]hh'mm`` where the suffix is the offset from UTC. """ return self.get(DI.MOD_DATE) @@ -644,7 +648,10 @@ def _build_field( if s not in states: states.append(s) retval[key][NameObject("/_States_")] = ArrayObject(states) - if obj.get(FA.Ff, 0) & FA.FfBits.NoToggleToOff != 0 and "/Off" in retval[key]["/_States_"]: + if ( + obj.get(FA.Ff, 0) & FA.FfBits.NoToggleToOff != 0 + and "/Off" in retval[key]["/_States_"] + ): del retval[key]["/_States_"][retval[key]["/_States_"].index("/Off")] def _check_kids( diff --git a/tests/test_reader.py b/tests/test_reader.py index e4bf9a5dd..69994f5ca 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -112,6 +112,23 @@ def test_read_metadata(pdf_path, expected): assert metadict["/Title"] == docinfo.title +def test_iss1943(): + reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf") + docinfo = reader.metadata + docinfo.update( + { + NameObject("/CreationDate"): TextStringObject("D:20230705005151Z00'00'"), + NameObject("/ModDate"): TextStringObject("D:20230705005151Z00'00'"), + } + ) + docinfo.creation_date + docinfo.creation_date_raw + docinfo.modification_date + docinfo.modification_date_raw + docinfo.update({NameObject("/CreationDate"): NumberObject(1)}) + assert docinfo.creation_date is None + + @pytest.mark.samples() @pytest.mark.parametrize( "pdf_path", [SAMPLE_ROOT / "017-unreadable-meta-data/unreadablemetadata.pdf"]