From cb324789feb4b5f53ff1ca591f1d225f1d432b81 Mon Sep 17 00:00:00 2001 From: Samkit Jain <15127115+samkit-jain@users.noreply.github.com> Date: Wed, 28 Oct 2020 00:21:06 +0530 Subject: [PATCH] Fix metadata extraction to correctly handle integer/floating-point values Fixes #297 --- CHANGELOG.md | 4 ++++ pdfplumber/pdf.py | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c6a915d9..02ce280d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/). +## [0.5.25] — Unreleased +### Fixed +- Fix metadata extraction to handle integer/floating-point values ([#297](https://github.com/jsvine/pdfplumber/issues/297)) + ## [0.5.24] — 2020-10-20 ### Added - Added `extra_attrs=[...]` parameter to `.extract_text(...)` ([c8b200e](https://github.com/jsvine/pdfplumber/commit/c8b200e)) ([#28](https://github.com/jsvine/pdfplumber/issues/28)) diff --git a/pdfplumber/pdf.py b/pdfplumber/pdf.py index 9668e13e..686cad7f 100644 --- a/pdfplumber/pdf.py +++ b/pdfplumber/pdf.py @@ -33,10 +33,10 @@ def __init__(self, stream, pages=None, laparams=None, precision=0.001, password= self.metadata[k] = list(map(decode_text, v)) elif isinstance(v, PSLiteral): self.metadata[k] = decode_text(v.name) - elif isinstance(v, bool): - self.metadata[k] = v - else: + elif isinstance(v, (str, bytes)): self.metadata[k] = decode_text(v) + else: + self.metadata[k] = v self.device = PDFPageAggregator(rsrcmgr, laparams=self.laparams) self.interpreter = PDFPageInterpreter(rsrcmgr, self.device)