Skip to content

Commit

Permalink
Export annotation color as hex RGB in JSON (#78)
Browse files Browse the repository at this point in the history
* add color when using json
* minor fixes

---------

Co-authored-by: Andrew Baumann <ab@ab.id.au>
  • Loading branch information
charlesneimog and 0xabu authored Jul 30, 2023
1 parent 9298307 commit 81ae77f
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 3 deletions.
14 changes: 12 additions & 2 deletions pdfannots/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import pdfminer.settings
import pdfminer.utils

from .types import Page, Outline, AnnotationType, Annotation, Document
from .types import Page, Outline, AnnotationType, Annotation, Document, RGB
from .utils import cleanup_text, decode_datetime

pdfminer.settings.STRICT = False
Expand Down Expand Up @@ -71,6 +71,16 @@ def _mkannotation(
# decode as string, normalise line endings, replace special characters
contents = cleanup_text(pdfminer.utils.decode_text(contents))

rgb: typ.Optional[RGB] = None
color = pa.get('C')
if color is not None:
if (isinstance(color, list)
and len(color) == 3
and all(isinstance(e, float) and 0 <= e <= 1 for e in color)):
rgb = RGB(*color)
else:
logger.warning("Invalid color %s in annotation on %s", color, page)

# Rect defines the location of the annotation on the page
rect = pdftypes.resolve1(pa.get('Rect'))

Expand All @@ -94,7 +104,7 @@ def _mkannotation(
created = decode_datetime(createds)

return Annotation(page, annot_type, quadpoints, rect,
contents, author=author, created=created)
contents, author=author, created=created, color=rgb)


def _get_outlines(doc: PDFDocument) -> typ.Iterator[Outline]:
Expand Down
3 changes: 3 additions & 0 deletions pdfannots/printer/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ def annot_to_dict(
if annot.created:
result['created'] = annot.created.strftime('%Y-%m-%dT%H:%M:%S')

if annot.color:
result['color'] = annot.color.ashex()

return result


Expand Down
17 changes: 16 additions & 1 deletion pdfannots/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ class Annotation(ObjectWithPos):
text Text in the order captured (use gettext() for a cleaner form)
author Author of the annotation
created Timestamp the annotation was created
color RGB color of the annotation
last_charseq Sequence number of the most recent character in text
Attributes updated only for StrikeOut annotations:
Expand All @@ -297,7 +298,8 @@ def __init__(
rect: typ.Optional[BoxCoords] = None,
contents: typ.Optional[str] = None,
author: typ.Optional[str] = None,
created: typ.Optional[datetime.datetime] = None):
created: typ.Optional[datetime.datetime] = None,
color: typ.Optional[RGB] = None):

# Construct boxes from quadpoints
boxes = []
Expand All @@ -324,6 +326,7 @@ def __init__(
self.author = author
self.created = created
self.text = []
self.color = color
self.pre_context = None
self.post_context = None
self.boxes = boxes
Expand Down Expand Up @@ -479,3 +482,15 @@ def nearest_outline(
return page.outlines[idx - 1]

return None


class RGB(typ.NamedTuple):
    """An RGB colour whose components are floats, nominally in [0, 1]."""

    red: float
    green: float
    blue: float

    def ashex(self) -> str:
        """Return the colour as a '#rrggbb' lowercase hex string.

        Each component is scaled to 0-255 and rounded to the nearest
        integer. Rounding (rather than truncating) matters because colour
        values are often stored with only a few decimal digits: 0.498 is
        the three-digit form of 127/255, and truncating 0.498 * 255 would
        yield 126 instead of 127.

        Components outside [0, 1] are clamped, so the result is always
        exactly seven characters long.
        """
        def channel(value: float) -> int:
            # Scale to 8 bits, round to nearest, then clamp to a valid byte.
            return min(255, max(0, round(value * 255)))

        return "#{:02x}{:02x}{:02x}".format(
            channel(self.red), channel(self.green), channel(self.blue))

0 comments on commit 81ae77f

Please sign in to comment.