diff --git a/.gitignore b/.gitignore index 97f93ad19..75ef18635 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ docs/_build/ # Files generated by some of the scripts dont_commit_*.pdf PyPDF2-output.pdf +annotated-pdf-link.pdf Image9.png PyPDF2_pdfLocation.txt diff --git a/PyPDF2/_writer.py b/PyPDF2/_writer.py index 6f4c8db9c..d9e716d85 100644 --- a/PyPDF2/_writer.py +++ b/PyPDF2/_writer.py @@ -77,6 +77,7 @@ from .constants import TrailerKeys as TK from .constants import TypFitArguments, UserAccessPermissions from .generic import ( + AnnotationBuilder, ArrayObject, BooleanObject, ByteStringObject, @@ -1555,84 +1556,28 @@ def add_link( fit: FitType = "/Fit", *args: ZoomArgType, ) -> None: - """ - Add an internal link from a rectangular area to the specified page. - - :param int pagenum: index of the page on which to place the link. - :param int pagedest: index of the page to which the link should go. - :param rect: :class:`RectangleObject` or array of four - integers specifying the clickable rectangular area - ``[xLL, yLL, xUR, yUR]``, or string in the form ``"[ xLL yLL xUR yUR ]"``. - :param border: if provided, an array describing border-drawing - properties. See the PDF spec for details. No border will be - drawn if this argument is omitted. - :param str fit: Page fit or 'zoom' option (see below). Additional arguments may need - to be supplied. Passing ``None`` will be read as a null value for that coordinate. - - .. 
list-table:: Valid ``zoom`` arguments (see Table 8.2 of the PDF 1.7 reference for details) - :widths: 50 200 - - * - /Fit - - No additional arguments - * - /XYZ - - [left] [top] [zoomFactor] - * - /FitH - - [top] - * - /FitV - - [left] - * - /FitR - - [left] [bottom] [right] [top] - * - /FitB - - No additional arguments - * - /FitBH - - [top] - * - /FitBV - - [left] - """ - pages_obj = cast(Dict[str, Any], self.get_object(self._pages)) - page_link = pages_obj[PA.KIDS][pagenum] - page_dest = pages_obj[PA.KIDS][pagedest] # TODO: switch for external link - page_ref = cast(Dict[str, Any], self.get_object(page_link)) - - border_arr: BorderArrayType - if border is not None: - border_arr = [NameObject(n) for n in border[:3]] - if len(border) == 4: - dash_pattern = ArrayObject([NameObject(n) for n in border[3]]) - border_arr.append(dash_pattern) - else: - border_arr = [NumberObject(0)] * 3 + deprecate_with_replacement( + "add_link", "add_annotation(AnnotationBuilder.link(...))" + ) if isinstance(rect, str): - rect = NameObject(rect) + rect = rect.strip()[1:-1] + rect = RectangleObject( + [float(num) for num in rect.split(" ") if len(num) > 0] + ) elif isinstance(rect, RectangleObject): pass else: rect = RectangleObject(rect) - zoom_args: ZoomArgsType = [ - NullObject() if a is None else NumberObject(a) for a in args - ] - dest = Destination( - NameObject("/LinkName"), page_dest, NameObject(fit), *zoom_args - ) # TODO: create a better name for the link - - lnk = DictionaryObject( - { - NameObject("/Type"): NameObject(PG.ANNOTS), - NameObject("/Subtype"): NameObject("/Link"), - NameObject("/P"): page_link, - NameObject("/Rect"): rect, - NameObject("/Border"): ArrayObject(border_arr), - NameObject("/Dest"): dest.dest_array, - } + annotation = AnnotationBuilder.link( + rect=rect, + border=border, + target_page_index=pagedest, + fit=fit, + fit_args=args, ) - lnk_ref = self._add_object(lnk) - - if PG.ANNOTS in page_ref: - page_ref[PG.ANNOTS].append(lnk_ref) - else: - 
page_ref[NameObject(PG.ANNOTS)] = ArrayObject([lnk_ref]) + return self.add_annotation(page_number=pagenum, annotation=annotation) def addLink( # pragma: no cover self, @@ -1648,7 +1593,9 @@ def addLink( # pragma: no cover Use :meth:`add_link` instead. """ - deprecate_with_replacement("addLink", "add_link") + deprecate_with_replacement( + "addLink", "add_annotation(AnnotationBuilder.link(...))", "4.0.0" + ) return self.add_link(pagenum, pagedest, rect, border, fit, *args) _valid_layouts = ( @@ -1873,6 +1820,18 @@ def add_annotation(self, page_number: int, annotation: Dict[str, Any]) -> None: page[NameObject("/Annots")] = ArrayObject() assert page.annotations is not None + # Internal link annotations need the correct object type for the + # destination + if to_add.get("/Subtype") == "/Link" and NameObject("/Dest") in to_add: + tmp = cast(dict, to_add[NameObject("/Dest")]) + dest = Destination( + NameObject("/LinkName"), + tmp["target_page_index"], + tmp["fit"], + *tmp["fit_args"], + ) + to_add[NameObject("/Dest")] = dest.dest_array + ind_obj = self._add_object(to_add) page.annotations.append(ind_obj) diff --git a/PyPDF2/generic.py b/PyPDF2/generic.py index 0b79cdb50..8fc597c0a 100644 --- a/PyPDF2/generic.py +++ b/PyPDF2/generic.py @@ -70,6 +70,7 @@ ) from .constants import CheckboxRadioButtonAttributes, FieldDictionaryAttributes from .constants import FilterTypes as FT +from .constants import PageAttributes as PG from .constants import StreamAttributes as SA from .constants import TypArguments as TA from .constants import TypFitArguments as TF @@ -1375,7 +1376,9 @@ class RectangleObject(ArrayObject): * :attr:`trimbox ` """ - def __init__(self, arr: Tuple[float, float, float, float]) -> None: + def __init__( + self, arr: Union["RectangleObject", Tuple[float, float, float, float]] + ) -> None: # must have four points assert len(arr) == 4 # automatically convert arr[x] into NumberObject(arr[x]) if necessary @@ -2075,10 +2078,12 @@ def hex_to_rgb(value: str) -> 
Tuple[float, float, float]: class AnnotationBuilder: + from .types import FitType, ZoomArgType + @staticmethod def free_text( text: str, - rect: Tuple[float, float, float, float], + rect: Union[RectangleObject, Tuple[float, float, float, float]], font: str = "Helvetica", bold: bool = False, italic: bool = False, @@ -2087,7 +2092,21 @@ def free_text( border_color: str = "000000", background_color: str = "ffffff", ) -> DictionaryObject: - """Add text in a rectangle to a page.""" + """ + Add text in a rectangle to a page. + + :param str text: Text to be added + :param :class:`RectangleObject` rect: or array of four + integers specifying the clickable rectangular area + ``[xLL, yLL, xUR, yUR]`` + :param str font: Name of the Font, e.g. 'Helvetica' + :param bool bold: Print the text in bold + :param bool italic: Print the text in italic + :param str font_size: How big the text will be, e.g. '14pt' + :param str font_color: Hex-string for the color + :param str border_color: Hex-string for the border color + :param str background_color: Hex-string for the background of the annotation + """ font_str = "font: " if bold is True: font_str = font_str + "bold " @@ -2124,18 +2143,20 @@ def free_text( def line( p1: Tuple[float, float], p2: Tuple[float, float], - rect: Tuple[float, float, float, float], + rect: Union[RectangleObject, Tuple[float, float, float, float]], text: str = "", title_bar: str = "", ) -> DictionaryObject: """ Draw a line on the PDF. 
- :param p1: First point - :param p2: Second point - :param rect: Rectangle - :param text: Text to be displayed as the line annotation - :param title_bar: Text to be displayed in the title bar of the + :param Tuple[float, float] p1: First point + :param Tuple[float, float] p2: Second point + :param :class:`RectangleObject` rect: or array of four + integers specifying the clickable rectangular area + ``[xLL, yLL, xUR, yUR]`` + :param str text: Text to be displayed as the line annotation + :param str title_bar: Text to be displayed in the title bar of the annotation; by convention this is the name of the author """ line_obj = DictionaryObject( @@ -2169,3 +2190,112 @@ def line( } ) return line_obj + + @staticmethod + def link( + rect: Union[RectangleObject, Tuple[float, float, float, float]], + border: Optional[ArrayObject] = None, + url: Optional[str] = None, + target_page_index: Optional[int] = None, + fit: FitType = "/Fit", + fit_args: Tuple[ZoomArgType, ...] = tuple(), + ) -> DictionaryObject: + """ + Add a link to the document. + + The link can either be an external link or an internal link. + + An external link requires the URL parameter. + An internal link requires the target_page_index, fit, and fit args. + + + :param :class:`RectangleObject` rect: or array of four + integers specifying the clickable rectangular area + ``[xLL, yLL, xUR, yUR]`` + :param border: if provided, an array describing border-drawing + properties. See the PDF spec for details. No border will be + drawn if this argument is omitted. + - horizontal corner radius, + - vertical corner radius, and + - border width + - Optionally: Dash + :param str url: Link to a website (if you want to make an external link) + :param int target_page_index: index of the page to which the link should go + (if you want to make an internal link) + :param str fit: Page fit or 'zoom' option (see below). Additional arguments may need + to be supplied. 
Passing ``None`` will be read as a null value for that coordinate. + :param Tuple[int, ...] fit_args: Parameters for the fit argument. + + + .. list-table:: Valid ``fit`` arguments (see Table 8.2 of the PDF 1.7 reference for details) + :widths: 50 200 + + * - /Fit + - No additional arguments + * - /XYZ + - [left] [top] [zoomFactor] + * - /FitH + - [top] + * - /FitV + - [left] + * - /FitR + - [left] [bottom] [right] [top] + * - /FitB + - No additional arguments + * - /FitBH + - [top] + * - /FitBV + - [left] + """ + from .types import BorderArrayType + + is_external = url is not None + is_internal = target_page_index is not None + if not is_external and not is_internal: + raise ValueError( + "Either 'url' or 'target_page_index' have to be provided. Both were None." + ) + if is_external and is_internal: + raise ValueError( + f"Either 'url' or 'target_page_index' have to be provided. url={url}, target_page_index={target_page_index}" + ) + + border_arr: BorderArrayType + if border is not None: + border_arr = [NameObject(n) for n in border[:3]] + if len(border) == 4: + dash_pattern = ArrayObject([NameObject(n) for n in border[3]]) + border_arr.append(dash_pattern) + else: + border_arr = [NumberObject(0)] * 3 + + link_obj = DictionaryObject( + { + NameObject("/Type"): NameObject(PG.ANNOTS), + NameObject("/Subtype"): NameObject("/Link"), + NameObject("/Rect"): RectangleObject(rect), + NameObject("/Border"): ArrayObject(border_arr), + } + ) + if is_external: + link_obj[NameObject("/A")] = DictionaryObject( + { + NameObject("/S"): NameObject("/URI"), + NameObject("/Type"): NameObject("/Action"), + NameObject("/URI"): TextStringObject(url), + } + ) + if is_internal: + fit_arg_ready = [ + NullObject() if a is None else NumberObject(a) for a in fit_args + ] + # This needs to be updated later! 
+ dest_deferred = DictionaryObject( + { + "target_page_index": NumberObject(target_page_index), + "fit": NameObject(fit), + "fit_args": ArrayObject(fit_arg_ready), + } + ) + link_obj[NameObject("/Dest")] = dest_deferred + return link_obj diff --git a/docs/user/adding-pdf-annotations.md b/docs/user/adding-pdf-annotations.md index 78d130f21..890dfde1c 100644 --- a/docs/user/adding-pdf-annotations.md +++ b/docs/user/adding-pdf-annotations.md @@ -82,3 +82,47 @@ writer.add_annotation(page_number=0, annotation=annotation) with open("annotated-pdf.pdf", "wb") as fp: writer.write(fp) ``` + +## Link + +If you want to add a link, you can use +the {py:class}`AnnotationBuilder <PyPDF2.generic.AnnotationBuilder>`: + +```python +pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf") +reader = PdfReader(pdf_path) +page = reader.pages[0] +writer = PdfWriter() +writer.add_page(page) + +# Add the link +annotation = AnnotationBuilder.link( + rect=(50, 550, 200, 650), + url="https://martin-thoma.com/", +) +writer.add_annotation(page_number=0, annotation=annotation) + +# Write the annotated file to disk +with open("annotated-pdf.pdf", "wb") as fp: + writer.write(fp) +``` + +You can also add internal links: + +```python +pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf") +reader = PdfReader(pdf_path) +page = reader.pages[0] +writer = PdfWriter() +writer.add_page(page) + +# Add the link +annotation = AnnotationBuilder.link( + rect=(50, 550, 200, 650), target_page_index=3, fit="/FitH", fit_args=(123,) +) +writer.add_annotation(page_number=0, annotation=annotation) + +# Write the annotated file to disk +with open("annotated-pdf.pdf", "wb") as fp: + writer.write(fp) +``` diff --git a/sample-files b/sample-files index 200644f72..b6f4ff3de 160000 --- a/sample-files +++ b/sample-files @@ -1 +1 @@ -Subproject commit 200644f7219811c3930ad1732ef70c570ece2d16 +Subproject commit b6f4ff3de00745783d79f25cb8803901d1f20d28 diff --git a/tests/test_generic.py b/tests/test_generic.py index 4a5f6dd91..11871ae03 100644 --- 
a/tests/test_generic.py +++ b/tests/test_generic.py @@ -510,7 +510,7 @@ def test_annotation_builder_free_text(): writer.add_page(page) # Act - annotation = AnnotationBuilder.free_text( + free_text_annotation = AnnotationBuilder.free_text( "Hello World\nThis is the second line!", rect=(50, 550, 200, 650), font="Arial", @@ -521,7 +521,7 @@ def test_annotation_builder_free_text(): border_color="0000ff", background_color="cdcdcd", ) - writer.add_annotation(0, annotation) + writer.add_annotation(0, free_text_annotation) # Assert: You need to inspect the file manually target = "annotated-pdf.pd" @@ -540,13 +540,13 @@ def test_annotation_builder_line(): writer.add_page(page) # Act - annotation = AnnotationBuilder.line( + line_annotation = AnnotationBuilder.line( text="Hello World\nLine2", rect=(50, 550, 200, 650), p1=(50, 550), p2=(200, 650), ) - writer.add_annotation(0, annotation) + writer.add_annotation(0, line_annotation) # Assert: You need to inspect the file manually target = "annotated-pdf.pd" @@ -556,5 +556,63 @@ def test_annotation_builder_line(): os.remove(target) # comment this out for manual inspection +def test_annotation_builder_link(): + # Arrange + pdf_path = os.path.join(RESOURCE_ROOT, "outline-without-title.pdf") + reader = PdfReader(pdf_path) + page = reader.pages[0] + writer = PdfWriter() + writer.add_page(page) + + # Act + # Part 1: Too many args + with pytest.raises(ValueError) as exc: + AnnotationBuilder.link( + rect=(50, 550, 200, 650), + url="https://martin-thoma.com/", + target_page_index=3, + ) + assert ( + exc.value.args[0] + == "Either 'url' or 'target_page_index' have to be provided. url=https://martin-thoma.com/, target_page_index=3" + ) + + # Part 2: Too few args + with pytest.raises(ValueError) as exc: + AnnotationBuilder.link( + rect=(50, 550, 200, 650), + ) + assert ( + exc.value.args[0] + == "Either 'url' or 'target_page_index' have to be provided. Both were None." 
+ ) + + # Part 3: External Link + link_annotation = AnnotationBuilder.link( + rect=(50, 50, 100, 100), + url="https://martin-thoma.com/", + border=[1, 0, 6, [3, 2]], + ) + writer.add_annotation(0, link_annotation) + + # Part 4: Internal Link + link_annotation = AnnotationBuilder.link( + rect=(100, 100, 300, 200), + target_page_index=1, + border=[50, 10, 4], + ) + writer.add_annotation(0, link_annotation) + + for page in reader.pages[1:]: + writer.add_page(page) + + # Assert: You need to inspect the file manually + target = "annotated-pdf-link.pdf" + with open(target, "wb") as fp: + writer.write(fp) + + # os.remove(target) # comment this out for manual inspection + + def test_CheckboxRadioButtonAttributes_opt(): assert "/Opt" in CheckboxRadioButtonAttributes.attributes_dict() diff --git a/tests/test_page.py b/tests/test_page.py index 6cbc068ce..0ca50c0eb 100644 --- a/tests/test_page.py +++ b/tests/test_page.py @@ -469,3 +469,32 @@ def test_empyt_password_1088(): def test_arab_text_extraction(): reader = PdfReader(EXTERNAL_ROOT / "015-arabic/habibi.pdf") assert reader.pages[0].extract_text() == "habibi حَبيبي" + + +def test_read_link_annotation(): + reader = PdfReader(EXTERNAL_ROOT / "016-libre-office-link/libre-office-link.pdf") + assert len(reader.pages[0].annotations) == 1 + annot = dict(reader.pages[0].annotations[0].get_object()) + expected = { + "/Type": "/Annot", + "/Subtype": "/Link", + "/A": DictionaryObject( + { + "/S": "/URI", + "/Type": "/Action", + "/URI": "https://martin-thoma.com/", + } + ), + "/Border": ArrayObject([0, 0, 0]), + "/Rect": [ + 92.043, + 771.389, + 217.757, + 785.189, + ], + } + + assert set(expected.keys()) == set(annot.keys()) + del expected["/Rect"] + del annot["/Rect"] + assert annot == expected diff --git a/tests/test_writer.py b/tests/test_writer.py index 55358af81..900fedb68 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -71,7 +71,8 @@ def test_writer_operations(): ) writer.add_blank_page() writer.add_uri(2, 
"https://example.com", RectangleObject([0, 0, 100, 100])) - writer.add_link(2, 1, RectangleObject([0, 0, 100, 100])) + with pytest.warns(PendingDeprecationWarning): + writer.add_link(2, 1, RectangleObject([0, 0, 100, 100])) assert writer._get_page_layout() is None writer._set_page_layout("/SinglePage") assert writer._get_page_layout() == "/SinglePage" @@ -418,30 +419,36 @@ def test_add_link(): from PyPDF2.generic import RectangleObject - writer.add_link( - 1, - 2, - RectangleObject([0, 0, 100, 100]), - border=[1, 2, 3, [4]], - fit="/Fit", - ) - writer.add_link(2, 3, RectangleObject([20, 30, 50, 80]), [1, 2, 3], "/FitH", None) - writer.add_link( - 3, - 0, - "[ 200 300 250 350 ]", - [0, 0, 0], - "/XYZ", - 0, - 0, - 2, - ) - writer.add_link( - 3, - 0, - [100, 200, 150, 250], - border=[0, 0, 0], - ) + with pytest.warns( + PendingDeprecationWarning, + match="add_link is deprecated and will be removed in PyPDF2", + ): + writer.add_link( + 1, + 2, + RectangleObject([0, 0, 100, 100]), + border=[1, 2, 3, [4]], + fit="/Fit", + ) + writer.add_link( + 2, 3, RectangleObject([20, 30, 50, 80]), [1, 2, 3], "/FitH", None + ) + writer.add_link( + 3, + 0, + "[ 200 300 250 350 ]", + [0, 0, 0], + "/XYZ", + 0, + 0, + 2, + ) + writer.add_link( + 3, + 0, + [100, 200, 150, 250], + border=[0, 0, 0], + ) # write "output" to PyPDF2-output.pdf tmp_filename = "dont_commit_link.pdf"