Implement download of image resources

see #19
marph91 · Apr 19, 2023 · 198a443 · 198a443
1 parent 6f9abed
commit 198a443
Showing 1 changed file with 47 additions and 27 deletions.
diff --git a/examples/note_export.py b/examples/note_export.py
@@ -16,6 +16,7 @@
 
 import argparse
 import os
+import tempfile
 
 from joppy.api import Api
 import pypandoc
@@ -55,34 +56,53 @@ def main():
         candidates.extend([note for note in notes if note.title == title])
     print(f"Found {len(candidates)} matching notes.")
 
-    # Convert all notes to the specified format.
-    os.makedirs(args.output_folder, exist_ok=True)
-    for candidate in candidates:
-        note = api.get_note(id_=candidate.id, fields="body")
-
-        title_normalized = (
-            candidate.title.lower().replace(" ", "_") + "_" + candidate.id
-        )
-        output_path = f"{args.output_folder}/{title_normalized}.{args.output_format}"
-
-        valid_output_formats = pypandoc.get_pandoc_formats()[1]
-        if args.output_format not in valid_output_formats:
-            raise ValueError(
-                f"Invalid format: {args.output_format}. "
-                f"Valid formats: {valid_output_formats}."
+    # Create a temporary directory for the resources.
+    with tempfile.TemporaryDirectory() as tmpdirname:
+
+        # Convert all notes to the specified format.
+        os.makedirs(args.output_folder, exist_ok=True)
+        for candidate in candidates:
+            note = api.get_note(id_=candidate.id, fields="body")
+            note_body = note.body
+
+            # Download and add all image resources
+            resources = api.get_all_resources(note_id=candidate.id, fields="id,mime")
+            for resource in resources:
+                if not resource.mime.startswith("image"):
+                    continue
+                resource_binary = api.get_resource_file(resource.id)
+                with open(f"{tmpdirname}/{resource.id}", "wb") as outfile:
+                    outfile.write(resource_binary)
+                # Replace Joplin's internal resource link (":/<id>") with the path to the just-downloaded file.
+                note_body = note_body.replace(
+                    f":/{resource.id}", f"{tmpdirname}/{resource.id}"
+                )
+
+            title_normalized = (
+                candidate.title.lower().replace(" ", "_") + "_" + candidate.id
+            )
+            output_path = (
+                f"{args.output_folder}/{title_normalized}.{args.output_format}"
+            )
+
+            valid_output_formats = pypandoc.get_pandoc_formats()[1]
+            if args.output_format not in valid_output_formats:
+                raise ValueError(
+                    f"Invalid format: {args.output_format}. "
+                    f"Valid formats: {valid_output_formats}."
+                )
+            # special arguments for some output formats
+            format_kwargs = {
+                # https://github.com/NicklasTegner/pypandoc/issues/186#issuecomment-673282133
+                "pdf": {"to": "html", "extra_args": ["--pdf-engine", "weasyprint"]}
+            }
+
+            pypandoc.convert_text(
+                f"# {candidate.title}\n{note_body}",
+                format="md",
+                outputfile=output_path,
+                **format_kwargs.get(args.output_format, {"to": args.output_format}),
             )
-        # special arguments for some output formats
-        format_kwargs = {
-            # https://github.com/NicklasTegner/pypandoc/issues/186#issuecomment-673282133
-            "pdf": {"to": "html", "extra_args": ["--pdf-engine", "weasyprint"]}
-        }
-
-        pypandoc.convert_text(
-            f"# {candidate.title}\n{note.body}",
-            format="md",
-            outputfile=output_path,
-            **format_kwargs.get(args.output_format, {"to": args.output_format}),
-        )
 
 
 if __name__ == "__main__":