From 8a0135ef33e3309fcf3d73edf14ee76951bb7efc Mon Sep 17 00:00:00 2001 From: Bernhard Weichel Date: Thu, 30 Jan 2020 14:56:09 +0000 Subject: [PATCH 1/2] added configuration for gitpod --- .gitpod.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .gitpod.yml diff --git a/.gitpod.yml b/.gitpod.yml new file mode 100644 index 00000000..84f77cb9 --- /dev/null +++ b/.gitpod.yml @@ -0,0 +1,10 @@ + + +# List the ports you want to expose and what to do when they are served. See https://www.gitpod.io/docs/43_config_ports/ +ports: + - port: 3000 + onOpen: open-preview + +# List the start up tasks. You can start them in parallel in multiple terminals. See https://www.gitpod.io/docs/44_config_start_tasks/ +tasks: + - init: bundle install From bc95f9dc5b9255db24bb31f69258b03daffdf810 Mon Sep 17 00:00:00 2001 From: Bernhard Weichel Date: Thu, 30 Jan 2020 22:33:53 +0000 Subject: [PATCH 2/2] wip initial port for named_destinations from pypdf --- lib/pdf/reader.rb | 6 +++++ lib/pdf/reader/page.rb | 54 ++++++++++++++++++++++++++++++++++++++++ spec/integration_spec.rb | 20 +++++++++++++++ 3 files changed, 80 insertions(+) diff --git a/lib/pdf/reader.rb b/lib/pdf/reader.rb index 0ac514bd..60cf6ace 100644 --- a/lib/pdf/reader.rb +++ b/lib/pdf/reader.rb @@ -186,6 +186,12 @@ def pages end end + # return named destinations of the document + def named_destinations + pages.first.named_destinations + end + + # returns a single PDF::Reader::Page for the specified page. # Use this instead of pages method when you need to access just a single # page diff --git a/lib/pdf/reader/page.rb b/lib/pdf/reader/page.rb index e58448e2..1df30c32 100644 --- a/lib/pdf/reader/page.rb +++ b/lib/pdf/reader/page.rb @@ -68,6 +68,60 @@ def attributes @attributes end + # Convenience method to retrieve the named destinations present in the document. 
# implemented as port of https://github.com/mstamy2/PyPDF2/blob/18a2627adac13124d4122c8b92aaa863ccfb8c29/PyPDF2/pdf.py#L1350-L1389
#
# Collects the named destinations of the document into a Hash keyed by
# destination name; each value is the Hash built by _build_destination.
#
# tree   - name tree node, or a reference to one, to walk. On the initial
#          call it defaults to root[:Dests], falling back to the :Dests
#          entry of the root[:Names] dictionary.
# retval - Hash the recursive calls accumulate into; callers leave it nil.
#
# Returns the accumulated Hash, which is empty when no tree is found.
def named_destinations(tree = nil, retval = nil)
  if retval.nil? # initial (non-recursive) call
    retval = {}
    # honour an explicitly supplied tree instead of silently replacing it
    tree ||= root[:Dests] || (@objects.deref(root[:Names]) || {})[:Dests]
  end

  node = @objects.deref(tree)
  return retval if node.nil?

  # intermediate node: descend into every kid (each is dereferenced on entry)
  (@objects.deref(node[:Kids]) || []).each do |kid|
    named_destinations(kid, retval)
  end

  # leaf node: :Names is a flat [name, value, name, value, ...] list
  (@objects.deref(node[:Names]) || []).each_slice(2) do |raw_key, raw_val|
    key = @objects.deref(raw_key)
    val = @objects.deref(raw_val)
    # the value is either the destination array itself or a dictionary
    # carrying that array under :D
    val = @objects.deref(val[:D]) if val.respond_to?(:key?) && val.key?(:D)

    dest = _build_destination(key, val)
    retval[key] = dest if dest # malformed or dangling entries are skipped
  end

  retval
end

# Builds the record stored for a single named destination.
#
# title - destination name; not used yet, kept to mirror the PyPDF2 helper.
# array - destination array; its first element is stored under :page and its
#         second element, dereferenced, under :typ (presumably the target
#         page and the fit type such as :XYZ - confirm against the PDF spec).
#
# Returns a Hash with the keys :page and :typ, or nil when array is not an
# Array.
private def _build_destination(title, array)
  return nil unless array.is_a?(Array)

  page, typ = array
  { page: page, typ: @objects.deref(typ) }
end


# Convenience method to identify the page's orientation.
# def orientation diff --git a/spec/integration_spec.rb b/spec/integration_spec.rb index 446373e7..10412077 100644 --- a/spec/integration_spec.rb +++ b/spec/integration_spec.rb @@ -276,6 +276,26 @@ end end + context "extracts named destinations p" do + let(:filename){pdf_spec_file("pdflatex")} + + it "extracts named destinations correctly" do + PDF::Reader.open(filename) do |reader| + named_destinations = reader.named_destinations + expect(named_destinations.count).to eql(90) + expect(named_destinations.keys.first).to eql('Doc-Start') + end + end + + it "extracts named destinations correctly via page" do + PDF::Reader.open(filename) do |reader| + named_destinations = reader.page(10).named_destinations + expect(named_destinations.count).to eql(90) + expect(named_destinations.keys.first).to eql('Doc-Start') + end + end + end + context "encrypted_version1_revision2_40bit_rc4_user_pass_apples" do let(:filename) { pdf_spec_file("encrypted_version1_revision2_40bit_rc4_user_pass_apples") }