From 89b7df6a92739cb3602e8ced21a1930e57c06dc6 Mon Sep 17 00:00:00 2001
From: Kenshi Muto <kmuto@debian.org>
Date: Sat, 18 Aug 2018 13:06:15 +0900
Subject: [PATCH 1/3] add review-epub2html tool

---
 bin/review-epub2html | 127 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 127 insertions(+)
 create mode 100755 bin/review-epub2html
diff --git a/bin/review-epub2html b/bin/review-epub2html
new file mode 100755
index 000000000..9fb3fe5e7
--- /dev/null
+++ b/bin/review-epub2html
@@ -0,0 +1,127 @@
+#!/usr/bin/env ruby
+#
+# Copyright (c) 2018 Kenshi Muto
+#
+# This program is free software.
+# You can distribute or modify this program under the terms of
+# the GNU LGPL, Lesser General Public License version 2.1.
+# For details of the GNU LGPL, see the file "COPYING".
+#
+
+require 'zip'
+require 'rexml/document'
+require 'cgi'
+
+module ReVIEW
+  class Epub2Html
+    def initialize
+      @opfxml = nil
+      @htmls = {}
+      @head = nil
+      @tail = nil
+    end
+
+    def parse_epub(epubname)
+      Zip::File.open(epubname) do |zio|
+        zio.each do |entry|
+          if entry.name =~ /.+\.opf\Z/
+            opf = entry.get_input_stream.read
+            @opfxml = REXML::Document.new(opf)
+          elsif entry.name =~ /.+\.x?html\Z/
+            @htmls[entry.name.sub('OEBPS/', '')] = entry.get_input_stream.read.force_encoding('utf-8')
+          end
+        end
+      end
+      nil
+    end
+
+    def take_headtail(html)
+      @head = html.sub(/(<body.*?>).*/m, '\1')
+      @tail = html.sub(%r{.*(</body>)}m, '\1')
+    end
+
+    def sanitize(s)
+      s = s.sub(/\.x?html\Z/, '').
+          sub(%r{\A\./}, '')
+      's_' + CGI.escape(s).
+             gsub(/[.,+%]/, '_')
+    end
+
+    def modify_html(fname, html)
+      doc = REXML::Document.new(html)
+      doc.context[:attribute_quote] = :quote
+
+      ids = {}
+
+      doc.each_element('//*[@id]') do |e|
+        sid = "#{sanitize(fname)}_#{sanitize(e.attributes['id'])}"
+        while ids[sid]
+          sid += 'E'
+        end
+        ids[sid] = true
+        e.attributes['id'] = sid
+      end
+
+      doc.each_element('//a[@href]') do |e|
+        href = e.attributes['href']
+        if href.start_with?('http:', 'https:', 'ftp:', 'ftps:', 'mailto:')
+          next
+        end
+
+        file, anc = href.split('#', 2)
+        if anc
+          if file.empty?
+            anc = "#{sanitize(fname)}_#{sanitize(anc)}"
+          else
+            anc = "#{sanitize(file)}_#{sanitize(anc)}"
+          end
+        else
+          anc = sanitize(file)
+        end
+
+        e.attributes['href'] = "##{anc}"
+      end
+
+      doc.to_s.
+        sub(/.*(<body.*?>)/m, %Q(<section id="#{sanitize(fname)}">)).
+        sub(%r{(</body>).*}m, '</section>')
+    end
+
+    def join_html(reffile)
+      body = []
+      make_list.each do |fname|
+        if @head.nil? && (reffile.nil? || reffile == fname)
+          take_headtail(@htmls[fname])
+        end
+
+        body << modify_html(fname, @htmls[fname])
+      end
+      "#{@head}\n#{body.join("\n")}\n#{@tail}"
+    end
+
+    def make_list
+      items = {}
+      @opfxml.each_element("//package/manifest/item[@media-type='application/xhtml+xml']") do |e|
+        items[e.attributes['id']] = e.attributes['href']
+      end
+
+      files = []
+      @opfxml.each_element('//package/spine/itemref') do |e|
+        files.push(items[e.attributes['idref']])
+      end
+
+      files
+    end
+  end
+end
+
+if ARGV[0].nil? || !File.exist?(ARGV[0])
+  STDERR.puts <<EOT
+Usage: #{File.basename($PROGRAM_NAME)} EPUB [file_for_head_and_foot] > HTML
+EOT
+  exit 1
+end
+
+eph = ReVIEW::Epub2Html.new
+eph.parse_epub(ARGV[0])
+puts eph.join_html(ARGV[1])

From c01860cb2a07013f721b5058666c7c492a8ca43e Mon Sep 17 00:00:00 2001
From: Kenshi Muto <kmuto@debian.org>
Date: Sat, 18 Aug 2018 13:27:13 +0900
Subject: [PATCH 2/3] remove unnecessary subdir finder

---
 bin/review-epub2html | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bin/review-epub2html b/bin/review-epub2html
index 9fb3fe5e7..bfc2dd98f 100755
--- a/bin/review-epub2html
+++ b/bin/review-epub2html
@@ -101,12 +101,12 @@ module ReVIEW
 
     def make_list
       items = {}
-      @opfxml.each_element("//package/manifest/item[@media-type='application/xhtml+xml']") do |e|
+      @opfxml.each_element("/package/manifest/item[@media-type='application/xhtml+xml']") do |e|
         items[e.attributes['id']] = e.attributes['href']
       end
 
       files = []
-      @opfxml.each_element('//package/spine/itemref') do |e|
+      @opfxml.each_element('/package/spine/itemref') do |e|
         files.push(items[e.attributes['idref']])
       end
 

From 39a242bc1a5f86ed3a49a07effd65e6cf8a64509 Mon Sep 17 00:00:00 2001
From: Kenshi Muto <kmuto@debian.org>
Date: Mon, 20 Aug 2018 19:13:14 +0900
Subject: [PATCH 3/3] split logic to lib

---
 bin/review-epub2html    | 120 ++----------------------------------
 lib/review/epub2html.rb | 133 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 139 insertions(+), 114 deletions(-)
 create mode 100644 lib/review/epub2html.rb

diff --git a/bin/review-epub2html b/bin/review-epub2html
index bfc2dd98f..d5d30bfa2 100755
--- a/bin/review-epub2html
+++ b/bin/review-epub2html
@@ -6,122 +6,14 @@
 # You can distribute or modify this program under the terms of
 # the GNU LGPL, Lesser General Public License version 2.1.
 # For details of the GNU LGPL, see the file "COPYING".
-#
-
-require 'zip'
-require 'rexml/document'
-require 'cgi'
-
-module ReVIEW
-  class Epub2Html
-    def initialize
-      @opfxml = nil
-      @htmls = {}
-      @head = nil
-      @tail = nil
-    end
-
-    def parse_epub(epubname)
-      Zip::File.open(epubname) do |zio|
-        zio.each do |entry|
-          if entry.name =~ /.+\.opf\Z/
-            opf = entry.get_input_stream.read
-            @opfxml = REXML::Document.new(opf)
-          elsif entry.name =~ /.+\.x?html\Z/
-            @htmls[entry.name.sub('OEBPS/', '')] = entry.get_input_stream.read.force_encoding('utf-8')
-          end
-        end
-      end
-      nil
-    end
-
-    def take_headtail(html)
-      @head = html.sub(/(<body.*?>).*/m, '\1')
-      @tail = html.sub(%r{.*(</body>)}m, '\1')
-    end
-
-    def sanitize(s)
-      s = s.sub(/\.x?html\Z/, '').
-          sub(%r{\A\./}, '')
-      's_' + CGI.escape(s).
-             gsub(/[.,+%]/, '_')
-    end
-
-    def modify_html(fname, html)
-      doc = REXML::Document.new(html)
-      doc.context[:attribute_quote] = :quote
-
-      ids = {}
-
-      doc.each_element('//*[@id]') do |e|
-        sid = "#{sanitize(fname)}_#{sanitize(e.attributes['id'])}"
-        while ids[sid]
-          sid += 'E'
-        end
-        ids[sid] = true
-        e.attributes['id'] = sid
-      end
 
-      doc.each_element('//a[@href]') do |e|
-        href = e.attributes['href']
-        if href.start_with?('http:', 'https:', 'ftp:', 'ftps:', 'mailto:')
-          next
-        end
+require 'pathname'
 
-        file, anc = href.split('#', 2)
-        if anc
-          if file.empty?
-            anc = "#{sanitize(fname)}_#{sanitize(anc)}"
-          else
-            anc = "#{sanitize(file)}_#{sanitize(anc)}"
-          end
-        else
-          anc = sanitize(file)
-        end
+bindir = Pathname.new(__FILE__).realpath.dirname
+$LOAD_PATH.unshift((bindir + '../lib').realpath)
 
-        e.attributes['href'] = "##{anc}"
-      end
+require 'review/epub2html'
 
-      doc.to_s.
-        sub(/.*(<body.*?>)/m, %Q(<section id="#{sanitize(fname)}">)).
-        sub(%r{(</body>).*}m, '</section>')
-    end
-
-    def join_html(reffile)
-      body = []
-      make_list.each do |fname|
-        if @head.nil? && (reffile.nil? || reffile == fname)
-          take_headtail(@htmls[fname])
-        end
-
-        body << modify_html(fname, @htmls[fname])
-      end
-      "#{@head}\n#{body.join("\n")}\n#{@tail}"
-    end
-
-    def make_list
-      items = {}
-      @opfxml.each_element("/package/manifest/item[@media-type='application/xhtml+xml']") do |e|
-        items[e.attributes['id']] = e.attributes['href']
-      end
-
-      files = []
-      @opfxml.each_element('/package/spine/itemref') do |e|
-        files.push(items[e.attributes['idref']])
-      end
-
-      files
-    end
-  end
-end
-
-if ARGV[0].nil? || !File.exist?(ARGV[0])
-  STDERR.puts <<EOT
-Usage: #{File.basename($PROGRAM_NAME)} EPUB [file_for_head_and_foot] > HTML
-EOT
-  exit 1
+if File.basename($PROGRAM_NAME) == File.basename(__FILE__)
+  ReVIEW::Epub2Html.execute(*ARGV)
 end
-
-eph = ReVIEW::Epub2Html.new
-eph.parse_epub(ARGV[0])
-puts eph.join_html(ARGV[1])
diff --git a/lib/review/epub2html.rb b/lib/review/epub2html.rb
new file mode 100644
index 000000000..27a1e20d0
--- /dev/null
+++ b/lib/review/epub2html.rb
@@ -0,0 +1,173 @@
+#
+# Copyright (c) 2018 Kenshi Muto
+#
+# This program is free software.
+# You can distribute or modify this program under the terms of
+# the GNU LGPL, Lesser General Public License version 2.1.
+# For details of the GNU LGPL, see the file "COPYING".
+
+require 'zip'
+require 'rexml/document'
+require 'cgi'
+
+module ReVIEW
+  # Joins the XHTML documents of an EPUB file into a single HTML document.
+  #
+  # The documents are concatenated in spine order. Each body is wrapped in a
+  # <section> element, and ids and internal links are rewritten so that
+  # anchors stay unique and keep working inside the merged document.
+  class Epub2Html
+    # Matches a reference that starts with a URI scheme (http:, mailto:, tel:,
+    # ...). Such links leave the EPUB and must not be turned into anchors.
+    EXTERNAL_LINK = /\A[a-z][a-z0-9+.-]*:/i
+
+    # Command-line entry point: builds a converter and runs #execute.
+    def self.execute(*args)
+      new.execute(*args)
+    end
+
+    # Converts the EPUB file args[0] and prints the joined HTML to stdout.
+    # args[1] optionally names the document that provides header and footer.
+    # Prints the usage to stderr and exits with status 1 when args[0] is
+    # missing or does not exist.
+    def execute(*args)
+      if args[0].nil? || !File.exist?(args[0])
+        STDERR.puts <<EOT
+Usage: #{File.basename($PROGRAM_NAME)} EPUBfile [file_for_head_and_foot] > HTMLfile
+       file_for_head_and_foot: HTML file to extract header and footer area.
+                               This file must be contained in the EPUB.
+                               If omitted, the first found file is used.
+EOT
+        exit 1
+      end
+
+      parse_epub(args[0])
+      puts join_html(args[1])
+    end
+
+    def initialize
+      @opfxml = nil # parsed OPF package document (REXML::Document)
+      @htmls = {}   # entry name without 'OEBPS/' => HTML source
+      @head = nil   # markup up to and including the opening <body> tag
+      @tail = nil   # markup from the closing </body> tag onward
+    end
+
+    # Reads the OPF package document and every .html/.xhtml entry of the EPUB.
+    #
+    # HTML sources are keyed by entry name with 'OEBPS/' removed so that they
+    # can be looked up by manifest href. NOTE(review): this presumably relies
+    # on the OPF file being stored in OEBPS/ as well -- confirm for EPUBs
+    # that use another layout.
+    def parse_epub(epubname)
+      Zip::File.open(epubname) do |zio|
+        zio.each do |entry|
+          if entry.name =~ /.+\.opf\Z/
+            opf = entry.get_input_stream.read
+            @opfxml = REXML::Document.new(opf)
+          elsif entry.name =~ /.+\.x?html\Z/
+            @htmls[entry.name.sub('OEBPS/', '')] = entry.get_input_stream.read.force_encoding('utf-8')
+          end
+        end
+      end
+      nil
+    end
+
+    # Remembers the header (everything up to the opening <body> tag) and the
+    # footer (the last </body> tag and what follows) of html.
+    def take_headtail(html)
+      @head = html.sub(/(<body.*?>).*/m, '\1')
+      @tail = html.sub(%r{.*(</body>)}m, '\1')
+    end
+
+    # Turns a file name or an id into a string usable as an id in the merged
+    # document: drops a leading './' and a trailing .html/.xhtml, URL-escapes
+    # the rest, replaces '.', ',', '+' and '%' with '_' and prepends 's_'.
+    def sanitize(s)
+      s = s.sub(/\.x?html\Z/, '').
+          sub(%r{\A\./}, '')
+      's_' + CGI.escape(s).
+             gsub(/[.,+%]/, '_')
+    end
+
+    # Returns the body of html (the document fname) as a <section> element
+    # whose ids and internal links are namespaced with the file name.
+    def modify_html(fname, html)
+      doc = REXML::Document.new(html)
+      doc.context[:attribute_quote] = :quote
+
+      prefix = sanitize(fname)
+      ids = {}
+
+      doc.each_element('//*[@id]') do |e|
+        sid = "#{prefix}_#{sanitize(e.attributes['id'])}"
+        # keep ids unique even when two of them sanitize to the same string
+        sid += 'E' while ids[sid]
+        ids[sid] = true
+        e.attributes['id'] = sid
+      end
+
+      doc.each_element('//a[@href]') do |e|
+        href = e.attributes['href']
+        next if href =~ EXTERNAL_LINK
+
+        # ''.split('#', 2) returns [], so file may be nil for an empty href;
+        # an empty file part always means "this document".
+        file, anc = href.split('#', 2)
+        target = file.to_s.empty? ? prefix : sanitize(file)
+        target = "#{target}_#{sanitize(anc)}" if anc
+
+        e.attributes['href'] = "##{target}"
+      end
+
+      doc.to_s.
+        sub(/.*(<body.*?>)/m, %Q(<section id="#{prefix}">)).
+        sub(%r{(</body>).*}m, '</section>')
+    end
+
+    # Returns the whole book as one HTML string: header, one <section> per
+    # spine document, footer. Header and footer are taken from reffile when
+    # it names a spine document, otherwise from the first spine document.
+    def join_html(reffile)
+      files = []
+      make_list.each do |fname|
+        if @htmls.key?(fname)
+          files << fname
+        else
+          # a missing source would make take_headtail/modify_html fail on nil
+          warn "#{fname} is listed in the spine but is not in the EPUB; skipped."
+        end
+      end
+
+      if @head.nil? && !files.empty?
+        ref = files.include?(reffile) ? reffile : files.first
+        if reffile && ref != reffile
+          warn "#{reffile} is not a document of the EPUB; #{ref} is used for header and footer."
+        end
+        take_headtail(@htmls[ref])
+      end
+
+      body = files.map { |fname| modify_html(fname, @htmls[fname]) }
+      "#{@head}\n#{body.join("\n")}\n#{@tail}"
+    end
+
+    # Returns the hrefs of the XHTML manifest items in spine order.
+    # Raises RuntimeError when #parse_epub has not found an OPF file.
+    def make_list
+      raise 'OPF file is not found in the EPUB.' unless @opfxml
+
+      items = {}
+      @opfxml.each_element("/package/manifest/item[@media-type='application/xhtml+xml']") do |e|
+        items[e.attributes['id']] = e.attributes['href']
+      end
+
+      files = []
+      @opfxml.each_element('/package/spine/itemref') do |e|
+        # itemrefs to non-XHTML or undeclared items have no entry in items
+        href = items[e.attributes['idref']]
+        files.push(href) if href
+      end
+
+      files
+    end
+  end
+end