pdfminer · pietermarsman · Aug 31, 2021 · Apr 24, 2021 · Apr 24, 2021 · Aug 31, 2021
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -17,6 +17,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 - Fix issue of some Chinese characters can not be extracted correctly ([#593](https://github.com/pdfminer/pdfminer.six/pull/593))
 - Detecting trailer correctly when surrounded with needless whitespace ([#535](https://github.com/pdfminer/pdfminer.six/pull/535))
 - Fix `.paint_path` logic for handling single line segments and extracting point-on-curve positions of Beziér path commands ([#530](https://github.com/pdfminer/pdfminer.six/pull/530))
+- Raising `UnboundLocalError` when a bad `--output-type` is used ([#610](https://github.com/pdfminer/pdfminer.six/pull/610))
+- `TypeError` when using `TagExtractor` with non-string or non-bytes tag values ([#610](https://github.com/pdfminer/pdfminer.six/pull/610))
 
 ## Removed
 - Support for Python 3.4 and 3.5 ([#522](https://github.com/pdfminer/pdfminer.six/pull/522))

diff --git a/pdfminer/high_level.py b/pdfminer/high_level.py
@@ -56,25 +56,33 @@ def extract_text_to_fp(inf, outfp, output_type='text', codec='utf-8',
         imagewriter = ImageWriter(output_dir)
 
     rsrcmgr = PDFResourceManager(caching=not disable_caching)
+    device = None
+
+    if output_type != 'text' and outfp == sys.stdout:
+        outfp = sys.stdout.buffer
 
     if output_type == 'text':
         device = TextConverter(rsrcmgr, outfp, codec=codec, laparams=laparams,
                                imagewriter=imagewriter)
 
-    if outfp == sys.stdout:
-        outfp = sys.stdout.buffer
-
-    if output_type == 'xml':
+    elif output_type == 'xml':
         device = XMLConverter(rsrcmgr, outfp, codec=codec, laparams=laparams,
                               imagewriter=imagewriter,
                               stripcontrol=strip_control)
+
     elif output_type == 'html':
         device = HTMLConverter(rsrcmgr, outfp, codec=codec, scale=scale,
                                layoutmode=layoutmode, laparams=laparams,
                                imagewriter=imagewriter)
+
     elif output_type == 'tag':
         device = TagExtractor(rsrcmgr, outfp, codec=codec)
 
+    else:
+        msg = f"Output type can be text, html, xml or tag but is " \
+              f"{output_type}"
+        raise ValueError(msg)
+
     interpreter = PDFPageInterpreter(rsrcmgr, device)
     for page in PDFPage.get_pages(inf,
                                   page_numbers,

diff --git a/pdfminer/pdfdevice.py b/pdfminer/pdfdevice.py
@@ -154,40 +154,44 @@ def render_string(self, textstate, seq, ncs, graphicstate):
                     char = font.to_unichr(cid)
                     text += char
                 except PDFUnicodeNotDefined:
-                    print(chars)
                     pass
-        self.outfp.write(utils.enc(text))
+        self._write(utils.enc(text))
         return
 
     def begin_page(self, page, ctm):
         output = '<page id="%s" bbox="%s" rotate="%d">' %\
                  (self.pageno, utils.bbox2str(page.mediabox), page.rotate)
-        self.outfp.write(utils.make_compat_bytes(output))
+        self._write(output)
         return
 
     def end_page(self, page):
-        self.outfp.write(utils.make_compat_bytes('</page>\n'))
+        self._write('</page>\n')
         self.pageno += 1
         return
 
     def begin_tag(self, tag, props=None):
         s = ''
         if isinstance(props, dict):
-            s = ''.join(' {}="{}"'.format(utils.enc(k), utils.enc(str(v)))
-                        for (k, v) in sorted(props.items()))
+            # Run values through enc() too, as for keys: they go in quotes.
+            s = ''.join(' {}="{}"'.format(
+                utils.enc(k), utils.enc(utils.make_compat_str(v)))
+                for (k, v) in sorted(props.items()))
         out_s = '<{}{}>'.format(utils.enc(tag.name), s)
-        self.outfp.write(utils.make_compat_bytes(out_s))
+        self._write(out_s)
         self._stack.append(tag)
         return
 
     def end_tag(self):
         assert self._stack, str(self.pageno)
         tag = self._stack.pop(-1)
         out_s = '</%s>' % utils.enc(tag.name)
-        self.outfp.write(utils.make_compat_bytes(out_s))
+        self._write(out_s)
         return
 
     def do_tag(self, tag, props=None):
         self.begin_tag(tag, props)
         self._stack.pop(-1)
         return
+
+    def _write(self, s: str):
+        self.outfp.write(s.encode(self.codec))
diff --git a/pdfminer/utils.py b/pdfminer/utils.py
@@ -46,13 +46,13 @@ def make_compat_bytes(in_str):
     return in_str.encode()
 
 
-def make_compat_str(in_str):
-    """Converts to string, guessing encoding."""
-    assert isinstance(in_str, (bytes, str)), str(type(in_str))
-    if isinstance(in_str, bytes):
-        enc = chardet.detect(in_str)
-        in_str = in_str.decode(enc['encoding'])
-    return in_str
+def make_compat_str(o):
+    """Converts everything to string, if bytes guessing the encoding."""
+    if not isinstance(o, bytes):
+        return str(o)
+    # chardet reports encoding=None for empty or undetectable input.
+    enc = chardet.detect(o)['encoding'] or 'utf-8'
+    return o.decode(enc, errors='replace')
 
 
 def shorten_str(s, size):