You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Traceback (most recent call last):
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/EGG-INFO/scripts/pdf2txt.py", line 132, in <module>
if __name__ == '__main__': sys.exit(main())
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/EGG-INFO/scripts/pdf2txt.py", line 127, in main
outfp = extract_text(**vars(A))
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/EGG-INFO/scripts/pdf2txt.py", line 62, in extract_text
pdfminer3.high_level.extract_text_to_fp(fp, **locals())
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer3/high_level.py", line 79, in extract_text_to_fp
interpreter.process_page(page)
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer3/pdfinterp.py", line 851, in process_page
self.render_contents(page.resources, page.contents, ctm=ctm)
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer3/pdfinterp.py", line 861, in render_contents
self.init_resources(resources)
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer3/pdfinterp.py", line 361, in init_resources
self.fontmap[fontid] = self.rsrcmgr.get_font(objid, spec)
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer3/pdfinterp.py", line 211, in get_font
font = self.get_font(None, subspec)
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer3/pdfinterp.py", line 202, in get_font
font = PDFCIDFont(self, spec)
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer3/pdffont.py", line 656, in __init__
self.cmap = CMapDB.get_cmap(name)
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer3/cmapdb.py", line 257, in get_cmap
data = klass._load_data(name)
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer3/cmapdb.py", line 231, in _load_data
name = name.replace("\0", "")
AttributeError: 'PDFStream' object has no attribute 'replace'
Traceback (most recent call last):
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/EGG-INFO/scripts//pdf2txt.py", line 136, in <module>
if __name__ == '__main__': sys.exit(main())
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/EGG-INFO/scripts//pdf2txt.py", line 131, in main
outfp = extract_text(**vars(A))
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/EGG-INFO/scripts//pdf2txt.py", line 63, in extract_text
pdfminer.high_level.extract_text_to_fp(fp, **locals())
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer/high_level.py", line 80, in extract_text_to_fp
check_extractable=True):
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer/pdfpage.py", line 132, in get_pages
raise PDFTextExtractionNotAllowed('Text extraction is not allowed: %r' % fp)
pdfminer.pdfdocument.PDFTextExtractionNotAllowed: Text extraction is not allowed: <_io.BufferedReader name='/pdf/00137/LTN20190121455.pdf'>
The text was updated successfully, but these errors were encountered:
Hi dear,
When I convert PDFs to text files, I run into some issues with pdfminer.six. Could you please help check and fix them? Many thanks.
case pdf 1: https://links.sgx.com/1.0.0/corporate-announcements/HOBG2B5Y0EVJ9PYQ/Manhattan%20Resources%20Limited%20-%20Offer%20Information%20Statement%20dated%2027%20November%202018.pdf
python ${pdfminer_path}/pdf2txt.py -M 99 -L 1 -o "/pdf/L02/HOBG2B5Y0EVJ9PYQ.txt" "/L02/HOBG2B5Y0EVJ9PYQ.pdf"
Traceback (most recent call last):
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/EGG-INFO/scripts/pdf2txt.py", line 132, in <module>
if __name__ == '__main__': sys.exit(main())
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/EGG-INFO/scripts/pdf2txt.py", line 127, in main
outfp = extract_text(**vars(A))
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/EGG-INFO/scripts/pdf2txt.py", line 62, in extract_text
pdfminer3.high_level.extract_text_to_fp(fp, **locals())
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer3/high_level.py", line 79, in extract_text_to_fp
interpreter.process_page(page)
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer3/pdfinterp.py", line 851, in process_page
self.render_contents(page.resources, page.contents, ctm=ctm)
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer3/pdfinterp.py", line 861, in render_contents
self.init_resources(resources)
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer3/pdfinterp.py", line 361, in init_resources
self.fontmap[fontid] = self.rsrcmgr.get_font(objid, spec)
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer3/pdfinterp.py", line 211, in get_font
font = self.get_font(None, subspec)
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer3/pdfinterp.py", line 202, in get_font
font = PDFCIDFont(self, spec)
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer3/pdffont.py", line 656, in __init__
self.cmap = CMapDB.get_cmap(name)
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer3/cmapdb.py", line 257, in get_cmap
data = klass._load_data(name)
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer3/cmapdb.py", line 231, in _load_data
name = name.replace("\0", "")
AttributeError: 'PDFStream' object has no attribute 'replace'
case pdf 2: http://www3.hkexnews.hk/listedco/listconews/SEHK/2019/0121/LTN20190121455.pdf
python ${pdfminer_path}/pdf2txt.py -o "/pdf/00137/LTN20190121455.txt" "/pdf/00137/LTN20190121455.pdf"
Traceback (most recent call last):
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/EGG-INFO/scripts//pdf2txt.py", line 136, in <module>
if __name__ == '__main__': sys.exit(main())
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/EGG-INFO/scripts//pdf2txt.py", line 131, in main
outfp = extract_text(**vars(A))
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/EGG-INFO/scripts//pdf2txt.py", line 63, in extract_text
pdfminer.high_level.extract_text_to_fp(fp, **locals())
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer/high_level.py", line 80, in extract_text_to_fp
check_extractable=True):
File "/appvol/cnam/anaconda3/lib/python3.6/site-packages/pdfminer.six-20181108-py3.6.egg/pdfminer/pdfpage.py", line 132, in get_pages
raise PDFTextExtractionNotAllowed('Text extraction is not allowed: %r' % fp)
pdfminer.pdfdocument.PDFTextExtractionNotAllowed: Text extraction is not allowed: <_io.BufferedReader name='/pdf/00137/LTN20190121455.pdf'>
The text was updated successfully, but these errors were encountered: