You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
2024-08-12 08:05:16.382 | ERROR | __main__:pdf_parse_main:135 - zero-size array to reduction operation maximum which has no identity
Traceback (most recent call last):
File "/home/founder/New/MinerU-master/demo/magic_pdf_parse_main.py", line 224, in <module>
pdf_parse_main(pdf_path)
│ └ '/home/founder/New/pdf/1.pdf'
└ <function pdf_parse_main at 0x7fac4b7765f0>
File "/home/founder/New/MinerU-master/demo/magic_pdf_parse_main.py", line 117, in pdf_parse_main
pipe.pipe_analyze() # 解析
│ └ <function TXTPipe.pipe_analyze at 0x7fac4b776170>
└ <magic_pdf.pipe.TXTPipe.TXTPipe object at 0x7fac3ebcc880>
File "/home/founder/anaconda3/envs/MinerU/lib/python3.10/site-packages/magic_pdf/pipe/TXTPipe.py", line 20, in pipe_analyze
self.model_list = doc_analyze(self.pdf_bytes, ocr=False)
│ │ │ │ └ b'%PDF-1.7\r%\xe2\xe3\xcf\xd3\r\n308 0 obj\r<</Linearized 1/L 643506/O 310/E 111574/N 6/T 642787/H [ 660 557]>>\rendobj\r ...
│ │ │ └ <magic_pdf.pipe.TXTPipe.TXTPipe object at 0x7fac3ebcc880>
│ │ └ <function doc_analyze at 0x7fad6f68e9e0>
│ └ []
└ <magic_pdf.pipe.TXTPipe.TXTPipe object at 0x7fac3ebcc880>
File "/home/founder/anaconda3/envs/MinerU/lib/python3.10/site-packages/magic_pdf/model/doc_analyze_by_custom_model.py", line 119, in doc_analyze
result = custom_model(img)
│ └ array([[[255, 255, 255],
│ [255, 255, 255],
│ [255, 255, 255],
│ ...,
│ [255, 255, 255],
│ [255...
└ <magic_pdf.model.pdf_extract_kit.CustomPEKModel object at 0x7fac3ebcc730>
File "/home/founder/anaconda3/envs/MinerU/lib/python3.10/site-packages/magic_pdf/model/pdf_extract_kit.py", line 182, in __call__
for mf_img in dataloader:
└ <torch.utils.data.dataloader.DataLoader object at 0x7faab681c3a0>
File "/home/founder/.local/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 631, in __next__
data = self._next_data()
│ └ <function _SingleProcessDataLoaderIter._next_data at 0x7fabf422cc10>
└ <torch.utils.data.dataloader._SingleProcessDataLoaderIter object at 0x7faa0f90e560>
File "/home/founder/.local/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 675, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
│ │ │ └ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33...
│ │ └ <function _MapDatasetFetcher.fetch at 0x7fabf43b5d80>
│ └ <torch.utils.data._utils.fetch._MapDatasetFetcher object at 0x7faa0f90e260>
└ <torch.utils.data.dataloader._SingleProcessDataLoaderIter object at 0x7faa0f90e560>
File "/home/founder/.local/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
│ │ └ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33...
│ └ <magic_pdf.model.pdf_extract_kit.MathDataset object at 0x7faab550fc70>
└ <torch.utils.data._utils.fetch._MapDatasetFetcher object at 0x7faa0f90e260>
File "/home/founder/.local/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
│ │ │ └ 27
│ │ └ 27
│ └ <magic_pdf.model.pdf_extract_kit.MathDataset object at 0x7faab550fc70>
└ <torch.utils.data._utils.fetch._MapDatasetFetcher object at 0x7faa0f90e260>
File "/home/founder/anaconda3/envs/MinerU/lib/python3.10/site-packages/magic_pdf/model/pdf_extract_kit.py", line 82, in __getitem__
image = self.transform(raw_image)
│ │ └ <PIL.Image.Image image mode=RGB size=143x0 at 0x7FAAB6954460>
│ └ Compose(
│ <unimernet.processors.formula_processor.FormulaImageEvalProcessor object at 0x7faab550ff10>
│ )
└ <magic_pdf.model.pdf_extract_kit.MathDataset object at 0x7faab550fc70>
File "/home/founder/.local/lib/python3.10/site-packages/torchvision/transforms/transforms.py", line 95, in __call__
img = t(img)
│ └ <PIL.Image.Image image mode=RGB size=143x0 at 0x7FAAB6954460>
└ <unimernet.processors.formula_processor.FormulaImageEvalProcessor object at 0x7faab550ff10>
File "/home/founder/anaconda3/envs/MinerU/lib/python3.10/site-packages/unimernet/processors/formula_processor.py", line 164, in __call__
image = self.prepare_input(item)
│ │ └ <PIL.Image.Image image mode=RGB size=143x0 at 0x7FAAB6954460>
│ └ <function FormulaImageBaseProcessor.prepare_input at 0x7faadc049900>
└ <unimernet.processors.formula_processor.FormulaImageEvalProcessor object at 0x7faab550ff10>
File "/home/founder/anaconda3/envs/MinerU/lib/python3.10/site-packages/unimernet/processors/formula_processor.py", line 51, in prepare_input
img = self.crop_margin(img.convert("RGB"))
│ │ │ └ <function Image.convert at 0x7fac402824d0>
│ │ └ <PIL.Image.Image image mode=RGB size=143x0 at 0x7FAAB6954460>
│ └ <staticmethod(<function FormulaImageBaseProcessor.crop_margin at 0x7faadc04ab90>)>
└ <unimernet.processors.formula_processor.FormulaImageEvalProcessor object at 0x7faab550ff10>
File "/home/founder/anaconda3/envs/MinerU/lib/python3.10/site-packages/unimernet/processors/formula_processor.py", line 29, in crop_margin
max_val = data.max()
│ └ <method 'max' of 'numpy.ndarray' objects>
└ array([], shape=(0, 143), dtype=uint8)
File "/home/founder/.local/lib/python3.10/site-packages/numpy/core/_methods.py", line 41, in _amax
return umr_maximum(a, axis, None, out, keepdims, initial, where)
│ │ │ │ │ │ └ True
│ │ │ │ │ └ <no value>
│ │ │ │ └ False
│ │ │ └ None
│ │ └ None
│ └ array([], shape=(0, 143), dtype=uint8)
└ <built-in method reduce of numpy.ufunc object at 0x7fad6e6cc440>
ValueError: zero-size array to reduction operation maximum which has no identity
How to reproduce the bug | 如何复现
magic_pdf_parse_main.py
软件版本0.7.x
Operating system | 操作系统
Linux
Python version | Python 版本
3.10
Software version | 软件版本 (magic-pdf --version)
0.6.x
Device mode | 设备模式
cuda
The text was updated successfully, but these errors were encountered:
Description of the bug | 错误描述
2024-08-12 08:05:16.382 | ERROR | __main__:pdf_parse_main:135 - zero-size array to reduction operation maximum which has no identity
Traceback (most recent call last):
File "/home/founder/New/MinerU-master/demo/magic_pdf_parse_main.py", line 224, in <module>
pdf_parse_main(pdf_path)
│ └ '/home/founder/New/pdf/1.pdf'
└ <function pdf_parse_main at 0x7fac4b7765f0>
File "/home/founder/anaconda3/envs/MinerU/lib/python3.10/site-packages/magic_pdf/pipe/TXTPipe.py", line 20, in pipe_analyze
self.model_list = doc_analyze(self.pdf_bytes, ocr=False)
│ │ │ │ └ b'%PDF-1.7\r%\xe2\xe3\xcf\xd3\r\n308 0 obj\r<</Linearized 1/L 643506/O 310/E 111574/N 6/T 642787/H [ 660 557]>>\rendobj\r ...
│ │ │ └ <magic_pdf.pipe.TXTPipe.TXTPipe object at 0x7fac3ebcc880>
│ │ └ <function doc_analyze at 0x7fad6f68e9e0>
│ └ []
└ <magic_pdf.pipe.TXTPipe.TXTPipe object at 0x7fac3ebcc880>
File "/home/founder/anaconda3/envs/MinerU/lib/python3.10/site-packages/magic_pdf/model/doc_analyze_by_custom_model.py", line 119, in doc_analyze
result = custom_model(img)
│ └ array([[[255, 255, 255],
│ [255, 255, 255],
│ [255, 255, 255],
│ ...,
│ [255, 255, 255],
│ [255...
└ <magic_pdf.model.pdf_extract_kit.CustomPEKModel object at 0x7fac3ebcc730>
File "/home/founder/anaconda3/envs/MinerU/lib/python3.10/site-packages/magic_pdf/model/pdf_extract_kit.py", line 182, in __call__
for mf_img in dataloader:
└ <torch.utils.data.dataloader.DataLoader object at 0x7faab681c3a0>
File "/home/founder/.local/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 631, in __next__
data = self._next_data()
│ └ <function _SingleProcessDataLoaderIter._next_data at 0x7fabf422cc10>
└ <torch.utils.data.dataloader._SingleProcessDataLoaderIter object at 0x7faa0f90e560>
File "/home/founder/.local/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 675, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
│ │ │ └ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33...
│ │ └ <function _MapDatasetFetcher.fetch at 0x7fabf43b5d80>
│ └ <torch.utils.data._utils.fetch._MapDatasetFetcher object at 0x7faa0f90e260>
└ <torch.utils.data.dataloader._SingleProcessDataLoaderIter object at 0x7faa0f90e560>
File "/home/founder/.local/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
│ │ └ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33...
│ └ <magic_pdf.model.pdf_extract_kit.MathDataset object at 0x7faab550fc70>
└ <torch.utils.data._utils.fetch._MapDatasetFetcher object at 0x7faa0f90e260>
File "/home/founder/.local/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 51, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
│ │ │ └ 27
│ │ └ 27
│ └ <magic_pdf.model.pdf_extract_kit.MathDataset object at 0x7faab550fc70>
└ <torch.utils.data._utils.fetch._MapDatasetFetcher object at 0x7faa0f90e260>
File "/home/founder/anaconda3/envs/MinerU/lib/python3.10/site-packages/magic_pdf/model/pdf_extract_kit.py", line 82, in __getitem__
image = self.transform(raw_image)
│ │ └ <PIL.Image.Image image mode=RGB size=143x0 at 0x7FAAB6954460>
│ └ Compose(
│ <unimernet.processors.formula_processor.FormulaImageEvalProcessor object at 0x7faab550ff10>
│ )
└ <magic_pdf.model.pdf_extract_kit.MathDataset object at 0x7faab550fc70>
File "/home/founder/.local/lib/python3.10/site-packages/torchvision/transforms/transforms.py", line 95, in __call__
img = t(img)
│ └ <PIL.Image.Image image mode=RGB size=143x0 at 0x7FAAB6954460>
└ <unimernet.processors.formula_processor.FormulaImageEvalProcessor object at 0x7faab550ff10>
File "/home/founder/anaconda3/envs/MinerU/lib/python3.10/site-packages/unimernet/processors/formula_processor.py", line 164, in __call__
image = self.prepare_input(item)
│ │ └ <PIL.Image.Image image mode=RGB size=143x0 at 0x7FAAB6954460>
│ └ <function FormulaImageBaseProcessor.prepare_input at 0x7faadc049900>
└ <unimernet.processors.formula_processor.FormulaImageEvalProcessor object at 0x7faab550ff10>
File "/home/founder/anaconda3/envs/MinerU/lib/python3.10/site-packages/unimernet/processors/formula_processor.py", line 51, in prepare_input
img = self.crop_margin(img.convert("RGB"))
│ │ │ └ <function Image.convert at 0x7fac402824d0>
│ │ └ <PIL.Image.Image image mode=RGB size=143x0 at 0x7FAAB6954460>
│ └ <staticmethod(<function FormulaImageBaseProcessor.crop_margin at 0x7faadc04ab90>)>
└ <unimernet.processors.formula_processor.FormulaImageEvalProcessor object at 0x7faab550ff10>
File "/home/founder/anaconda3/envs/MinerU/lib/python3.10/site-packages/unimernet/processors/formula_processor.py", line 29, in crop_margin
max_val = data.max()
│ └ <method 'max' of 'numpy.ndarray' objects>
└ array([], shape=(0, 143), dtype=uint8)
File "/home/founder/.local/lib/python3.10/site-packages/numpy/core/_methods.py", line 41, in _amax
return umr_maximum(a, axis, None, out, keepdims, initial, where)
│ │ │ │ │ │ └ True
│ │ │ │ │ └ <no value>
│ │ │ │ └ False
│ │ │ └ None
│ │ └ None
│ └ array([], shape=(0, 143), dtype=uint8)
└ <built-in method reduce of numpy.ufunc object at 0x7fad6e6cc440>
ValueError: zero-size array to reduction operation maximum which has no identity
How to reproduce the bug | 如何复现
magic_pdf_parse_main.py
软件版本0.7.x
Operating system | 操作系统
Linux
Python version | Python 版本
3.10
Software version | 软件版本 (magic-pdf --version)
0.6.x
Device mode | 设备模式
cuda
The text was updated successfully, but these errors were encountered: