You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Empty lists for extract_reactions_from_text_in_pdf and OSError: [Errno 22] Invalid argument: 'C:\\Users\\Lyubomir/.torch/iopath_cache\\s/gxy11xkkiwnpgog\\publaynet-tf_efficientdet_d1.pth.tar?dl=1.lock' for extract_reactions_from_figures_in_pdf
#124
Open
LyuboKotop opened this issue
Sep 5, 2024
· 1 comment
File c:\users\lyubomir\openchemie\openchemie\interface.py:74, in OpenChemIE.pdfparser(self)
71 @property
72 def pdfparser(self):
73 if self._pdfparser is None:
---> 74 self.init_pdfparser()
75 return self._pdfparser
File c:\users\lyubomir\openchemie\openchemie\interface.py:85, in OpenChemIE.init_pdfparser(self, ckpt_path)
79 """
80 Set model to custom checkpoint
81 Parameters:
82 ckpt_path: path to checkpoint to use, if None then will use default
83 """
84 config_path = "lp://efficientdet/PubLayNet/tf_efficientdet_d1"
---> 85 self._pdfparser = lp.AutoLayoutModel(config_path, model_path=ckpt_path, device=self.device.type)
File ~\anaconda3\envs\open_chemie\lib\site-packages\layoutparser\models\auto_layoutmodel.py:64, in AutoLayoutModel(config_path, model_path, label_map, device, extra_config)
62 for backend_name in ALL_AVAILABLE_BACKENDS:
63 if backend_name in config_path:
---> 64 return ALL_AVAILABLE_BACKENDS[backend_name](
65 config_path,
66 model_path=model_path,
67 label_map=label_map,
68 extra_config=extra_config,
69 device=device,
70 )
File ~\anaconda3\envs\open_chemie\lib\site-packages\portalocker\utils.py:256, in Lock.acquire(self, timeout, check_interval, fail_when_locked)
253 return fh
255 # Get a new filehandler
--> 256 fh = self._get_fh()
258 def try_close(): # pragma: no cover
259 # Silently try to close the handle if possible, ignore all issues
260 if fh is not None:
File ~\anaconda3\envs\open_chemie\lib\site-packages\portalocker\utils.py:313, in Lock._get_fh(self)
311 def _get_fh(self) -> typing.IO:
312 '''Get a new filehandle'''
--> 313 return open( # noqa: SIM115
314 self.filename,
315 self.mode,
316 **self.file_open_kwargs,
317 )
Hi,
I set up OpenChemIE by:
conda create -n openchemie python=3.9
conda activate openchemie
git clone https://github.com/CrystalEye42/OpenChemIE.git
cd OpenChemIE
pip install --editable .
pip install pdftotext
conda install -c conda-forge poppler
conda install jupyter
jupyter notebook
Then I create a jupyter notebook in the folder where the pdf example is (acs.joc.2c00749.pdf)
Then I do:
import torch
from openchemie import OpenChemIE
model = OpenChemIE()
pdf_path = 'acs.joc.2c00749.pdf'
text_results = model.extract_reactions_from_text_in_pdf(pdf_path)
text_results
Which returns:
[{'page': 1, 'reactions': []},
{'page': 2, 'reactions': []},
{'page': 3, 'reactions': []},
{'page': 4, 'reactions': []},
{'page': 5, 'reactions': []},
{'page': 6, 'reactions': []},
{'page': 7, 'reactions': []},
{'page': 8, 'reactions': []},
{'page': 9, 'reactions': []},
{'page': 10, 'reactions': []}]
When I do:
figure_results = model.extract_reactions_from_figures_in_pdf(pdf_path)
I get:
OSError Traceback (most recent call last)
Cell In[4], line 1
----> 1 figure_results = model.extract_reactions_from_figures_in_pdf(pdf_path)
File c:\users\lyubomir\openchemie\openchemie\interface.py:450, in OpenChemIE.extract_reactions_from_figures_in_pdf(self, pdf, batch_size, num_pages, molscribe, ocr)
404 def extract_reactions_from_figures_in_pdf(self, pdf, batch_size=16, num_pages=None, molscribe=True, ocr=True):
405 """
406 Get reaction information from figures in pdf
407 Parameters:
(...)
448 ]
449 """
--> 450 figures = self.extract_figures_from_pdf(pdf, num_pages=num_pages, output_bbox=True)
451 images = [figure['figure']['image'] for figure in figures]
452 results = self.extract_reactions_from_figures(images, batch_size=batch_size, molscribe=molscribe, ocr=ocr)
File c:\users\lyubomir\openchemie\openchemie\interface.py:203, in OpenChemIE.extract_figures_from_pdf(self, pdf, num_pages, output_bbox, output_image)
199 table_ext.set_output_image(output_image)
201 table_ext.set_output_bbox(output_bbox)
--> 203 return table_ext.extract_all_tables_and_figures(pages, self.pdfparser, content='figures')
File c:\users\lyubomir\openchemie\openchemie\interface.py:74, in OpenChemIE.pdfparser(self)
71 @property
72 def pdfparser(self):
73 if self._pdfparser is None:
---> 74 self.init_pdfparser()
75 return self._pdfparser
File c:\users\lyubomir\openchemie\openchemie\interface.py:85, in OpenChemIE.init_pdfparser(self, ckpt_path)
79 """
80 Set model to custom checkpoint
81 Parameters:
82 ckpt_path: path to checkpoint to use, if None then will use default
83 """
84 config_path = "lp://efficientdet/PubLayNet/tf_efficientdet_d1"
---> 85 self._pdfparser = lp.AutoLayoutModel(config_path, model_path=ckpt_path, device=self.device.type)
File ~\anaconda3\envs\open_chemie\lib\site-packages\layoutparser\models\auto_layoutmodel.py:64, in AutoLayoutModel(config_path, model_path, label_map, device, extra_config)
62 for backend_name in ALL_AVAILABLE_BACKENDS:
63 if backend_name in config_path:
---> 64 return ALL_AVAILABLE_BACKENDS[backend_name](
65 config_path,
66 model_path=model_path,
67 label_map=label_map,
68 extra_config=extra_config,
69 device=device,
70 )
File ~\anaconda3\envs\open_chemie\lib\site-packages\layoutparser\models\effdet\layoutmodel.py:138, in EfficientDetLayoutModel.__init__(self, config_path, model_path, label_map, extra_config, enforce_cpu, device)
134 self.device = device
136 extra_config = extra_config if extra_config is not None else {}
--> 138 self._initialize_model(config_path, model_path, label_map, extra_config)
140 self.output_confidence_threshold = extra_config.get(
141 "output_confidence_threshold", self.DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD
142 )
144 self.preprocessor = InputTransform(self.config.image_size)
File ~\anaconda3\envs\open_chemie\lib\site-packages\layoutparser\models\effdet\layoutmodel.py:164, in EfficientDetLayoutModel._initialize_model(self, config_path, model_path, label_map, extra_config)
161 label_map = LABEL_MAP_CATALOG[dataset_name]
162 num_classes = len(label_map)
--> 164 model_path = PathManager.get_local_path(model_path)
166 self.model = create_model(
167 model_name,
168 num_classes=num_classes,
(...)
171 checkpoint_path=model_path,
172 )
173 else:
File ~\anaconda3\envs\open_chemie\lib\site-packages\iopath\common\file_io.py:1251, in PathManager.get_local_path(self, path, force, **kwargs)
1249 handler = self.__get_path_handler(path) # type: ignore
1250 try:
-> 1251 bret = handler._get_local_path(path, force=force, **kwargs)
1252 except TypeError:
1253 bret = handler._get_local_path(path, **kwargs)
File ~\anaconda3\envs\open_chemie\lib\site-packages\layoutparser\models\effdet\catalog.py:64, in LayoutParserEfficientDetModelHandler._get_local_path(self, path, **kwargs)
62 else:
63 raise ValueError(f"Unknown data_type {data_type}")
---> 64 return PathManager.get_local_path(model_url, **kwargs)
File ~\anaconda3\envs\open_chemie\lib\site-packages\iopath\common\file_io.py:1251, in PathManager.get_local_path(self, path, force, **kwargs)
1249 handler = self.__get_path_handler(path) # type: ignore
1250 try:
-> 1251 bret = handler._get_local_path(path, force=force, **kwargs)
1252 except TypeError:
1253 bret = handler._get_local_path(path, **kwargs)
File ~\anaconda3\envs\open_chemie\lib\site-packages\iopath\common\file_io.py:835, in HTTPURLHandler._get_local_path(self, path, force, cache_dir, **kwargs)
832 filename = filename[:100] + "_" + uuid.uuid4().hex
834 cached = os.path.join(dirname, filename)
--> 835 with file_lock(cached):
836 if not os.path.isfile(cached):
837 logger.info("Downloading {} ...".format(path))
File ~\anaconda3\envs\open_chemie\lib\site-packages\portalocker\utils.py:302, in Lock.__enter__(self)
301 def __enter__(self) -> typing.IO[typing.AnyStr]:
--> 302 return self.acquire()
File ~\anaconda3\envs\open_chemie\lib\site-packages\portalocker\utils.py:256, in Lock.acquire(self, timeout, check_interval, fail_when_locked)
253 return fh
255 # Get a new filehandler
--> 256 fh = self._get_fh()
258 def try_close(): # pragma: no cover
259 # Silently try to close the handle if possible, ignore all issues
260 if fh is not None:
File ~\anaconda3\envs\open_chemie\lib\site-packages\portalocker\utils.py:313, in Lock._get_fh(self)
311 def _get_fh(self) -> typing.IO:
312 '''Get a new filehandle'''
--> 313 return open( # noqa: SIM115
314 self.filename,
315 self.mode,
316 **self.file_open_kwargs,
317 )
OSError: [Errno 22] Invalid argument: 'C:\Users\Lyubomir/.torch/iopath_cache\s/gxy11xkkiwnpgog\publaynet-tf_efficientdet_d1.pth.tar?dl=1.lock'
Any idea why the extract_reactions_from_text_in_pdf gives empty lists and the extract_reactions_from_figures_in_pdf throws an error?
Thank you in advance!
The text was updated successfully, but these errors were encountered: