Skip to content

Commit 1632ed8

Browse files
committed
BUG: Improved thread safety for read_html() GH16928
1 parent 7ffe7fc commit 1632ed8

File tree

3 files changed

+38
-3
lines changed

3 files changed

+38
-3
lines changed

doc/source/whatsnew/v0.21.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@ I/O
164164

165165
- Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`)
166166

167+
- Bug in :func:`read_html` where the import check fails when run concurrently (:issue:`16928`)
168+
167169
Plotting
168170
^^^^^^^^
169171
- Bug in plotting methods using ``secondary_y`` and ``fontsize`` not setting secondary axis font size (:issue:`12565`)

pandas/io/html.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,6 @@ def _importers():
3737
if _IMPORTS:
3838
return
3939

40-
_IMPORTS = True
41-
4240
global _HAS_BS4, _HAS_LXML, _HAS_HTML5LIB
4341

4442
try:
@@ -59,6 +57,8 @@ def _importers():
5957
except ImportError:
6058
pass
6159

60+
_IMPORTS = True
61+
6262

6363
#############
6464
# READ HTML #

pandas/tests/io/test_html.py

+34-1
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,17 @@
33
import glob
44
import os
55
import re
6+
import threading
67
import warnings
78

9+
10+
# imports needed for Python 3.x but will fail under Python 2.x
811
try:
9-
from importlib import import_module
12+
from importlib import import_module, reload
1013
except ImportError:
1114
import_module = __import__
1215

16+
1317
from distutils.version import LooseVersion
1418

1519
import pytest
@@ -22,6 +26,7 @@
2226
from pandas.compat import (map, zip, StringIO, string_types, BytesIO,
2327
is_platform_windows, PY3)
2428
from pandas.io.common import URLError, urlopen, file_path_to_url
29+
import pandas.io.html
2530
from pandas.io.html import read_html
2631
from pandas._libs.parsers import ParserError
2732

@@ -931,3 +936,31 @@ def test_same_ordering():
931936
dfs_lxml = read_html(filename, index_col=0, flavor=['lxml'])
932937
dfs_bs4 = read_html(filename, index_col=0, flavor=['bs4'])
933938
assert_framelist_equal(dfs_lxml, dfs_bs4)
939+
940+
941+
class ErrorThread(threading.Thread):
    """Thread that records an exception raised by ``run`` instead of losing it.

    After the thread has finished, ``err`` holds the ``Exception`` instance
    raised by the target, or ``None`` if the target completed normally.
    """

    # Default so that ``err`` can be read safely even before ``run`` has
    # completed (or if the thread was never started); ``run`` overwrites it.
    err = None

    def run(self):
        """Run the thread's target, storing any ``Exception`` on ``self.err``."""
        try:
            super(ErrorThread, self).run()
        except Exception as e:
            # Keep the exception so the spawning thread can inspect it
            # once this thread has finished.
            self.err = e
        else:
            self.err = None
949+
950+
951+
@pytest.mark.slow
def test_importcheck_thread_safety():
    """Check that two concurrent ``read_html`` calls both finish without error.

    Regression test for GH16928: the import check in ``pandas.io.html`` is
    re-triggered and then exercised from two threads at once.
    """
    # force import check by reinitialising global vars in html.py
    reload(pandas.io.html)

    filename = os.path.join(DATA_PATH, 'valid_markup.html')
    helper_thread1 = ErrorThread(target=read_html, args=(filename,))
    helper_thread2 = ErrorThread(target=read_html, args=(filename,))

    helper_thread1.start()
    helper_thread2.start()

    # Block until both workers are done; join() waits without spinning
    # the CPU the way polling is_alive() in a loop would.
    helper_thread1.join()
    helper_thread2.join()

    # Neither thread may have recorded an exception.
    assert helper_thread1.err is None
    assert helper_thread2.err is None

0 commit comments

Comments
 (0)