Description
Hi
I wonder if you could help with this problem: I am using pandas to read and process some web data. Until I recently updated all my Python libraries, everything worked fine, but I am now receiving the error above. I appear to have html5lib version 0.9999999 installed now, but I am unsure what the previous version was. The tables are collected from the webpage with the pandas read_html function. The complete traceback of the error is given below.
ImportError Traceback (most recent call last)
Cell In[2], line 5
1 # First collect the current records for each distance at the track
2 # We need a dictionary of track record times
4 RECORDS_URL = 'http://www.mauritiusturfclub.com/index.php/en/racing/track-records'
----> 5 records = pd.read_html(RECORDS_URL)
7 for index, row in records[0].iterrows():
8 dist = float(row['Distance'].strip('m'))
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/util/_decorators.py:311, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
305 if len(args) > num_allow_args:
306 warnings.warn(
307 msg.format(arguments=arguments),
308 FutureWarning,
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/html.py:1100, in read_html(io, match, flavor, header, index_col, skiprows, attrs, parse_dates, thousands, encoding, decimal, converters, na_values, keep_default_na, displayed_only)
950 @deprecate_nonkeyword_arguments(version="2.0")
951 def read_html(
952 io: FilePath | ReadBuffer[str],
(...)
966 displayed_only: bool = True,
967 ) -> list[DataFrame]:
968 r"""
969 Read HTML tables into a ``list`` of ``DataFrame`` objects.
970
(...)
1098 <io.read_html>` for some examples of reading in HTML tables.
1099 """
-> 1100 _importers()
1102 # Type check here. We don't want to parse only to fail because of an
1103 # invalid value of an integer skiprows.
1104 if isinstance(skiprows, numbers.Integral) and skiprows < 0:
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/html.py:66, in _importers()
63 lxml = import_optional_dependency("lxml.etree", errors="ignore")
64 _HAS_LXML = lxml is not None
---> 66 html5lib = import_optional_dependency("html5lib", errors="ignore")
67 _HAS_HTML5LIB = html5lib is not None
69 _IMPORTS = True
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/compat/_optional.py:154, in import_optional_dependency(name, extra, errors, min_version)
152 minimum_version = min_version if min_version is not None else VERSIONS.get(parent)
153 if minimum_version:
--> 154 version = get_version(module_to_get)
155 if version and Version(version) < Version(minimum_version):
156 msg = (
157 f"Pandas requires version '{minimum_version}' or newer of '{parent}' "
158 f"(version '{version}' currently installed)."
159 )
File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/compat/_optional.py:80, in get_version(module)
76 if module.__name__ == "snappy":
77 # snappy doesn't contain attributes to confirm it's version
78 # See intake/python-snappy#119
79 return ""
---> 80 raise ImportError(f"Can't determine version for {module.__name__}")
81 if module.__name__ == "psycopg2":
82 # psycopg2 appends " (dt dec pq3 ext lo64)" to it's version
83 version = version.split()[0]
ImportError: Can't determine version for html5lib
Thanks
Michael Parry