-
Notifications
You must be signed in to change notification settings - Fork 34
/
Copy pathingest.py
24 lines (18 loc) · 1.01 KB
/
ingest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
"""
This module defines the following routines used by the 'ingest' step of the regression recipe:
- ``load_file_as_dataframe``: Defines customizable logic for parsing dataset formats that are not
natively parsed by MLflow Recipes (i.e. formats other than Parquet, Delta, and Spark SQL).
"""
from pandas import DataFrame
def load_file_as_dataframe(location: str) -> DataFrame:
    """
    Load content from the specified dataset file as a Pandas DataFrame.

    This method is used to load dataset types that are not natively managed by MLflow Recipes
    (datasets that are not in Parquet, Delta Table, or Spark SQL Table format). This method is
    called once for each file in the dataset, and MLflow Recipes automatically combines the
    resulting DataFrames together.

    This is a template stub: until custom parsing logic is supplied, every call fails.

    :param location: The path to the dataset file.
    :return: A Pandas DataFrame representing the content of the specified file.
    :raises NotImplementedError: Always, until this function is implemented. The message names
                                 the file that could not be loaded.
    """
    # FIXME::OPTIONAL: implement the handling of non-natively supported file_format.
    # Raise with a message (rather than a bare exception) so the failure points at the file
    # that triggered it and at the function that has to be filled in.
    raise NotImplementedError(
        f"load_file_as_dataframe is not implemented, so the dataset file {location!r} "
        "cannot be loaded. Add custom parsing logic for this file format to "
        "load_file_as_dataframe."
    )