Skip to content

Commit

Permalink
Fix the loading error of jsonl file (#644)
Browse files Browse the repository at this point in the history
* fixjsonl

Signed-off-by: XuhuiRen <xuhui.ren@intel.com>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: XuhuiRen <xuhui.ren@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
XuhuiRen and pre-commit-ci[bot] authored Sep 10, 2024
1 parent 48d4e53 commit 2fbce3e
Showing 1 changed file with 13 additions and 1 deletion.
14 changes: 13 additions & 1 deletion comps/dataprep/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,16 @@ def load_json(json_path):
return content_list


def load_jsonl(jsonl_path):
    """Load a JSON Lines file and return its records as a list.

    Each non-blank line of the file is parsed as one independent JSON value.
    Blank or whitespace-only lines (for example a trailing empty line at the
    end of the file) are skipped instead of aborting the whole load.

    Args:
        jsonl_path: Path of the ``.jsonl`` file to read.

    Returns:
        A list with one parsed JSON value per non-blank line, in file order.
        An empty file yields an empty list.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        OSError: If the file cannot be opened.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(jsonl_path, "r", encoding="utf-8") as file:
        # json.loads tolerates surrounding whitespace (including the newline),
        # so only lines that are entirely whitespace need to be filtered out.
        return [json.loads(line) for line in file if line.strip()]


def load_yaml(yaml_path):
"""Load and process yaml file."""
with open(yaml_path, "r") as file:
Expand Down Expand Up @@ -351,8 +361,10 @@ def document_loader(doc_path):
return load_md(doc_path)
elif doc_path.endswith(".xml"):
return load_xml(doc_path)
elif doc_path.endswith(".json") or doc_path.endswith(".jsonl"):
elif doc_path.endswith(".json"):
return load_json(doc_path)
elif doc_path.endswith(".jsonl"):
return load_jsonl(doc_path)
elif doc_path.endswith(".yaml"):
return load_yaml(doc_path)
elif doc_path.endswith(".xlsx") or doc_path.endswith(".xls"):
Expand Down

0 comments on commit 2fbce3e

Please sign in to comment.