Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add new Data utility components for CSV/JSON parsing, routing, and filtering #3776

Merged
merged 15 commits into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions src/backend/base/langflow/components/helpers/CSVtoData.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
from langflow.custom import Component
from langflow.io import FileInput, Output, MessageTextInput, MultilineInput
from langflow.schema import Data
from pathlib import Path
import csv
import io


class CSVToDataComponent(Component):

Check failure on line 9 in src/backend/base/langflow/components/helpers/CSVtoData.py

View workflow job for this annotation

GitHub Actions / Ruff Style Check (3.12)

Ruff (I001)

src/backend/base/langflow/components/helpers/CSVtoData.py:1:1: I001 Import block is un-sorted or un-formatted
# Descriptive attributes of the CSV-to-Data component.
display_name = "CSV to Data List"
description = "Load a CSV file, CSV from a file path, or a valid CSV string and convert it to a list of Data"
icon = "file-spreadsheet"
beta = True
name = "CSVtoData"

# Three alternative CSV sources. load_csv_to_data raises unless exactly one
# of them is set.
inputs = [
# Source 1: an uploaded file (file_types lists only "csv").
FileInput(
name="csv_file",
display_name="CSV File",
file_types=["csv"],
info="Upload a CSV file to convert to a list of Data objects",
),
# Source 2: a path to a CSV file, given as text.
MessageTextInput(
name="csv_path",
display_name="CSV File Path",
info="Provide the path to the CSV file as pure text",
),
# Source 3: the CSV content itself, pasted as text.
MultilineInput(
name="csv_string",
display_name="CSV String",
info="Paste a CSV string directly to convert to a list of Data objects",
),
]

# Single output, produced by the load_csv_to_data method.
outputs = [
Output(name="data_list", display_name="Data List", method="load_csv_to_data"),
]

def load_csv_to_data(self) -> list[Data]:
    """Convert the configured CSV source into a list of ``Data`` objects.

    Exactly one of ``self.csv_file``, ``self.csv_path`` or ``self.csv_string``
    must be set. File-based sources must have a ``.csv`` suffix and are read
    as UTF-8 text. Every row produced by ``csv.DictReader`` becomes one
    ``Data(data=row)``.

    Returns:
        One ``Data`` per CSV row. When the CSV yields no rows, ``self.status``
        is set to "The CSV data is empty." and an empty list is returned;
        otherwise ``self.status`` is set to the resulting list.

    Raises:
        ValueError: For CSV parsing failures ("CSV parsing error: ...") and
            for every other failure ("An error occurred: ..."). Validation
            errors raised inside this method also pass through the generic
            handler, so they carry the "An error occurred: " prefix too.
            ``self.status`` is set to the message before raising.
    """
    try:
        sources = (self.csv_file, self.csv_path, self.csv_string)
        if sum(bool(source) for source in sources) != 1:
            msg = "Please provide exactly one of: CSV file, file path, or CSV string."
            raise ValueError(msg)

        if self.csv_string:
            csv_data = self.csv_string
        else:
            # An uploaded file goes through resolve_path; a typed path is used as given.
            raw_path = self.resolve_path(self.csv_file) if self.csv_file else self.csv_path
            file_path = Path(raw_path)
            if file_path.suffix.lower() != ".csv":
                msg = "The provided file must be a CSV file."
                raise ValueError(msg)
            # newline="" disables newline translation so the csv parser sees
            # the original line endings, including those inside quoted fields.
            with file_path.open(newline="", encoding="utf-8") as csvfile:
                csv_data = csvfile.read()

        if not csv_data:
            msg = "No CSV data provided."
            raise ValueError(msg)

        result = [Data(data=row) for row in csv.DictReader(io.StringIO(csv_data))]

        if not result:
            self.status = "The CSV data is empty."
            return []

        self.status = result
        return result

    except csv.Error as e:
        error_message = f"CSV parsing error: {e!s}"
        self.status = error_message
        raise ValueError(error_message) from e

    except Exception as e:
        error_message = f"An error occurred: {e!s}"
        self.status = error_message
        raise ValueError(error_message) from e
72 changes: 72 additions & 0 deletions src/backend/base/langflow/components/helpers/CurrentDate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from datetime import datetime
from zoneinfo import ZoneInfo
from langflow.custom import Component
from langflow.io import DropdownInput, Output
from langflow.schema.message import Message


class CurrentDateComponent(Component):

Check failure on line 8 in src/backend/base/langflow/components/helpers/CurrentDate.py

View workflow job for this annotation

GitHub Actions / Ruff Style Check (3.12)

Ruff (I001)

src/backend/base/langflow/components/helpers/CurrentDate.py:1:1: I001 Import block is un-sorted or un-formatted
# Descriptive attributes of the current-date component.
display_name = "Current Date"
description = "Returns the current date and time in the selected timezone."
icon = "clock"
beta = True
name = "CurrentDate"

# Single dropdown input. The selected name is passed to ZoneInfo in
# get_current_date, so every option is written as a time zone key.
inputs = [
DropdownInput(
name="timezone",
display_name="Timezone",
options=[
"UTC",
"US/Eastern",
"US/Central",
"US/Mountain",
"US/Pacific",
"Europe/London",
"Europe/Paris",
"Europe/Berlin",
"Europe/Moscow",
"Asia/Tokyo",
"Asia/Shanghai",
"Asia/Singapore",
"Asia/Dubai",
"Australia/Sydney",
"Australia/Melbourne",
"Pacific/Auckland",
"America/Sao_Paulo",
"America/Mexico_City",
"America/Toronto",
"America/Vancouver",
"Africa/Cairo",
"Africa/Johannesburg",
"Atlantic/Reykjavik",
"Indian/Maldives",
"America/Bogota",
"America/Lima",
"America/Santiago",
"America/Buenos_Aires",
"America/Caracas",
"America/La_Paz",
"America/Montevideo",
"America/Asuncion",
"America/Cuiaba",
],
value="UTC",
info="Select the timezone for the current date and time.",
),
]
# Single output, produced by the get_current_date method.
outputs = [
Output(display_name="Current Date", name="current_date", method="get_current_date"),
]

def get_current_date(self) -> Message:
    """Build a message with the current date and time in ``self.timezone``.

    The timestamp is formatted as ``%Y-%m-%d %H:%M:%S %Z``. Any exception is
    caught and reported as an "Error: ..." message instead of being raised.
    In both cases ``self.status`` is set to the text of the returned message.
    """
    try:
        now = datetime.now(ZoneInfo(self.timezone))
        stamp = now.strftime("%Y-%m-%d %H:%M:%S %Z")
        message_text = f"Current date and time in {self.timezone}: {stamp}"
        self.status = message_text
        return Message(text=message_text)
    except Exception as e:
        # Best effort: the failure is reported as message text, not raised.
        failure_text = f"Error: {str(e)}"
        self.status = failure_text
        return Message(text=failure_text)
125 changes: 125 additions & 0 deletions src/backend/base/langflow/components/helpers/DataConditionalRouter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
from typing import Any, List, Union

Check failure on line 1 in src/backend/base/langflow/components/helpers/DataConditionalRouter.py

View workflow job for this annotation

GitHub Actions / Ruff Style Check (3.12)

Ruff (UP035)

src/backend/base/langflow/components/helpers/DataConditionalRouter.py:1:1: UP035 `typing.List` is deprecated, use `list` instead
from langflow.custom import Component
from langflow.io import DataInput, MessageTextInput, DropdownInput, Output
from langflow.schema import Data, dotdict


class DataConditionalRouterComponent(Component):

Check failure on line 7 in src/backend/base/langflow/components/helpers/DataConditionalRouter.py

View workflow job for this annotation

GitHub Actions / Ruff Style Check (3.12)

Ruff (I001)

src/backend/base/langflow/components/helpers/DataConditionalRouter.py:1:1: I001 Import block is un-sorted or un-formatted
# Descriptive attributes of the conditional-router component.
display_name = "Data Conditional Router"
description = "Route Data object(s) based on a condition applied to a specified key, including boolean validation."
icon = "split"
beta = True
name = "DataConditionalRouter"

inputs = [
# The Data to route; declared with is_list=True.
DataInput(
name="data_input",
display_name="Data Input",
info="The Data object or list of Data objects to process",
is_list=True,
),
# Key looked up in each Data object's `data` mapping.
MessageTextInput(
name="key_name",
display_name="Key Name",
info="The name of the key in the Data object(s) to check",
),
# Operator names must match the strings handled in compare_values.
DropdownInput(
name="operator",
display_name="Comparison Operator",
options=["equals", "not equals", "contains", "starts with", "ends with", "boolean validator"],
info="The operator to apply for comparing the values. 'boolean validator' treats the value as a boolean.",
value="equals",
),
# Right-hand side of the comparison; update_build_config hides this field
# when the operator is "boolean validator".
MessageTextInput(
name="compare_value",
display_name="Compare Value",
info="The value to compare against (not used for boolean validator)",
),
]

# Both outputs are produced by the same method, process_data, which calls
# self.stop(...) with the name of the output that should not fire.
outputs = [
Output(display_name="True Output", name="true_output", method="process_data"),
Output(display_name="False Output", name="false_output", method="process_data"),
]

def compare_values(self, item_value: str, compare_value: str, operator: str) -> bool:
    """Apply the named comparison operator to two values.

    Args:
        item_value: Left-hand value taken from the data item.
        compare_value: Right-hand value to compare against. It is ignored
            by the "boolean validator" operator.
        operator: One of "equals", "not equals", "contains", "starts with",
            "ends with" or "boolean validator".

    Returns:
        The result of the comparison, or ``False`` for an unknown operator.
    """
    # Lambdas keep evaluation lazy: only the selected comparison runs.
    checks = {
        "equals": lambda: item_value == compare_value,
        "not equals": lambda: item_value != compare_value,
        "contains": lambda: compare_value in item_value,
        "starts with": lambda: item_value.startswith(compare_value),
        "ends with": lambda: item_value.endswith(compare_value),
        "boolean validator": lambda: self.parse_boolean(item_value),
    }
    selected = checks.get(operator)
    if selected is None:
        return False
    return selected()

def parse_boolean(self, value):
    """Interpret ``value`` as a boolean.

    Strings are true only when, case-insensitively, they equal one of
    "true", "1", "yes", "y" or "on". Every other value, booleans included,
    is converted with ``bool``.
    """
    if isinstance(value, str):
        return value.lower() in ("true", "1", "yes", "y", "on")
    # bool(value) returns an existing bool unchanged, so this covers both cases.
    return bool(value)

def validate_input(self, data_item: Data) -> bool:
    """Check that ``data_item`` is a ``Data`` object containing ``self.key_name``.

    Returns ``True`` when the item is usable. Otherwise the reason is
    written to ``self.status`` and ``False`` is returned.
    """
    problem = None
    if not isinstance(data_item, Data):
        problem = "Input is not a Data object"
    elif self.key_name not in data_item.data:
        problem = f"Key '{self.key_name}' not found in Data"

    if problem is None:
        return True
    self.status = problem
    return False

def process_data(self) -> Union[Data, List[Data]]:
    """Route ``self.data_input`` according to the configured condition.

    List input: items failing ``validate_input`` are skipped, and the rest
    are split by ``process_single_data``. If any item met the condition,
    "false_output" is stopped and only the matching items are returned.
    Otherwise "true_output" is stopped and the non-matching items are
    returned.

    Single input: if validation fails, a ``Data`` holding the error text
    from ``self.status`` is returned and no output is stopped. Otherwise the
    output that does not correspond to the result is stopped and the input
    is returned unchanged.
    """
    incoming = self.data_input

    if not isinstance(incoming, list):
        if not self.validate_input(incoming):
            return Data(data={"error": self.status})
        matched = self.process_single_data(incoming)
        self.stop("false_output" if matched else "true_output")
        return incoming

    passed = []
    failed = []
    for entry in incoming:
        if not self.validate_input(entry):
            continue
        bucket = passed if self.process_single_data(entry) else failed
        bucket.append(entry)

    if passed:
        self.stop("false_output")
        return passed
    self.stop("true_output")
    return failed

def process_single_data(self, data_item: Data) -> bool:
    """Evaluate the configured condition against one data item.

    Reads the value stored under ``self.key_name``. For the
    "boolean validator" operator the value is passed to ``parse_boolean``.
    For every other operator it is converted to ``str`` and compared with
    ``self.compare_value`` through ``compare_values``. ``self.status`` is
    set to a description of the outcome.

    Returns:
        ``True`` when the condition is met, ``False`` otherwise.
    """
    field_value = data_item.data[self.key_name]
    chosen_operator = self.operator

    if chosen_operator == "boolean validator":
        outcome = self.parse_boolean(field_value)
        summary = f"Boolean validation of '{self.key_name}'"
    else:
        target = self.compare_value
        outcome = self.compare_values(str(field_value), target, chosen_operator)
        summary = f"{self.key_name} {chosen_operator} {target}"

    self.status = f"Condition met: {summary}" if outcome else f"Condition not met: {summary}"
    return bool(outcome)

def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None):
    """Show or hide the ``compare_value`` field when the operator changes.

    Args:
        build_config: The component's build configuration; it is modified in
            place and also returned.
        field_value: The new value of the field that changed.
        field_name: The name of the field that changed. Only "operator"
            triggers an update.

    Returns:
        The same ``build_config`` object. When the operator becomes
        "boolean validator" the ``compare_value`` entry is hidden, marked
        advanced and its value is reset to ``None``. For any other operator
        it is shown and marked non-advanced, and its value is left untouched.
    """
    if field_name != "operator":
        return build_config

    compare_field = build_config["compare_value"]
    if field_value == "boolean validator":
        # The boolean validator takes no right-hand value, so hide and clear it.
        compare_field["show"] = False
        compare_field["advanced"] = True
        compare_field["value"] = None
    else:
        compare_field["show"] = True
        compare_field["advanced"] = False

    return build_config
53 changes: 53 additions & 0 deletions src/backend/base/langflow/components/helpers/ExtractKey.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from typing import List, Union
from langflow.custom import Component
from langflow.io import DataInput, StrInput, Output
from langflow.schema import Data


class ExtractDataKeyComponent(Component):
    """Component that extracts one key from a Data object or a list of them."""

    display_name = "Extract Key"
    description = (
        "Extract a specific key from a Data object or a list of Data objects "
        "and return the extracted value(s) as Data object(s)."
    )
    icon = "key"
    beta = True
    name = "ExtractaKey"

    inputs = [
        DataInput(
            name="data_input",
            display_name="Data Input",
            info="The Data object or list of Data objects to extract the key from.",
        ),
        StrInput(
            name="key",
            display_name="Key to Extract",
            info="The key in the Data object(s) to extract.",
        ),
    ]

    outputs = [
        Output(display_name="Extracted Data", name="extracted_data", method="extract_key"),
    ]

    def extract_key(self) -> Union[Data, List[Data]]:
        """Return new Data object(s) holding only ``self.key``.

        List input: every element that is a ``Data`` containing the key
        yields a one-entry ``Data``; other elements are skipped silently.
        Single ``Data`` input: a one-entry ``Data`` is returned, or a
        ``Data`` with an "error" entry when the key is missing.
        Any other input yields a ``Data`` with an "error" entry.
        ``self.status`` is set to the result or to the error text.
        """
        wanted = self.key
        source = self.data_input

        if isinstance(source, list):
            extracted = [
                Data(data={wanted: entry.data[wanted]})
                for entry in source
                if isinstance(entry, Data) and wanted in entry.data
            ]
            self.status = extracted
            return extracted

        if not isinstance(source, Data):
            message = "Invalid input. Expected Data object or list of Data objects."
            self.status = message
            return Data(data={"error": message})

        if wanted not in source.data:
            message = f"Key '{wanted}' not found in Data object."
            self.status = message
            return Data(data={"error": message})

        single = Data(data={wanted: source.data[wanted]})
        self.status = single
        return single
Loading
Loading