Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add new Data utility components for CSV/JSON parsing, routing, and filtering #3776

Merged
merged 15 commits into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions src/backend/base/langflow/components/helpers/CSVtoData.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import csv
import io
from pathlib import Path

from langflow.custom import Component
from langflow.io import FileInput, MessageTextInput, MultilineInput, Output
from langflow.schema import Data


class CSVToDataComponent(Component):
    """Convert CSV content into a list of ``Data`` objects, one per CSV row.

    Exactly one of the three inputs (uploaded file, file path, raw CSV string)
    must be provided; the header row supplies the keys of each ``Data`` object.
    """

    display_name = "CSV to Data List"
    description = "Load a CSV file, CSV from a file path, or a valid CSV string and convert it to a list of Data"
    icon = "file-spreadsheet"
    beta = True
    name = "CSVtoData"

    inputs = [
        FileInput(
            name="csv_file",
            display_name="CSV File",
            file_types=["csv"],
            info="Upload a CSV file to convert to a list of Data objects",
        ),
        MessageTextInput(
            name="csv_path",
            display_name="CSV File Path",
            info="Provide the path to the CSV file as pure text",
        ),
        MultilineInput(
            name="csv_string",
            display_name="CSV String",
            info="Paste a CSV string directly to convert to a list of Data objects",
        ),
    ]

    outputs = [
        Output(name="data_list", display_name="Data List", method="load_csv_to_data"),
    ]

    def _read_csv_file(self, path) -> str:
        """Return the text of the ``.csv`` file at ``path``.

        Raises:
            ValueError: If the path does not end in ``.csv`` (case-insensitive).
        """
        file_path = Path(path)
        if file_path.suffix.lower() != ".csv":
            msg = "The provided file must be a CSV file."
            raise ValueError(msg)
        # "utf-8-sig" reads plain UTF-8 unchanged but drops a leading BOM, which
        # would otherwise end up glued to the first header name (Excel exports).
        with file_path.open(newline="", encoding="utf-8-sig") as csvfile:
            return csvfile.read()

    def load_csv_to_data(self) -> list[Data]:
        """Parse the single provided CSV source into a list of ``Data`` rows.

        Returns:
            One ``Data`` per CSV row keyed by the header row, or an empty list
            when the CSV has no data rows. ``self.status`` is set to the result
            (or to a short message when empty or on failure).

        Raises:
            ValueError: On any failure. CSV parsing errors are prefixed with
                "CSV parsing error: ", everything else (including the input
                validation errors raised below) with "An error occurred: ".
        """
        try:
            sources = [self.csv_file, self.csv_path, self.csv_string]
            if sum(bool(source) for source in sources) != 1:
                msg = "Please provide exactly one of: CSV file, file path, or CSV string."
                raise ValueError(msg)

            if self.csv_file:
                csv_data = self._read_csv_file(self.resolve_path(self.csv_file))
            elif self.csv_path:
                csv_data = self._read_csv_file(self.csv_path)
            else:
                # Exactly one source is truthy, so it must be the raw string.
                csv_data = self.csv_string

            if not csv_data:
                msg = "No CSV data provided."
                raise ValueError(msg)

            result = [Data(data=row) for row in csv.DictReader(io.StringIO(csv_data))]

            if not result:
                self.status = "The CSV data is empty."
                return []

            self.status = result
            return result

        except csv.Error as e:
            error_message = f"CSV parsing error: {e!s}"
            self.status = error_message
            raise ValueError(error_message) from e

        except Exception as e:
            # Deliberately broad: callers only ever see ValueError from this method.
            error_message = f"An error occurred: {e!s}"
            self.status = error_message
            raise ValueError(error_message) from e
73 changes: 73 additions & 0 deletions src/backend/base/langflow/components/helpers/CurrentDate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from datetime import datetime
from zoneinfo import ZoneInfo

from langflow.custom import Component
from langflow.io import DropdownInput, Output
from langflow.schema.message import Message


class CurrentDateComponent(Component):
    """Component that reports the current date and time in a selected timezone."""

    display_name = "Current Date"
    description = "Returns the current date and time in the selected timezone."
    icon = "clock"
    beta = True
    name = "CurrentDate"

    inputs = [
        DropdownInput(
            name="timezone",
            display_name="Timezone",
            options=[
                "UTC",
                "US/Eastern",
                "US/Central",
                "US/Mountain",
                "US/Pacific",
                "Europe/London",
                "Europe/Paris",
                "Europe/Berlin",
                "Europe/Moscow",
                "Asia/Tokyo",
                "Asia/Shanghai",
                "Asia/Singapore",
                "Asia/Dubai",
                "Australia/Sydney",
                "Australia/Melbourne",
                "Pacific/Auckland",
                "America/Sao_Paulo",
                "America/Mexico_City",
                "America/Toronto",
                "America/Vancouver",
                "Africa/Cairo",
                "Africa/Johannesburg",
                "Atlantic/Reykjavik",
                "Indian/Maldives",
                "America/Bogota",
                "America/Lima",
                "America/Santiago",
                "America/Buenos_Aires",
                "America/Caracas",
                "America/La_Paz",
                "America/Montevideo",
                "America/Asuncion",
                "America/Cuiaba",
            ],
            value="UTC",
            info="Select the timezone for the current date and time.",
        ),
    ]
    outputs = [
        Output(display_name="Current Date", name="current_date", method="get_current_date"),
    ]

    def get_current_date(self) -> Message:
        """Return a message with the current timestamp in ``self.timezone``.

        The timestamp is formatted as ``YYYY-MM-DD HH:MM:SS TZ``. Any exception
        raised while building it is not propagated: it is reported as an
        ``Error: ...`` message instead. ``self.status`` mirrors the message text.
        """
        try:
            zone_name = self.timezone
            timestamp = datetime.now(ZoneInfo(zone_name)).strftime("%Y-%m-%d %H:%M:%S %Z")
            message_text = f"Current date and time in {zone_name}: {timestamp}"
            self.status = message_text
            return Message(text=message_text)
        except Exception as exc:
            failure_text = f"Error: {exc!s}"
            self.status = failure_text
            return Message(text=failure_text)
124 changes: 124 additions & 0 deletions src/backend/base/langflow/components/helpers/DataConditionalRouter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
from typing import Any

from langflow.custom import Component
from langflow.io import DataInput, DropdownInput, MessageTextInput, Output
from langflow.schema import Data, dotdict


class DataConditionalRouterComponent(Component):
display_name = "Data Conditional Router"
description = "Route Data object(s) based on a condition applied to a specified key, including boolean validation."
icon = "split"
beta = True
name = "DataConditionalRouter"

inputs = [
DataInput(
name="data_input",
display_name="Data Input",
info="The Data object or list of Data objects to process",
is_list=True,
),
MessageTextInput(
name="key_name",
display_name="Key Name",
info="The name of the key in the Data object(s) to check",
),
DropdownInput(
name="operator",
display_name="Comparison Operator",
options=["equals", "not equals", "contains", "starts with", "ends with", "boolean validator"],
info="The operator to apply for comparing the values. 'boolean validator' treats the value as a boolean.",
value="equals",
),
MessageTextInput(
name="compare_value",
display_name="Compare Value",
info="The value to compare against (not used for boolean validator)",
),
]

outputs = [
Output(display_name="True Output", name="true_output", method="process_data"),
Output(display_name="False Output", name="false_output", method="process_data"),
]

def compare_values(self, item_value: str, compare_value: str, operator: str) -> bool:
if operator == "equals":
return item_value == compare_value
if operator == "not equals":
return item_value != compare_value
if operator == "contains":
return compare_value in item_value
if operator == "starts with":
return item_value.startswith(compare_value)
if operator == "ends with":
return item_value.endswith(compare_value)
if operator == "boolean validator":
return self.parse_boolean(item_value)
return False

def parse_boolean(self, value):
if isinstance(value, bool):
return value
if isinstance(value, str):
return value.lower() in ["true", "1", "yes", "y", "on"]
return bool(value)

def validate_input(self, data_item: Data) -> bool:
if not isinstance(data_item, Data):
self.status = "Input is not a Data object"
return False
if self.key_name not in data_item.data:
self.status = f"Key '{self.key_name}' not found in Data"
return False
return True

def process_data(self) -> Data | list[Data]:
if isinstance(self.data_input, list):
true_output = []
false_output = []
for item in self.data_input:
if self.validate_input(item):
result = self.process_single_data(item)
if result:
true_output.append(item)
else:
false_output.append(item)
self.stop("false_output" if true_output else "true_output")
return true_output if true_output else false_output
if not self.validate_input(self.data_input):
return Data(data={"error": self.status})
result = self.process_single_data(self.data_input)
self.stop("false_output" if result else "true_output")
return self.data_input

def process_single_data(self, data_item: Data) -> bool:
item_value = data_item.data[self.key_name]
operator = self.operator

if operator == "boolean validator":
condition_met = self.parse_boolean(item_value)
condition_description = f"Boolean validation of '{self.key_name}'"
else:
compare_value = self.compare_value
condition_met = self.compare_values(str(item_value), compare_value, operator)
condition_description = f"{self.key_name} {operator} {compare_value}"

if condition_met:
self.status = f"Condition met: {condition_description}"
return True
self.status = f"Condition not met: {condition_description}"
return False

def update_build_config(self, build_config: dotdict, field_value: Any, field_name: str | None = None):
if field_name == "operator":
if field_value == "boolean validator":
build_config["compare_value"]["show"] = False
build_config["compare_value"]["advanced"] = True
build_config["compare_value"]["value"] = None
else:
build_config["compare_value"]["show"] = True
build_config["compare_value"]["advanced"] = False

return build_config
53 changes: 53 additions & 0 deletions src/backend/base/langflow/components/helpers/ExtractKey.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from langflow.custom import Component
from langflow.io import DataInput, Output, StrInput
from langflow.schema import Data


class ExtractDataKeyComponent(Component):
    """Pull a single key out of a Data object (or each Data object in a list)."""

    display_name = "Extract Key"
    description = (
        "Extract a specific key from a Data object or a list of "
        "Data objects and return the extracted value(s) as Data object(s)."
    )
    icon = "key"
    beta = True
    name = "ExtractaKey"

    inputs = [
        DataInput(
            name="data_input",
            display_name="Data Input",
            info="The Data object or list of Data objects to extract the key from.",
        ),
        StrInput(
            name="key",
            display_name="Key to Extract",
            info="The key in the Data object(s) to extract.",
        ),
    ]

    outputs = [
        Output(display_name="Extracted Data", name="extracted_data", method="extract_key"),
    ]

    def extract_key(self) -> Data | list[Data]:
        """Return new Data object(s) holding only ``self.key``.

        For a list input, items that are not Data or lack the key are skipped.
        For a single Data input without the key, or for any other input type,
        a Data object with an ``"error"`` entry is returned instead.
        """
        key = self.key
        source = self.data_input

        if isinstance(source, list):
            extracted = [
                Data(data={key: item.data[key]})
                for item in source
                if isinstance(item, Data) and key in item.data
            ]
            self.status = extracted
            return extracted

        if not isinstance(source, Data):
            self.status = "Invalid input. Expected Data object or list of Data objects."
            return Data(data={"error": "Invalid input. Expected Data object or list of Data objects."})

        if key not in source.data:
            self.status = f"Key '{key}' not found in Data object."
            return Data(data={"error": f"Key '{key}' not found in Data object."})

        single = Data(data={key: source.data[key]})
        self.status = single
        return single
Loading
Loading