-
Notifications
You must be signed in to change notification settings - Fork 5.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add dataframe operations component (#5341)
* add dataframe operations component * populate entire new column with value Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * [autofix.ci] apply automated fixes * Add unit tests for DataFrame operations in `test_dataframe_operations.py` * **Import modules** - Import `pytest` and `pandas` for testing DataFrame operations * **Define test cases** - Define test cases for edge cases like empty DataFrames and invalid column names - Include tests for operations like "Head", "Tail", and "Replace Value" - Use `pytest.mark.parametrize` to test multiple operations with different inputs - Add detailed assertions to verify the correctness of DataFrame operations * [autofix.ci] apply automated fixes * Remove test cases for DataFrame operations from `test_dataframe_operations.py`. This deletion includes all unit tests related to various DataFrame operations such as adding, dropping, filtering, and renaming columns, as well as handling edge cases like empty DataFrames and invalid operations. The removal streamlines the test suite by eliminating outdated or redundant tests. * Add unit tests for DataFrame operations - Introduced a new test file for organizing test components. - Updated import paths to reflect the new module structure. - Refactored test cases for better readability and maintainability. - Enhanced assertions in tests for various DataFrame operations, including handling of empty DataFrames and invalid operations. - Improved code formatting for consistency and clarity. * Refactor DataFrameOperationsComponent for improved readability and maintainability - Consolidated import statements for clarity. - Renamed variable `df` to `dataframe_copy` for better understanding. - Streamlined the `perform_operation` method by replacing `elif` with `if` statements for clearer logic flow. - Enhanced error message for unsupported operations to improve debugging. These changes aim to enhance the code structure and make future modifications easier. 
* Update unit tests for DataFrame operations in `test_dataframe_operations.py` - Modified expected values in parameterized tests for various DataFrame operations, including "Add Column", "Filter", "Sort", "Head", "Tail", and "Replace Value" to reflect new test scenarios. - Adjusted assertions to ensure they correctly validate the output of operations, particularly for lists of expected values. - Enhanced error handling in the test for invalid operations to provide clearer feedback on unsupported operation types. These changes improve the accuracy and robustness of the unit tests for DataFrame operations. * Refactor DataFrameOperationsComponent methods to return DataFrame instances consistently --------- Co-authored-by: Gabriel Luiz Freitas Almeida <gabriel@langflow.org> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
- Loading branch information
1 parent
41f8329
commit 62c13ad
Showing
3 changed files
with
296 additions
and
0 deletions.
There are no files selected for viewing
212 changes: 212 additions & 0 deletions
212
src/backend/base/langflow/components/processing/dataframe_operations.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,212 @@ | ||
from langflow.custom import Component | ||
from langflow.io import BoolInput, DataFrameInput, DropdownInput, IntInput, MessageTextInput, Output, StrInput | ||
from langflow.schema import DataFrame | ||
|
||
|
||
class DataFrameOperationsComponent(Component):
    """Apply one user-selected operation to an input DataFrame.

    The ``operation`` dropdown chooses one of ``OPERATION_CHOICES``. All other
    inputs are declared hidden (``show=False``) and are revealed by
    ``update_build_config`` only for the operation that needs them.
    ``perform_operation`` always works on a copy of the input frame.
    """

    display_name = "DataFrame Operations"
    description = "Perform various operations on a DataFrame."
    icon = "table"

    # Available operations
    OPERATION_CHOICES = [
        "Add Column",
        "Drop Column",
        "Filter",
        "Head",
        "Rename Column",
        "Replace Value",
        "Select Columns",
        "Sort",
        "Tail",
    ]

    # Dynamic inputs each operation needs; every input named here is hidden
    # unless its operation is the one currently selected.
    OPERATION_FIELDS = {
        "Filter": ("column_name", "filter_value"),
        "Sort": ("column_name", "ascending"),
        "Drop Column": ("column_name",),
        "Rename Column": ("column_name", "new_column_name"),
        "Add Column": ("new_column_name", "new_column_value"),
        "Select Columns": ("columns_to_select",),
        "Head": ("num_rows",),
        "Tail": ("num_rows",),
        "Replace Value": ("column_name", "replace_value", "replacement_value"),
    }

    inputs = [
        DataFrameInput(
            name="df",
            display_name="DataFrame",
            info="The input DataFrame to operate on.",
        ),
        DropdownInput(
            name="operation",
            display_name="Operation",
            options=OPERATION_CHOICES,
            info="Select the DataFrame operation to perform.",
            real_time_refresh=True,
        ),
        StrInput(
            name="column_name",
            display_name="Column Name",
            info="The column name to use for the operation.",
            dynamic=True,
            show=False,
        ),
        MessageTextInput(
            name="filter_value",
            display_name="Filter Value",
            info="The value to filter rows by.",
            dynamic=True,
            show=False,
        ),
        BoolInput(
            name="ascending",
            display_name="Sort Ascending",
            info="Whether to sort in ascending order.",
            dynamic=True,
            show=False,
            value=True,
        ),
        StrInput(
            name="new_column_name",
            display_name="New Column Name",
            info="The new column name when renaming or adding a column.",
            dynamic=True,
            show=False,
        ),
        MessageTextInput(
            name="new_column_value",
            display_name="New Column Value",
            info="The value to populate the new column with.",
            dynamic=True,
            show=False,
        ),
        StrInput(
            name="columns_to_select",
            display_name="Columns to Select",
            dynamic=True,
            is_list=True,
            show=False,
        ),
        IntInput(
            name="num_rows",
            display_name="Number of Rows",
            info="Number of rows to return (for head/tail).",
            dynamic=True,
            show=False,
            value=5,
        ),
        MessageTextInput(
            name="replace_value",
            display_name="Value to Replace",
            info="The value to replace in the column.",
            dynamic=True,
            show=False,
        ),
        MessageTextInput(
            name="replacement_value",
            display_name="Replacement Value",
            info="The value to replace with.",
            dynamic=True,
            show=False,
        ),
    ]

    outputs = [
        Output(
            display_name="DataFrame",
            name="output",
            method="perform_operation",
            info="The resulting DataFrame after the operation.",
        )
    ]

    def update_build_config(self, build_config, field_value, field_name=None):
        """Show only the inputs required by the newly selected operation.

        Args:
            build_config: Mapping of input name to its config; the ``"show"``
                entry of each dynamic input is updated in place.
            field_value: New value of the field that changed.
            field_name: Name of the field that changed.

        Returns:
            The same ``build_config`` object.
        """
        # Visibility depends solely on the operation. A change to any other
        # field must leave it alone, otherwise the inputs of the currently
        # selected operation would all be hidden.
        if field_name != "operation":
            return build_config

        for fields in self.OPERATION_FIELDS.values():
            for field in fields:
                build_config[field]["show"] = False

        for field in self.OPERATION_FIELDS.get(field_value, ()):
            build_config[field]["show"] = True

        return build_config

    def perform_operation(self) -> DataFrame:
        """Run the selected operation on a copy of the input DataFrame.

        Returns:
            The DataFrame produced by the operation.

        Raises:
            ValueError: If ``self.operation`` is not a supported operation.
        """
        dataframe_copy = self.df.copy()
        operation = self.operation

        handlers = {
            "Filter": self.filter_rows_by_value,
            "Sort": self.sort_by_column,
            "Drop Column": self.drop_column,
            "Rename Column": self.rename_column,
            "Add Column": self.add_column,
            "Select Columns": self.select_columns,
            "Head": self.head,
            "Tail": self.tail,
            "Replace Value": self.replace_values,
        }
        handler = handlers.get(operation)
        if handler is None:
            msg = f"Unsupported operation: {operation}"
            raise ValueError(msg)
        return handler(dataframe_copy)

    def filter_rows_by_value(self, df: DataFrame) -> DataFrame:
        """Keep the rows whose ``column_name`` value equals ``filter_value``."""
        column = df[self.column_name]
        target = self.filter_value
        mask = column == target
        # filter_value is declared as a text input, so it presumably arrives as
        # a string; a plain == would then never match a numeric column. Also
        # accept rows whose textual form equals the requested value.
        if isinstance(target, str):
            mask = mask | (column.astype(str) == target)
        return DataFrame(df[mask])

    def sort_by_column(self, df: DataFrame) -> DataFrame:
        """Sort rows by ``column_name`` in the direction given by ``ascending``."""
        return DataFrame(df.sort_values(by=self.column_name, ascending=self.ascending))

    def drop_column(self, df: DataFrame) -> DataFrame:
        """Remove ``column_name`` from the frame."""
        return DataFrame(df.drop(columns=[self.column_name]))

    def rename_column(self, df: DataFrame) -> DataFrame:
        """Rename ``column_name`` to ``new_column_name``."""
        return DataFrame(df.rename(columns={self.column_name: self.new_column_name}))

    def add_column(self, df: DataFrame) -> DataFrame:
        """Add ``new_column_name`` with every row set to ``new_column_value``."""
        df[self.new_column_name] = [self.new_column_value] * len(df)
        return DataFrame(df)

    def select_columns(self, df: DataFrame) -> DataFrame:
        """Return only the columns listed in ``columns_to_select``.

        Names are stripped of surrounding whitespace and blank entries are
        skipped. A single string is treated as one column name.
        """
        requested = self.columns_to_select
        if isinstance(requested, str):
            # Iterating a bare string would yield single characters.
            requested = [requested]
        columns = [name.strip() for name in requested if name.strip()]
        return DataFrame(df[columns])

    def head(self, df: DataFrame) -> DataFrame:
        """Return the first ``num_rows`` rows."""
        return DataFrame(df.head(self.num_rows))

    def tail(self, df: DataFrame) -> DataFrame:
        """Return the last ``num_rows`` rows."""
        return DataFrame(df.tail(self.num_rows))

    def replace_values(self, df: DataFrame) -> DataFrame:
        """Replace ``replace_value`` with ``replacement_value`` in ``column_name``."""
        df[self.column_name] = df[self.column_name].replace(self.replace_value, self.replacement_value)
        return DataFrame(df)
Empty file.
84 changes: 84 additions & 0 deletions
84
src/backend/tests/unit/components/processing/test_dataframe_operations.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
import pandas as pd | ||
import pytest | ||
from langflow.components.processing.dataframe_operations import DataFrameOperationsComponent | ||
|
||
|
||
@pytest.fixture
def sample_dataframe():
    """Provide a five-row frame with two integer columns and one letter column."""
    return pd.DataFrame(
        {
            "A": [1, 2, 3, 4, 5],
            "B": [5, 4, 3, 2, 1],
            "C": ["a", "b", "c", "d", "e"],
        }
    )
|
||
|
||
@pytest.mark.parametrize(
    ("operation", "expected_columns", "expected_values"),
    [
        ("Add Column", ["A", "B", "C", "D"], [1, 5, "a", 10]),
        ("Drop Column", ["A", "C"], None),
        ("Filter", ["A", "B", "C"], [3, 3, "c"]),
        ("Sort", ["A", "B", "C"], [5, 1, "e"]),
        ("Rename Column", ["Z", "B", "C"], None),
        ("Select Columns", ["A", "C"], None),
        ("Head", ["A", "B", "C"], [1, 5, "a"]),
        ("Tail", ["A", "B", "C"], [5, 1, "e"]),
        ("Replace Value", ["A", "B", "C"], [1, 5, "z"]),
    ],
)
def test_operations(sample_dataframe, operation, expected_columns, expected_values):
    """Run each operation and check the result's columns and, when given, its first row."""
    # Attributes to set on the component before running each operation.
    settings_by_operation = {
        "Add Column": {"new_column_name": "D", "new_column_value": 10},
        "Drop Column": {"column_name": "B"},
        "Filter": {"column_name": "A", "filter_value": 3},
        "Sort": {"column_name": "A", "ascending": False},
        "Rename Column": {"column_name": "A", "new_column_name": "Z"},
        "Select Columns": {"columns_to_select": ["A", "C"]},
        "Head": {"num_rows": 1},
        "Tail": {"num_rows": 1},
        "Replace Value": {"column_name": "C", "replace_value": "a", "replacement_value": "z"},
    }

    component = DataFrameOperationsComponent()
    component.df = sample_dataframe
    component.operation = operation
    for attribute, value in settings_by_operation.get(operation, {}).items():
        setattr(component, attribute, value)

    result = component.perform_operation()

    assert list(result.columns) == expected_columns
    # A None expectation means only the column layout is checked.
    if isinstance(expected_values, list):
        first_row = list(result.iloc[0])
        assert first_row == expected_values
|
||
|
||
def test_empty_dataframe():
    """Taking the head of an empty frame yields an empty frame."""
    empty_frame = pd.DataFrame()

    ops = DataFrameOperationsComponent()
    ops.df = empty_frame
    ops.operation = "Head"
    ops.num_rows = 3

    outcome = ops.perform_operation()
    assert outcome.empty
|
||
|
||
def test_non_existent_column():
    """Dropping a column that is not in the frame raises KeyError."""
    single_column_frame = pd.DataFrame({"A": [1, 2, 3]})

    ops = DataFrameOperationsComponent()
    ops.df = single_column_frame
    ops.operation = "Drop Column"
    ops.column_name = "B"

    with pytest.raises(KeyError):
        ops.perform_operation()
|
||
|
||
def test_invalid_operation():
    """An unknown operation name is rejected with a descriptive ValueError."""
    single_column_frame = pd.DataFrame({"A": [1, 2, 3]})

    ops = DataFrameOperationsComponent()
    ops.df = single_column_frame
    ops.operation = "Invalid Operation"

    with pytest.raises(ValueError, match="Unsupported operation: Invalid Operation"):
        ops.perform_operation()