Skip to content

Commit

Permalink
feat: update model_experiment.py to support basic EDA (#220)
Browse files Browse the repository at this point in the history
* Update model_experiment.py to support basic EDA

It inspects the data before the proposal is generated.

* Update model_experiment.py

Revised linting

* Update model_experiment.py by fixing sorting order

* Update model_experiment.py for black linting

* Update model_experiment.py

* Update model_experiment.py

* Update model_experiment.py

* Update model_experiment.py

---------

Co-authored-by: WinstonLiyt <104308117+WinstonLiyt@users.noreply.github.com>
  • Loading branch information
xisen-w and WinstonLiyt committed Aug 28, 2024
1 parent 0d02c76 commit f766238
Showing 1 changed file with 25 additions and 2 deletions.
27 changes: 25 additions & 2 deletions rdagent/scenarios/kaggle/experiment/model_experiment.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
from pathlib import Path

import pandas as pd
from jinja2 import Environment, StrictUndefined

from rdagent.components.coder.model_coder.model import (
Expand Down Expand Up @@ -77,12 +78,34 @@ def background(self) -> str:
competition_features=self.competition_features,
)
)

return background_prompt

@property
def source_data(self) -> str:
    """Return a short textual EDA summary of the competition's CSV data.

    Every ``*.csv`` file found directly under
    ``<share_data_path>/<competition>`` is read and concatenated into one
    DataFrame. The summary contains that frame's ``DataFrame.info()``
    report, its shape, its first rows, and the "Data Description" entry of
    ``self.competition_descriptions``.

    Returns:
        The summary text, or a fixed message when the directory contains no
        CSV files.
    """
    # Function-scope import: keeps this block self-contained without
    # touching the file-level import section.
    import io

    kaggle_conf = KGDockerConf()
    data_path = Path(f"{kaggle_conf.share_data_path}/{self.competition}")

    # glob() order is filesystem-dependent; sort so the concatenated frame
    # (and therefore the sample rows) is the same on every run.
    csv_files = sorted(data_path.glob("*.csv"))

    if not csv_files:
        return "No CSV files found in the specified path."

    dataset = pd.concat([pd.read_csv(file) for file in csv_files], ignore_index=True)

    # DataFrame.info() writes its report to a buffer and returns None, so the
    # text must be captured explicitly instead of using the return value.
    info_buffer = io.StringIO()
    dataset.info(buf=info_buffer)
    simple_eda = info_buffer.getvalue()

    data_shape = dataset.shape
    data_head = dataset.head()

    eda = (
        f"Basic Info about the data:\n{simple_eda}\n"
        f"Shape of the dataset: {data_shape}\n"
        f"Sample Data:\n{data_head}\n"
    )

    data_description = self.competition_descriptions.get("Data Description", "No description provided")
    eda += f"\nData Description:\n{data_description}"

    return eda

@property
def output_format(self) -> str:
Expand Down

0 comments on commit f766238

Please sign in to comment.