Skip to content

Commit

Permalink
Merge pull request #209 from george0st/change
Browse files Browse the repository at this point in the history
Extend check_csv_header
  • Loading branch information
george0st authored Aug 14, 2024
2 parents 8613694 + a8294c8 commit a689658
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 12 deletions.
2 changes: 0 additions & 2 deletions docs/todo_list.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

The list of expected/future improvements:

0. Extend columns in function '_check_csv_header'

1. Add the ability to select the output format (CSV or Parquet) on the command line
- the current solution generates both formats without the ability to choose one

Expand Down
28 changes: 18 additions & 10 deletions tests/test_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,11 @@ def test_generate_bigbulk_repeat(self):
self.assertTrue(os.path.exists(path.join(dir, f"{basic_contact.BasicContact.NAME}.csv")))


def _check_csv_header(self, filename, key_text):
def _check_csv_header(self, filename, key_texts: list):
if os.path.exists(filename):
df = pd.read_csv(filename)
self.assertTrue(df.to_string().find(key_text) >= 0)
content = pd.read_csv(filename).to_string()
for key_text in key_texts:
self.assertTrue(content.find(key_text) >= 0)

def test_csv_structure(self):
"""All csv have header"""
Expand All @@ -142,12 +143,19 @@ def test_csv_structure(self):

dir = path.join(TestGenerator.OUTPUT_ADR, lbl)
self.assertTrue(os.path.exists(dir))
self._check_csv_header(path.join(dir, f"{basic_party.BasicParty.NAME}.csv"), "party_id")
self._check_csv_header(path.join(dir, f"{basic_contact.BasicContact.NAME}.csv"), "party_id")
self._check_csv_header(path.join(dir, f"{basic_relation.BasicRelation.NAME}.csv"), "party_id")
self._check_csv_header(path.join(dir, f"{basic_account.BasicAccount.NAME}.csv"), "party_id")
self._check_csv_header(path.join(dir, f"{basic_transaction.BasicTransaction.NAME}.csv"), "account_id")
self._check_csv_header(path.join(dir, f"{basic_event.BasicEvent.NAME}.csv"), "party_id")
self._check_csv_header(path.join(dir, f"{basic_communication.BasicCommunication.NAME}.csv"), "party_id")
self._check_csv_header(path.join(dir, f"{basic_party.BasicParty.NAME}.csv"),
["party_id", "party_gender"])
self._check_csv_header(path.join(dir, f"{basic_contact.BasicContact.NAME}.csv"),
["party_id", "contact_id", "contact_state"])
self._check_csv_header(path.join(dir, f"{basic_relation.BasicRelation.NAME}.csv"),
["party_id", "relation_id", "relation_type"])
self._check_csv_header(path.join(dir, f"{basic_account.BasicAccount.NAME}.csv"),
["party_id", "account_id", "account_state"])
self._check_csv_header(path.join(dir, f"{basic_transaction.BasicTransaction.NAME}.csv"),
["account_id", "transaction_id", "transaction_direction"])
self._check_csv_header(path.join(dir, f"{basic_event.BasicEvent.NAME}.csv"),
["party_id", "event_id", "session_id"])
self._check_csv_header(path.join(dir, f"{basic_communication.BasicCommunication.NAME}.csv"),
["party_id", "communication_id", "content", "content_sentiment"])

# TODO: Add a test with a batch size under the limit; it will generate a wrong dataset

0 comments on commit a689658

Please sign in to comment.