From 805b633022d0dc2a16188106ec94f2399d632365 Mon Sep 17 00:00:00 2001 From: Aishwarya Nidhi Date: Mon, 15 Apr 2024 23:51:07 -0400 Subject: [PATCH 1/7] Issue #782 draft PR with code skeleton --- .../transform/test/test_transform_utils.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/im_v2/common/data/transform/test/test_transform_utils.py b/im_v2/common/data/transform/test/test_transform_utils.py index 2cc9ed8dd3..0d89bf4859 100644 --- a/im_v2/common/data/transform/test/test_transform_utils.py +++ b/im_v2/common/data/transform/test/test_transform_utils.py @@ -776,3 +776,25 @@ def _get_test_data(self) -> None: scratch_dir = self.get_scratch_space() aws_profile = "ck" hs3.copy_data_from_s3_to_local_dir(s3_input_dir, scratch_dir, aws_profile) + + +# ############################################################################# + + +class TestCalculateVwap(hunitest.TestCase): + + def generate_test_data(self): + """ + Fetch data for price and volume columns with timestamp index + returns: pandas dataframe with timestamp index, price column and volume column + """ + + def test_calculate_vwap_with_resample_kwargs(self) -> None: + """ + Verify calculated price for given volume from data WITH resample rules + """ + + def test_calculate_vwap_no_resample_kwargs(self) -> None: + """ + Verify calculated price for given volume from data WITHOUT resample rules + """ \ No newline at end of file From cc5c77549b287a4adb7ac668359eaa067283a3a3 Mon Sep 17 00:00:00 2001 From: Aishwarya Nidhi Date: Tue, 23 Apr 2024 14:01:00 -0400 Subject: [PATCH 2/7] Issue #782 unit tests for function calculate_vwap_twap with 2 different resampling rules --- .../transform/test/test_transform_utils.py | 81 +++++++++++++++++-- 1 file changed, 74 insertions(+), 7 deletions(-) diff --git a/im_v2/common/data/transform/test/test_transform_utils.py b/im_v2/common/data/transform/test/test_transform_utils.py index 0d89bf4859..8fd3ee0839 100644 --- a/im_v2/common/data/transform/test/test_transform_utils.py +++ b/im_v2/common/data/transform/test/test_transform_utils.py @@ -3,6 +3,7 @@ import pandas as pd import pytest +import random import data_schema.dataset_schema_utils as dsdascut import helpers.hdbg as hdbg @@ -14,6 +15,7 @@ import im_v2.common.data.extract.extract_utils as imvcdeexut import im_v2.common.data.transform.resample_daily_bid_ask_data as imvcdtrdbad import im_v2.common.data.transform.transform_utils as imvcdttrut +import research_amp.transform as ramptran class TestGetVendorEpochUnit(hunitest.TestCase): @@ -781,20 +783,85 @@ def _get_test_data(self) -> None: # ############################################################################# -class TestCalculateVwap(hunitest.TestCase): +class TestCalculateVwapTwap(hunitest.TestCase): - def generate_test_data(self): + def get_test_data(self): """ Fetch data for price and volume columns with timestamp index - returns: pandas dataframe with timestamp index, price column and volume column + returns: pandas dataframe with timestamp index, close and volume column with asset_id as full_sumbol """ + # + index_timestamp = pd.date_range("2024-01-01", periods=2, freq="min") + close = [34501.279, 29952.436] + volume = [229894.0, 99837.4] + asset_id = [18109, 15479] + d = {"timestamp":index_timestamp, "close":close, "volume":volume, "full_symbol":asset_id} + df = pd.DataFrame(data=d).set_index("timestamp") + expected_df_columns = pd.MultiIndex.from_product( + [["close", "twap", "volume", "vwap"], sorted(asset_id)] + ) + return df, expected_df_columns - def test_calculate_vwap_with_resample_kwargs(self) -> None: + def test_calculate_vwap_twap_with_5T_resample_kwargs(self) -> None: """ - Verify calculated price for given volume from data WITH resample rules + Verify calculated price for given volume from data WITH resample rule """ + df, expected_df_columns = self.get_test_data() + result_df = ramptran.calculate_vwap_twap( + df, "5T" + ) + #Expected Values + expected_length = 2 + expected_column_unique_values = None + expected_signature=r""" + # df= + index=[2024-01-01 00:00:00, 2024-01-01 00:05:00] + columns=('close', 15479),('close', 18109),('twap', 15479),('twap', 18109),('volume', 15479),('volume', 18109),('vwap', 15479),('vwap', 18109) + shape=(2, 8) + close twap volume vwap + 15479 18109 15479 18109 15479 18109 15479 18109 + timestamp + 2024-01-01 00:00:00 NaN 34501.279 NaN 34501.279 NaN 229894.0 NaN 34501.279 + 2024-01-01 00:05:00 29952.436 NaN 29952.436 NaN 99837.4 NaN 29952.436 NaN + """ + # Check. + self.check_df_output( + result_df, + expected_length, + expected_df_columns, + expected_column_unique_values, + expected_signature, + ) + - def test_calculate_vwap_no_resample_kwargs(self) -> None: + def test_calculate_vwap_twap_with_halfT_resample_kwargs(self) -> None: """ Verify calculated price for given volume from data WITHOUT resample rules - """ \ No newline at end of file + """ + df, expected_df_columns = self.get_test_data() + result_df = ramptran.calculate_vwap_twap( + df, "0.5T" + ) + #Expected Values + expected_length = 3 + expected_column_unique_values = None + expected_signature = r""" + # df= + index=[2024-01-01 00:00:00, 2024-01-01 00:01:00] + columns=('close', 15479),('close', 18109),('twap', 15479),('twap', 18109),('volume', 15479),('volume', 18109),('vwap', 15479),('vwap', 18109) + shape=(3, 8) + close twap volume vwap + 15479 18109 15479 18109 15479 18109 15479 18109 + timestamp + 2024-01-01 00:00:00 NaN 34501.279 NaN 34501.279 NaN 229894.0 NaN 34501.279 + 2024-01-01 00:00:30 NaN NaN NaN NaN NaN NaN NaN NaN + 2024-01-01 00:01:00 29952.436 NaN 29952.436 NaN 99837.4 NaN 29952.436 NaN + """ + # Check. + self.check_df_output( + result_df, + expected_length, + expected_df_columns, + expected_column_unique_values, + expected_signature, + ) \ No newline at end of file From a47c77ca7dc12ddc0c5331660800336bde9d70a4 Mon Sep 17 00:00:00 2001 From: Aishwarya Nidhi Date: Tue, 23 Apr 2024 14:59:04 -0400 Subject: [PATCH 3/7] Fix for ambiguous truth value of multiindex column names --- im_v2/common/data/transform/test/test_transform_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/im_v2/common/data/transform/test/test_transform_utils.py b/im_v2/common/data/transform/test/test_transform_utils.py index 8fd3ee0839..ae3662db80 100644 --- a/im_v2/common/data/transform/test/test_transform_utils.py +++ b/im_v2/common/data/transform/test/test_transform_utils.py @@ -799,7 +799,7 @@ def get_test_data(self): df = pd.DataFrame(data=d).set_index("timestamp") expected_df_columns = pd.MultiIndex.from_product( [["close", "twap", "volume", "vwap"], sorted(asset_id)] - ) + ).to_list() return df, expected_df_columns def test_calculate_vwap_twap_with_5T_resample_kwargs(self) -> None: From 0c8372cca4a238cf5e62aa4e3e0c7175606d9797 Mon Sep 17 00:00:00 2001 From: Aishwarya Nidhi Date: Thu, 25 Apr 2024 11:12:27 -0400 Subject: [PATCH 4/7] Linter corrections on file --- .../transform/test/test_transform_utils.py | 62 +++++++++---------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/im_v2/common/data/transform/test/test_transform_utils.py b/im_v2/common/data/transform/test/test_transform_utils.py index ae3662db80..88ffda36bf 100644 --- a/im_v2/common/data/transform/test/test_transform_utils.py +++ b/im_v2/common/data/transform/test/test_transform_utils.py @@ -3,7 +3,6 @@ import pandas as pd import pytest -import random import data_schema.dataset_schema_utils as dsdascut import helpers.hdbg as hdbg @@ -784,7 +783,6 @@ def _get_test_data(self) -> None: class TestCalculateVwapTwap(hunitest.TestCase): - def get_test_data(self): """ Fetch data for price and volume columns with timestamp index @@ -795,36 +793,40 @@ def get_test_data(self): close = [34501.279, 29952.436] volume = [229894.0, 99837.4] asset_id = [18109, 15479] - d = {"timestamp":index_timestamp, "close":close, "volume":volume, "full_symbol":asset_id} + d = { + "timestamp": index_timestamp, + "close": close, + "volume": volume, + "full_symbol": asset_id, + } df = pd.DataFrame(data=d).set_index("timestamp") expected_df_columns = pd.MultiIndex.from_product( [["close", "twap", "volume", "vwap"], sorted(asset_id)] ).to_list() return df, expected_df_columns - + def test_calculate_vwap_twap_with_5T_resample_kwargs(self) -> None: """ - Verify calculated price for given volume from data WITH resample rule + Verify dataframe signature for given volume from data WITH 5T resample + rule. """ df, expected_df_columns = self.get_test_data() - result_df = ramptran.calculate_vwap_twap( - df, "5T" - ) - #Expected Values + result_df = ramptran.calculate_vwap_twap(df, "5T") + # Expected Values expected_length = 2 expected_column_unique_values = None - expected_signature=r""" + expected_signature = r""" # df= index=[2024-01-01 00:00:00, 2024-01-01 00:05:00] columns=('close', 15479),('close', 18109),('twap', 15479),('twap', 18109),('volume', 15479),('volume', 18109),('vwap', 15479),('vwap', 18109) shape=(2, 8) - close twap volume vwap - 15479 18109 15479 18109 15479 18109 15479 18109 - timestamp - 2024-01-01 00:00:00 NaN 34501.279 NaN 34501.279 NaN 229894.0 NaN 34501.279 - 2024-01-01 00:05:00 29952.436 NaN 29952.436 NaN 99837.4 NaN 29952.436 NaN + close twap volume vwap + 15479 18109 15479 18109 15479 18109 15479 18109 + timestamp + 2024-01-01 00:00:00 NaN 34501.279 NaN 34501.279 NaN 229894.0 NaN 34501.279 + 2024-01-01 00:05:00 29952.436 NaN 29952.436 NaN 99837.4 NaN 29952.436 NaN """ - # Check. + # Check signature self.check_df_output( result_df, expected_length, @@ -832,17 +834,15 @@ def test_calculate_vwap_twap_with_5T_resample_kwargs(self) -> None: expected_column_unique_values, expected_signature, ) - def test_calculate_vwap_twap_with_halfT_resample_kwargs(self) -> None: """ - Verify calculated price for given volume from data WITHOUT resample rules + Verify dataframe signature for given volume from data WITH .5T resample + rule. """ df, expected_df_columns = self.get_test_data() - result_df = ramptran.calculate_vwap_twap( - df, "0.5T" - ) - #Expected Values + result_df = ramptran.calculate_vwap_twap(df, "0.5T") + # Expected Values expected_length = 3 expected_column_unique_values = None expected_signature = r""" @@ -850,18 +850,18 @@ def test_calculate_vwap_twap_with_halfT_resample_kwargs(self) -> None: index=[2024-01-01 00:00:00, 2024-01-01 00:01:00] columns=('close', 15479),('close', 18109),('twap', 15479),('twap', 18109),('volume', 15479),('volume', 18109),('vwap', 15479),('vwap', 18109) shape=(3, 8) - close twap volume vwap - 15479 18109 15479 18109 15479 18109 15479 18109 - timestamp - 2024-01-01 00:00:00 NaN 34501.279 NaN 34501.279 NaN 229894.0 NaN 34501.279 - 2024-01-01 00:00:30 NaN NaN NaN NaN NaN NaN NaN NaN - 2024-01-01 00:01:00 29952.436 NaN 29952.436 NaN 99837.4 NaN 29952.436 NaN - """ - # Check. + close twap volume vwap + 15479 18109 15479 18109 15479 18109 15479 18109 + timestamp + 2024-01-01 00:00:00 NaN 34501.279 NaN 34501.279 NaN 229894.0 NaN 34501.279 + 2024-01-01 00:00:30 NaN NaN NaN NaN NaN NaN NaN NaN + 2024-01-01 00:01:00 29952.436 NaN 29952.436 NaN 99837.4 NaN 29952.436 NaN + """ + # Check signature self.check_df_output( result_df, expected_length, expected_df_columns, expected_column_unique_values, expected_signature, - ) \ No newline at end of file + ) From 92f8d099ffc7df4238d95faca925266b407bb240 Mon Sep 17 00:00:00 2001 From: Aishwarya Nidhi Date: Fri, 26 Apr 2024 14:04:03 -0400 Subject: [PATCH 5/7] Changes for PR comment --- .../transform/test/test_transform_utils.py | 89 ------------------ research_amp/test/__init__.py | 0 research_amp/test/test_transform.py | 92 +++++++++++++++++++ 3 files changed, 92 insertions(+), 89 deletions(-) create mode 100644 research_amp/test/__init__.py create mode 100644 research_amp/test/test_transform.py diff --git a/im_v2/common/data/transform/test/test_transform_utils.py b/im_v2/common/data/transform/test/test_transform_utils.py index 88ffda36bf..2cc9ed8dd3 100644 --- a/im_v2/common/data/transform/test/test_transform_utils.py +++ b/im_v2/common/data/transform/test/test_transform_utils.py @@ -14,7 +14,6 @@ import im_v2.common.data.extract.extract_utils as imvcdeexut import im_v2.common.data.transform.resample_daily_bid_ask_data as imvcdtrdbad import im_v2.common.data.transform.transform_utils as imvcdttrut -import research_amp.transform as ramptran class TestGetVendorEpochUnit(hunitest.TestCase): @@ -777,91 +776,3 @@ def _get_test_data(self) -> None: scratch_dir = self.get_scratch_space() aws_profile = "ck" hs3.copy_data_from_s3_to_local_dir(s3_input_dir, scratch_dir, aws_profile) - - -# ############################################################################# - - -class TestCalculateVwapTwap(hunitest.TestCase): - def get_test_data(self): - """ - Fetch data for price and volume columns with timestamp index - returns: pandas dataframe with timestamp index, close and volume column with asset_id as full_sumbol - """ - # - index_timestamp = pd.date_range("2024-01-01", periods=2, freq="min") - close = [34501.279, 29952.436] - volume = [229894.0, 99837.4] - asset_id = [18109, 15479] - d = { - "timestamp": index_timestamp, - "close": close, - "volume": volume, - "full_symbol": asset_id, - } - df = pd.DataFrame(data=d).set_index("timestamp") - expected_df_columns = pd.MultiIndex.from_product( - [["close", "twap", "volume", "vwap"], sorted(asset_id)] - ).to_list() - return df, expected_df_columns - - def test_calculate_vwap_twap_with_5T_resample_kwargs(self) -> None: - """ - Verify dataframe signature for given volume from data WITH 5T resample - rule. - """ - df, expected_df_columns = self.get_test_data() - result_df = ramptran.calculate_vwap_twap(df, "5T") - # Expected Values - expected_length = 2 - expected_column_unique_values = None - expected_signature = r""" - # df= - index=[2024-01-01 00:00:00, 2024-01-01 00:05:00] - columns=('close', 15479),('close', 18109),('twap', 15479),('twap', 18109),('volume', 15479),('volume', 18109),('vwap', 15479),('vwap', 18109) - shape=(2, 8) - close twap volume vwap - 15479 18109 15479 18109 15479 18109 15479 18109 - timestamp - 2024-01-01 00:00:00 NaN 34501.279 NaN 34501.279 NaN 229894.0 NaN 34501.279 - 2024-01-01 00:05:00 29952.436 NaN 29952.436 NaN 99837.4 NaN 29952.436 NaN - """ - # Check signature - self.check_df_output( - result_df, - expected_length, - expected_df_columns, - expected_column_unique_values, - expected_signature, - ) - - def test_calculate_vwap_twap_with_halfT_resample_kwargs(self) -> None: - """ - Verify dataframe signature for given volume from data WITH .5T resample - rule. - """ - df, expected_df_columns = self.get_test_data() - result_df = ramptran.calculate_vwap_twap(df, "0.5T") - # Expected Values - expected_length = 3 - expected_column_unique_values = None - expected_signature = r""" - # df= - index=[2024-01-01 00:00:00, 2024-01-01 00:01:00] - columns=('close', 15479),('close', 18109),('twap', 15479),('twap', 18109),('volume', 15479),('volume', 18109),('vwap', 15479),('vwap', 18109) - shape=(3, 8) - close twap volume vwap - 15479 18109 15479 18109 15479 18109 15479 18109 - timestamp - 2024-01-01 00:00:00 NaN 34501.279 NaN 34501.279 NaN 229894.0 NaN 34501.279 - 2024-01-01 00:00:30 NaN NaN NaN NaN NaN NaN NaN NaN - 2024-01-01 00:01:00 29952.436 NaN 29952.436 NaN 99837.4 NaN 29952.436 NaN - """ - # Check signature - self.check_df_output( - result_df, - expected_length, - expected_df_columns, - expected_column_unique_values, - expected_signature, - ) diff --git a/research_amp/test/__init__.py b/research_amp/test/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/research_amp/test/test_transform.py b/research_amp/test/test_transform.py new file mode 100644 index 0000000000..22a05772f7 --- /dev/null +++ b/research_amp/test/test_transform.py @@ -0,0 +1,92 @@ +import pandas as pd + +import helpers.hunit_test as hunitest +import research_amp.transform as ramptran + + +class TestCalculateVwapTwap(hunitest.TestCase): + """ + Test the calculation of VWAP and TWAP with different resampling rules. + """ + + def helper(self) -> pd.DataFrame: + """ + Create data for testing. + + :returns: pandas dataframe with timestamp index, close and + volume column with asset_id as full_sumbol + """ + # + timestamp_index = pd.date_range("2024-01-01", periods=10, freq="T") + close = list(range(200, 210)) + volume = list(range(40, 50)) + asset_id = [11, 12] * 5 + data = { + "timestamp": timestamp_index, + "close": close, + "volume": volume, + "full_symbol": asset_id, + } + df = pd.DataFrame(data=data).set_index("timestamp") + return df + + def test_5T_resample(self) -> None: + resample_rule = "5T" + df = self.helper() + result_df = ramptran.calculate_vwap_twap(df, resample_rule) + # Expected Values + expected_length = 3 + expected_column_value = None + expected_signature = r""" + # df= + index=[2024-01-01 00:00:00, 2024-01-01 00:10:00] + columns=('close', 11),('close', 12),('twap', 11),('twap', 12),('volume', 11),('volume', 12),('vwap', 11),('vwap', 12) + shape=(3, 8) + close twap volume vwap + 11 12 11 12 11 12 11 12 + timestamp + 2024-01-01 00:00:00 200.0 NaN 200.0 NaN 40.0 NaN 200.000000 NaN + 2024-01-01 00:05:00 204.0 205.0 203.0 203.0 86.0 129.0 203.023256 203.062016 + 2024-01-01 00:10:00 208.0 209.0 207.0 208.0 94.0 96.0 207.021277 208.020833 + """ + # Check signature + self.check_df_output( + result_df, + expected_length, + expected_column_value, + expected_column_value, + expected_signature, + ) + + def test_T_resample(self) -> None: + resample_rule = "1T" + df = self.helper() + result_df = ramptran.calculate_vwap_twap(df, resample_rule) + # Expected Values + expected_length = 10 + expected_column_value = None + expected_signature = r""" + # df= + index=[2024-01-01 00:00:00, 2024-01-01 00:09:00] + columns=('close', 11),('close', 12),('twap', 11),('twap', 12),('volume', 11),('volume', 12),('vwap', 11),('vwap', 12) + shape=(10, 8) + close twap volume vwap + 11 12 11 12 11 12 11 12 + timestamp + 2024-01-01 00:00:00 200.0 NaN 200.0 NaN 40.0 NaN 200.0 NaN + 2024-01-01 00:01:00 NaN 201.0 NaN 201.0 NaN 41.0 NaN 201.0 + 2024-01-01 00:02:00 202.0 NaN 202.0 NaN 42.0 NaN 202.0 NaN + ... + timestamp + 2024-01-01 00:07:00 NaN 207.0 NaN 207.0 NaN 47.0 NaN 207.0 + 2024-01-01 00:08:00 208.0 NaN 208.0 NaN 48.0 NaN 208.0 NaN + 2024-01-01 00:09:00 NaN 209.0 NaN 209.0 NaN 49.0 NaN 209.0 + """ + # Check signature + self.check_df_output( + result_df, + expected_length, + expected_column_value, + expected_column_value, + expected_signature, + ) From 90af319a503897fa8a96a0480a9c64eb46232688 Mon Sep 17 00:00:00 2001 From: Aishwarya Nidhi Date: Mon, 29 Apr 2024 11:48:21 -0400 Subject: [PATCH 6/7] Modifications to address PR comments --- research_amp/test/test_transform.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/research_amp/test/test_transform.py b/research_amp/test/test_transform.py index 22a05772f7..4b333edcb7 100644 --- a/research_amp/test/test_transform.py +++ b/research_amp/test/test_transform.py @@ -12,11 +12,7 @@ class TestCalculateVwapTwap(hunitest.TestCase): def helper(self) -> pd.DataFrame: """ Create data for testing. - - :returns: pandas dataframe with timestamp index, close and - volume column with asset_id as full_sumbol """ - # timestamp_index = pd.date_range("2024-01-01", periods=10, freq="T") close = list(range(200, 210)) volume = list(range(40, 50)) @@ -30,11 +26,11 @@ def helper(self) -> pd.DataFrame: df = pd.DataFrame(data=data).set_index("timestamp") return df - def test_5T_resample(self) -> None: + def test1(self) -> None: resample_rule = "5T" df = self.helper() result_df = ramptran.calculate_vwap_twap(df, resample_rule) - # Expected Values + # Define expected values expected_length = 3 expected_column_value = None expected_signature = r""" @@ -58,11 +54,11 @@ def test_5T_resample(self) -> None: expected_signature, ) - def test_T_resample(self) -> None: + def test2(self) -> None: resample_rule = "1T" df = self.helper() result_df = ramptran.calculate_vwap_twap(df, resample_rule) - # Expected Values + # Define expected values expected_length = 10 expected_column_value = None expected_signature = r""" From 631568f6f38eba095668cf1dfe8630a2b939aaf7 Mon Sep 17 00:00:00 2001 From: Aishwarya Nidhi Date: Mon, 29 Apr 2024 12:58:50 -0400 Subject: [PATCH 7/7] Adding full-stop to comments --- research_amp/test/test_transform.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/research_amp/test/test_transform.py b/research_amp/test/test_transform.py index 4b333edcb7..081da8a106 100644 --- a/research_amp/test/test_transform.py +++ b/research_amp/test/test_transform.py @@ -30,7 +30,7 @@ def test1(self) -> None: resample_rule = "5T" df = self.helper() result_df = ramptran.calculate_vwap_twap(df, resample_rule) - # Define expected values + # Define expected values. expected_length = 3 expected_column_value = None expected_signature = r""" @@ -45,7 +45,7 @@ def test1(self) -> None: 2024-01-01 00:05:00 204.0 205.0 203.0 203.0 86.0 129.0 203.023256 203.062016 2024-01-01 00:10:00 208.0 209.0 207.0 208.0 94.0 96.0 207.021277 208.020833 """ - # Check signature + # Check signature. self.check_df_output( result_df, expected_length, @@ -58,7 +58,7 @@ def test2(self) -> None: resample_rule = "1T" df = self.helper() result_df = ramptran.calculate_vwap_twap(df, resample_rule) - # Define expected values + # Define expected values. expected_length = 10 expected_column_value = None expected_signature = r""" @@ -78,7 +78,7 @@ def test2(self) -> None: 2024-01-01 00:08:00 208.0 NaN 208.0 NaN 48.0 NaN 208.0 NaN 2024-01-01 00:09:00 NaN 209.0 NaN 209.0 NaN 49.0 NaN 209.0 """ - # Check signature + # Check signature. self.check_df_output( result_df, expected_length,