From fd6004ca350d87ddcd829616c266fcf6756a7572 Mon Sep 17 00:00:00 2001 From: Nadiyah Williams <102677874+Nadiyahlw@users.noreply.github.com> Date: Thu, 9 May 2024 15:02:50 -0400 Subject: [PATCH] Updating Branch (#936) * Revert "Add DATA605 directory (#876)" (#884) This reverts commit 00e90d2b83dd25d7c53988106dfbd24d937b2153. * SorrTask786 Unit test for split_positive_and_negative_parts() (#883) * Added Unit test for split_positive_and_negative_parts() * Resolved comments * Added remaining comments * deleted unnecessary files * "Fixes" * Comment resolution * Comments resolved. --------- Co-authored-by: Samarth KaPatel * SorrTask-782 Unit tests for calculate_vwap_twap() (#801) * Issue #782 draft PR with code skeleton * Issue #782 unit tests for function calculate_vwap_twap with 2 different resampling rules * Fix for ambiguous truth value of multiindex column names * Linter corrections on file * Changes for PR comment * Modifications to address PR comments * Adding full-stop to comments --------- Co-authored-by: Aishwarya Nidhi Co-authored-by: Samarth KaPatel * SorrTask-891 Unit tests for compute epoch function (#901) * Issue 891 - Unit test code skeleton * Issue-891 3 unit tests with each available unit value for Series input. 
* Issue-891 Unit test of compute epoch function for dataframe input with default unit * Adding missed full stop for comment * PR comment fixes * Removing todo from function * PR comment fixes --------- Co-authored-by: Aishwarya Nidhi Co-authored-by: Samarth KaPatel * SorTask903 Unit test convert_seconds_to_pandas_minutes() (#907) * Added unit test for function convert_seconds_to_minutes * Nits --------- Co-authored-by: Shaunak Dhande <77265046+Shaunak01@users.noreply.github.com> Co-authored-by: neha2801-create <77967216+neha2801-create@users.noreply.github.com> Co-authored-by: Samarth KaPatel Co-authored-by: Aishwarya Nidhi Co-authored-by: Aishwarya Nidhi --- core/finance/prediction_processing.py | 1 - .../test/test_prediction_processing.py | 136 ++++++++++++++++++ .../test/test_misc_transformations.py | 52 +++++++ docs/DATA605 | 1 - helpers/test/test_hdatetime.py | 34 +++++ research_amp/test/__init__.py | 0 research_amp/test/test_transform.py | 88 ++++++++++++ 7 files changed, 310 insertions(+), 2 deletions(-) delete mode 160000 docs/DATA605 create mode 100644 research_amp/test/__init__.py create mode 100644 research_amp/test/test_transform.py diff --git a/core/finance/prediction_processing.py b/core/finance/prediction_processing.py index ead07d5c29..2b6e0cddee 100644 --- a/core/finance/prediction_processing.py +++ b/core/finance/prediction_processing.py @@ -37,7 +37,6 @@ def compute_bar_start_timestamps( return srs -# TODO(Paul): Add unit tests. 
def compute_epoch( data: Union[pd.Series, pd.DataFrame], *, unit: Optional[str] = None ) -> Union[pd.Series, pd.DataFrame]: diff --git a/core/finance/test/test_prediction_processing.py b/core/finance/test/test_prediction_processing.py index d791ff29e8..627ceec5c9 100644 --- a/core/finance/test/test_prediction_processing.py +++ b/core/finance/test/test_prediction_processing.py @@ -58,3 +58,139 @@ def _get_data() -> pd.DataFrame: ) df.index.freq = "T" return df + + +# ############################################################################# + + +class TestComputeEpoch(hunitest.TestCase): + """ + Test the computation of epoch time series with different units. + """ + + def helper(self) -> pd.Series: + """ + Fetch input data for test. + """ + timestamp_index = pd.date_range("2024-01-01", periods=10, freq="T") + close = list(range(200, 210)) + data = {"close": close} + srs = pd.Series(data=data, index=timestamp_index) + return srs + + def test1(self) -> None: + """ + Check that epoch is computed correctly for minute unit. + """ + unit = "minute" + srs = self.helper() + result = cfiprpro.compute_epoch(srs, unit=unit) + # Define expected values. + expected_length = 10 + expected_column_value = None + expected_signature = r""" + minute + 2024-01-01 00:00:00 28401120 + 2024-01-01 00:01:00 28401121 + 2024-01-01 00:02:00 28401122 + 2024-01-01 00:03:00 28401123 + 2024-01-01 00:04:00 28401124 + 2024-01-01 00:05:00 28401125 + 2024-01-01 00:06:00 28401126 + 2024-01-01 00:07:00 28401127 + 2024-01-01 00:08:00 28401128 + 2024-01-01 00:09:00 28401129 + """ + # Check signature. + self.check_srs_output( + result, expected_length, expected_column_value, expected_signature + ) + + def test2(self) -> None: + """ + Check that epoch is computed correctly for second unit. + """ + unit = "second" + srs = self.helper() + result = cfiprpro.compute_epoch(srs, unit=unit) + # Define expected values. 
+ expected_length = 10 + expected_column_value = None + expected_signature = r""" + second + 2024-01-01 00:00:00 1704067200 + 2024-01-01 00:01:00 1704067260 + 2024-01-01 00:02:00 1704067320 + 2024-01-01 00:03:00 1704067380 + 2024-01-01 00:04:00 1704067440 + 2024-01-01 00:05:00 1704067500 + 2024-01-01 00:06:00 1704067560 + 2024-01-01 00:07:00 1704067620 + 2024-01-01 00:08:00 1704067680 + 2024-01-01 00:09:00 1704067740 + """ + # Check signature. + self.check_srs_output( + result, expected_length, expected_column_value, expected_signature + ) + + def test3(self) -> None: + """ + Check that epoch is computed correctly for nanosecond unit. + """ + unit = "nanosecond" + srs = self.helper() + result = cfiprpro.compute_epoch(srs, unit=unit) + # Define expected values. + expected_length = 10 + expected_column_value = None + expected_signature = r""" + nanosecond + 2024-01-01 00:00:00 1704067200000000000 + 2024-01-01 00:01:00 1704067260000000000 + 2024-01-01 00:02:00 1704067320000000000 + 2024-01-01 00:03:00 1704067380000000000 + 2024-01-01 00:04:00 1704067440000000000 + 2024-01-01 00:05:00 1704067500000000000 + 2024-01-01 00:06:00 1704067560000000000 + 2024-01-01 00:07:00 1704067620000000000 + 2024-01-01 00:08:00 1704067680000000000 + 2024-01-01 00:09:00 1704067740000000000 + """ + # Check signature. + self.check_srs_output( + result, expected_length, expected_column_value, expected_signature + ) + + def test4(self) -> None: + """ + Check that epoch is computed correctly for dataframe input. + """ + srs = self.helper() + df = srs.to_frame() + result = cfiprpro.compute_epoch(df) + # Define expected values. + expected_length = 10 + expected_column_value = None + expected_signature = r""" + # df= + index=[2024-01-01 00:00:00, 2024-01-01 00:09:00] + columns=minute + shape=(10, 1) + minute + 2024-01-01 00:00:00 28401120 + 2024-01-01 00:01:00 28401121 + 2024-01-01 00:02:00 28401122 + ... 
+ 2024-01-01 00:07:00 28401127 + 2024-01-01 00:08:00 28401128 + 2024-01-01 00:09:00 28401129 + """ + # Check signature. + self.check_df_output( + result, + expected_length, + expected_column_value, + expected_column_value, + expected_signature, + ) diff --git a/core/signal_processing/test/test_misc_transformations.py b/core/signal_processing/test/test_misc_transformations.py index c736b50660..5062ce5ac9 100644 --- a/core/signal_processing/test/test_misc_transformations.py +++ b/core/signal_processing/test/test_misc_transformations.py @@ -1,4 +1,5 @@ import logging +from typing import Union import numpy as np import pandas as pd @@ -223,3 +224,54 @@ def test1(self) -> None: 1 1.0 2 -2.0""" self.assert_equal(actual_str, expected_str, fuzzy_match=True) + + +class Test_split_positive_and_negative_parts(hunitest.TestCase): + @staticmethod + def get_test_data() -> pd.Series: + """ + Create artificial signal for unit test. + """ + data = [100, -50, 0, 75, -25] + index = pd.date_range(start="2023-04-01", periods=5) + test_data = pd.Series(data, index=index, name="position_intent_1") + return test_data + + def test1(self) -> None: + """ + Check that a Series input is processed correctly. + """ + series_input = self.get_test_data() + self.helper(series_input) + + def test2(self) -> None: + """ + Check that a DataFrame input is processed correctly. 
+ """ + df_input = pd.DataFrame({"position_intent_1": self.get_test_data()}) + self.helper(df_input) + + def helper(self, input: Union[pd.Series, pd.DataFrame]) -> None: + actual_df = csprmitr.split_positive_and_negative_parts(input) + expected_length = 5 + expected_column_names = ["positive", "negative"] + expected_column_unique_values = None + expected_signature = r""" + # df= + index=[2023-04-01 00:00:00, 2023-04-05 00:00:00] + columns=positive,negative + shape=(5, 2) + positive negative + 2023-04-01 100.0 0.0 + 2023-04-02 0.0 50.0 + 2023-04-03 0.0 0.0 + 2023-04-04 75.0 0.0 + 2023-04-05 0.0 25.0 + """ + self.check_df_output( + actual_df, + expected_length, + expected_column_names, + expected_column_unique_values, + expected_signature, + ) diff --git a/docs/DATA605 b/docs/DATA605 deleted file mode 160000 index 4534473876..0000000000 --- a/docs/DATA605 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 4534473876f4d4a0f4c22c9ae51e7466a3aef1f4 diff --git a/helpers/test/test_hdatetime.py b/helpers/test/test_hdatetime.py index 0f7837afa6..3754fa8a9c 100644 --- a/helpers/test/test_hdatetime.py +++ b/helpers/test/test_hdatetime.py @@ -576,6 +576,40 @@ def test3(self) -> None: self.assert_equal(str(act), str(exp)) +# ############################################################################# +# Test_convert_seconds_to_minutes +# ############################################################################# + + +class Test_convert_seconds_to_minutes(hunitest.TestCase): + def test1(self) -> None: + """ + Check that conversion is implemented correctly. + """ + num_secs = 300 + act = hdateti.convert_seconds_to_minutes(num_secs) + exp = int(num_secs / 60) + self.assertEqual(act, exp) + + def test2(self) -> None: + """ + Check that an error is raised when input is not an integer number of + minutes. 
+ """ + num_secs = 10 + with self.assertRaises(AssertionError) as cm: + hdateti.convert_seconds_to_minutes(num_secs) + act = str(cm.exception) + exp = """ + * Failed assertion * + '10' + == + '0' + num_secs=10 is not an integer number of minutes + """ + self.assert_equal(act, exp, fuzzy_match=True) + + # ############################################################################# # Test_convert_unix_epoch_to_timestamp # ############################################################################# diff --git a/research_amp/test/__init__.py b/research_amp/test/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/research_amp/test/test_transform.py b/research_amp/test/test_transform.py new file mode 100644 index 0000000000..081da8a106 --- /dev/null +++ b/research_amp/test/test_transform.py @@ -0,0 +1,88 @@ +import pandas as pd + +import helpers.hunit_test as hunitest +import research_amp.transform as ramptran + + +class TestCalculateVwapTwap(hunitest.TestCase): + """ + Test the calculation of VWAP and TWAP with different resampling rules. + """ + + def helper(self) -> pd.DataFrame: + """ + Create data for testing. + """ + timestamp_index = pd.date_range("2024-01-01", periods=10, freq="T") + close = list(range(200, 210)) + volume = list(range(40, 50)) + asset_id = [11, 12] * 5 + data = { + "timestamp": timestamp_index, + "close": close, + "volume": volume, + "full_symbol": asset_id, + } + df = pd.DataFrame(data=data).set_index("timestamp") + return df + + def test1(self) -> None: + resample_rule = "5T" + df = self.helper() + result_df = ramptran.calculate_vwap_twap(df, resample_rule) + # Define expected values. 
+ expected_length = 3 + expected_column_value = None + expected_signature = r""" + # df= + index=[2024-01-01 00:00:00, 2024-01-01 00:10:00] + columns=('close', 11),('close', 12),('twap', 11),('twap', 12),('volume', 11),('volume', 12),('vwap', 11),('vwap', 12) + shape=(3, 8) + close twap volume vwap + 11 12 11 12 11 12 11 12 + timestamp + 2024-01-01 00:00:00 200.0 NaN 200.0 NaN 40.0 NaN 200.000000 NaN + 2024-01-01 00:05:00 204.0 205.0 203.0 203.0 86.0 129.0 203.023256 203.062016 + 2024-01-01 00:10:00 208.0 209.0 207.0 208.0 94.0 96.0 207.021277 208.020833 + """ + # Check signature. + self.check_df_output( + result_df, + expected_length, + expected_column_value, + expected_column_value, + expected_signature, + ) + + def test2(self) -> None: + resample_rule = "1T" + df = self.helper() + result_df = ramptran.calculate_vwap_twap(df, resample_rule) + # Define expected values. + expected_length = 10 + expected_column_value = None + expected_signature = r""" + # df= + index=[2024-01-01 00:00:00, 2024-01-01 00:09:00] + columns=('close', 11),('close', 12),('twap', 11),('twap', 12),('volume', 11),('volume', 12),('vwap', 11),('vwap', 12) + shape=(10, 8) + close twap volume vwap + 11 12 11 12 11 12 11 12 + timestamp + 2024-01-01 00:00:00 200.0 NaN 200.0 NaN 40.0 NaN 200.0 NaN + 2024-01-01 00:01:00 NaN 201.0 NaN 201.0 NaN 41.0 NaN 201.0 + 2024-01-01 00:02:00 202.0 NaN 202.0 NaN 42.0 NaN 202.0 NaN + ... + timestamp + 2024-01-01 00:07:00 NaN 207.0 NaN 207.0 NaN 47.0 NaN 207.0 + 2024-01-01 00:08:00 208.0 NaN 208.0 NaN 48.0 NaN 208.0 NaN + 2024-01-01 00:09:00 NaN 209.0 NaN 209.0 NaN 49.0 NaN 209.0 + """ + # Check signature. + self.check_df_output( + result_df, + expected_length, + expected_column_value, + expected_column_value, + expected_signature, + )