Add files via upload

CLAIR-LAB-TECHNION · Sep 1, 2024 · 49ab880 · 49ab880
1 parent ceab0db
commit 49ab880
Show file tree

Hide file tree

Showing 5 changed files with 287 additions and 0 deletions.
diff --git a/project/README.md b/project/README.md
@@ -0,0 +1,40 @@
+# CLAI Final Project Repository
+
+<h1 align="center">
+  <br>
+Technion Collaborative AI (Spring 2024)
+  <br>
+  <br>
+  <img src="https://github.com/CLAIR-LAB-TECHNION/CLAI/blob/main/tutorials/assets/CLAI_logo.png?raw=true">
+</h1>
+
+This repository contains all the auxiliary files needed for the final project in the Collaborative AI course (Course Number: 236203) offered at the Technion during the Spring 2024 semester. The resources provided here include datasets, utility functions, and other essential files required to complete your project.
+
+## Repository Structure
+
+The repository is organized as follows:
+
+- **data/**: Contains all datasets necessary for your project work.
+
+- **lib/**: Includes Python functions and modules designed to support your project development.
+
+## How to Use This Repository
+
+You don't need to manually interact with the files in this repository. A Jupyter notebook provided separately will automatically clone this repository and utilize the datasets and functions provided here.
+
+### Steps to Get Started
+
+1. **Run the Provided Notebook**: Simply execute the Jupyter notebook that has been provided to you as part of your assignment. The notebook includes all necessary commands to clone this repository and set up your environment.
+
+2. **Automatic Integration**: The notebook will automatically access the data in the `data` folder and the functions in the `lib` folder, so you can focus on completing your project without worrying about setup.
+
+## Integration with the Main CLAI Repository
+
+This repository complements the main [CLAI repository](https://github.com/CLAIR-LAB-TECHNION/CLAI), which contains additional resources, tutorials, and installation instructions. You can explore the main repository [here](https://github.com/CLAIR-LAB-TECHNION/CLAI).
+
+## Important Notes
+
+- **No Manual Setup Required**: All necessary steps are handled by the provided notebook, so there’s no need to manually clone the repository or install additional dependencies.
+- **Version Control**: If you make any changes to files within this repository after cloning, remember to keep track of those changes for your project documentation.
+
+For further resources, visit the main CLAI repository.
diff --git a/project/data/ch08-small-quotes.tgz b/project/data/ch08-small-quotes.tgz
diff --git a/project/lib/common.py b/project/lib/common.py
@@ -0,0 +1,103 @@
+import sys
+import time
+import numpy as np
+
+import torch
+import torch.nn as nn
+
+
class RewardTracker:
    """
    Collect per-episode results, log running statistics and detect the stop condition.

    Rewards and step counts are averaged in groups of ``group_rewards`` episodes;
    every completed group is printed to stdout and sent to ``writer``.

    The tracker can be used as a context manager (the writer is closed on exit),
    but ``reward()`` also works on a plain instance.

    :param writer: object providing ``add_scalar(tag, value, step)`` and ``close()``
                   (presumably a TensorBoard SummaryWriter -- confirm against caller)
    :param stop_reward: mean-reward boundary above which ``reward()`` returns True
    :param group_rewards: number of episodes averaged into one logged sample
    """

    def __init__(self, writer, stop_reward, group_rewards=1):
        self.writer = writer
        self.stop_reward = stop_reward
        self.reward_buf = []
        self.steps_buf = []
        self.group_rewards = group_rewards
        # Create the timing/statistics state up front so reward() does not
        # depend on __enter__ having been called first.
        self._reset_stats()

    def _reset_stats(self):
        """Restart the speed timer and forget all previously logged groups."""
        self.ts = time.time()
        self.ts_frame = 0
        self.total_rewards = []
        self.total_steps = []

    def __enter__(self):
        self._reset_stats()
        return self

    def __exit__(self, *args):
        self.writer.close()

    def reward(self, reward_steps, frame, epsilon=None):
        """
        Register one finished episode.

        :param reward_steps: ``(reward, steps)`` pair of the finished episode
        :param frame: current global frame counter, used as the logging step
        :param epsilon: optional exploration rate to log alongside the statistics
        :return: True when the mean reward of the last 100 logged groups exceeds
                 ``stop_reward``; False otherwise (including while a group is
                 still being filled)
        """
        reward, steps = reward_steps
        self.reward_buf.append(reward)
        self.steps_buf.append(steps)
        if len(self.reward_buf) < self.group_rewards:
            return False

        # A full group has been collected: collapse it into a single sample.
        reward = np.mean(self.reward_buf)
        steps = np.mean(self.steps_buf)
        self.reward_buf.clear()
        self.steps_buf.clear()
        self.total_rewards.append(reward)
        self.total_steps.append(steps)

        now = time.time()
        elapsed = now - self.ts
        # Coarse clocks can report no elapsed time between two calls;
        # report a speed of 0 instead of dividing by zero.
        speed = (frame - self.ts_frame) / elapsed if elapsed > 0 else 0.0
        self.ts_frame = frame
        self.ts = now

        mean_reward = np.mean(self.total_rewards[-100:])
        mean_steps = np.mean(self.total_steps[-100:])
        epsilon_str = "" if epsilon is None else ", eps %.2f" % epsilon
        print("%d: done %d games, mean reward %.3f, mean steps %.2f, speed %.2f f/s%s" % (
            frame, len(self.total_rewards)*self.group_rewards, mean_reward, mean_steps, speed, epsilon_str
        ))
        sys.stdout.flush()

        if epsilon is not None:
            self.writer.add_scalar("epsilon", epsilon, frame)
        self.writer.add_scalar("speed", speed, frame)
        self.writer.add_scalar("reward_100", mean_reward, frame)
        self.writer.add_scalar("reward", reward, frame)
        self.writer.add_scalar("steps_100", mean_steps, frame)
        self.writer.add_scalar("steps", steps, frame)

        if mean_reward > self.stop_reward:
            print("Solved in %d frames!" % frame)
            return True
        return False
+
+
def calc_values_of_states(states, net, device="cpu"):
    """
    Estimate the mean best-action value the network assigns to a set of states.

    The states are split into 64 chunks, the per-chunk mean of the maximum
    action value is computed, and the chunk means are averaged.

    :param states: array-like of states; the first axis enumerates the states
    :param net: callable mapping a batch tensor to per-action values (2-D output,
                actions on dim 1)
    :param device: torch device the batches are moved to
    :return: mean of the per-chunk means, or NaN when ``states`` is empty
    """
    mean_vals = []
    # Pure evaluation: no autograd graph is needed.
    with torch.no_grad():
        for batch in np.array_split(states, 64):
            # With fewer than 64 states array_split yields empty chunks whose
            # mean is NaN and would poison the final average -- skip them.
            if len(batch) == 0:
                continue
            states_v = torch.tensor(batch).to(device)
            best_action_values_v = net(states_v).max(1)[0]
            mean_vals.append(best_action_values_v.mean().item())
    if not mean_vals:
        return float("nan")
    return np.mean(mean_vals)
+
+
def unpack_batch(batch):
    """
    Split a batch of experience records into parallel numpy arrays.

    Each record must expose ``state``, ``action``, ``reward`` and ``last_state``;
    ``last_state is None`` marks a terminal transition.

    :param batch: iterable of experience records
    :return: tuple ``(states, actions, rewards, dones, last_states)`` where
             ``rewards`` is float32 and ``dones`` is uint8 (1 for terminal)
    """
    states, actions, rewards, dones, last_states = [], [], [], [], []
    for exp in batch:
        # np.asarray avoids a copy when possible; np.array(..., copy=False)
        # raises ValueError on NumPy 2.x whenever a copy cannot be avoided.
        state = np.asarray(exp.state)
        is_terminal = exp.last_state is None
        states.append(state)
        actions.append(exp.action)
        rewards.append(exp.reward)
        dones.append(is_terminal)
        if is_terminal:
            # Placeholder to keep the array rectangular; the value computed
            # for it is expected to be masked out by the caller.
            last_states.append(state)
        else:
            last_states.append(np.asarray(exp.last_state))
    return np.asarray(states), np.array(actions), np.array(rewards, dtype=np.float32), \
           np.array(dones, dtype=np.uint8), np.asarray(last_states)
+
+
def calc_loss(batch, net, tgt_net, gamma, device="cpu"):
    """
    Compute the MSE loss between predicted and bootstrapped action values.

    The next action is chosen by ``net`` and evaluated by ``tgt_net``
    (the double-DQN target); terminal transitions contribute only their reward.

    :param batch: experience records accepted by ``unpack_batch``
    :param net: trained network mapping a state batch to per-action values
    :param tgt_net: target network with the same output layout as ``net``
    :param gamma: discount factor applied to the next-state value
    :param device: torch device the tensors are moved to
    :return: scalar loss tensor; gradients flow only through ``net``'s
             prediction for the taken actions
    """
    states, actions, rewards, dones, next_states = unpack_batch(batch)

    states_v = torch.tensor(states).to(device)
    next_states_v = torch.tensor(next_states).to(device)
    # gather() requires int64 indices; the numpy default integer can be int32.
    actions_v = torch.tensor(actions, dtype=torch.int64).to(device)
    rewards_v = torch.tensor(rewards).to(device)
    done_mask = torch.tensor(dones, dtype=torch.bool).to(device)

    state_action_values = net(states_v).gather(1, actions_v.unsqueeze(-1)).squeeze(-1)

    # The target is a constant with respect to the optimised parameters, so
    # build it without an autograd graph instead of detaching it afterwards.
    with torch.no_grad():
        next_state_actions = net(next_states_v).max(1)[1]
        next_state_values = tgt_net(next_states_v).gather(1, next_state_actions.unsqueeze(-1)).squeeze(-1)
        # No future value after a terminal transition.
        next_state_values[done_mask] = 0.0
        expected_state_action_values = next_state_values * gamma + rewards_v

    return nn.MSELoss()(state_action_values, expected_state_action_values)
diff --git a/project/lib/data.py b/project/lib/data.py
@@ -0,0 +1,85 @@
+import os
+import csv
+import glob
+import numpy as np
+import collections
+
+
# Column-wise container for one instrument's bars; every field is a sequence
# with one entry per bar.
Prices = collections.namedtuple('Prices', field_names=['open', 'high', 'low', 'close', 'volume'])


def read_csv(file_name, sep=',', filter_data=True, fix_open_price=False):
    """
    Read a quotes file with ``<OPEN>``, ``<HIGH>``, ``<LOW>``, ``<CLOSE>`` and ``<VOL>`` columns.

    If the header parsed with ``sep=','`` has no ``<OPEN>`` column, the file is
    re-read with ``';'`` as the separator, keeping the other options.

    :param file_name: path of the CSV file
    :param sep: field delimiter
    :param filter_data: drop bars whose open, high, low and close are all equal
    :param fix_open_price: replace a bar's open with the previous kept bar's close
                           when they differ, widening high/low to contain it
    :return: Prices tuple of float32 numpy arrays
    :raises ValueError: if the file is empty or a required column is missing
    """
    print("Reading", file_name)
    # newline='' is what the csv module expects from files it parses.
    with open(file_name, 'rt', encoding='utf-8', newline='') as fd:
        reader = csv.reader(fd, delimiter=sep)
        header = next(reader, None)
        if header is None:
            raise ValueError("%s is empty: no header row found" % file_name)
        if '<OPEN>' not in header and sep == ',':
            # Forward the caller's options; the retry must not silently fall
            # back to the defaults.
            return read_csv(file_name, ';', filter_data=filter_data, fix_open_price=fix_open_price)
        indices = [header.index(s) for s in ('<OPEN>', '<HIGH>', '<LOW>', '<CLOSE>', '<VOL>')]

        opens, highs, lows, closes, volumes = [], [], [], [], []
        count_out = 0
        count_filter = 0
        count_fixed = 0
        prev_close = None
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing one).
                continue
            vals = [float(row[idx]) for idx in indices]
            po, ph, pl, pc, pv = vals
            if filter_data and all(abs(price - po) < 1e-8 for price in vals[:-1]):
                count_filter += 1
                continue

            # fix open price for current bar to match close price for the previous bar
            if fix_open_price and prev_close is not None:
                if abs(po - prev_close) > 1e-8:
                    count_fixed += 1
                    po = prev_close
                    pl = min(pl, po)
                    ph = max(ph, po)
            count_out += 1
            opens.append(po)
            highs.append(ph)
            lows.append(pl)
            closes.append(pc)
            volumes.append(pv)
            prev_close = pc
    print("Read done, got %d rows, %d filtered, %d open prices adjusted" % (
        count_filter + count_out, count_filter, count_fixed))
    return Prices(open=np.array(opens, dtype=np.float32),
                  high=np.array(highs, dtype=np.float32),
                  low=np.array(lows, dtype=np.float32),
                  close=np.array(closes, dtype=np.float32),
                  volume=np.array(volumes, dtype=np.float32))
+
+
def prices_to_relative(prices):
    """
    Express high, low and close as fractional offsets from the open price.

    :param prices: Prices tuple holding absolute open, high, low, close and volume
    :return: Prices tuple where high, low and close are ``(value - open) / open``;
             open and volume are passed through unchanged
    """
    assert isinstance(prices, Prices)
    open_prices = prices.open
    rel_high = (prices.high - open_prices) / open_prices
    rel_low = (prices.low - open_prices) / open_prices
    rel_close = (prices.close - open_prices) / open_prices
    return Prices(open=open_prices, high=rel_high, low=rel_low, close=rel_close, volume=prices.volume)
+
+
def load_relative(csv_file):
    """
    Load a quotes file and convert it to open-relative prices.

    :param csv_file: path handed to ``read_csv`` with its default options
    :return: result of ``prices_to_relative`` applied to the loaded prices
    """
    absolute_prices = read_csv(csv_file)
    return prices_to_relative(absolute_prices)
+
+
def price_files(dir_name):
    """
    List the CSV files located directly inside a directory.

    :param dir_name: directory to search (not recursive)
    :return: list of paths matching ``*.csv``, sorted so the order does not
             depend on the filesystem's listing order
    """
    return sorted(glob.glob(os.path.join(dir_name, "*.csv")))
+
+
def load_year_data(year, basedir='data'):
    """
    Load the relative prices of every quotes file belonging to a year.

    :param year: year; only its last two characters are used for matching
    :param basedir: directory searched for ``*_<yy>*.csv`` files
    :return: dict mapping each matching path to its ``load_relative`` result
    """
    year_suffix = str(year)[-2:]
    pattern = os.path.join(basedir, "*_%s*.csv" % year_suffix)
    return {csv_path: load_relative(csv_path) for csv_path in glob.glob(pattern)}
diff --git a/project/lib/validation.py b/project/lib/validation.py
@@ -0,0 +1,59 @@
+import numpy as np
+
+import torch
+
def _order_profit_percent(open_price, close_price, commission):
    """Return an order's profit in percent of the entry price, net of commission on both legs."""
    profit = close_price - open_price - (close_price + open_price) * commission / 100
    return 100.0 * profit / open_price


def validation_run(env, net, Actions, episodes=100, device="cpu", epsilon=0.02, commission=0.1):
    """
    Play several episodes with an epsilon-greedy policy and average the results.

    The environment must provide ``reset()`` returning an observation,
    ``step(action_idx)`` returning ``(obs, reward, done, info)``,
    ``action_space.sample()`` and ``_state._cur_close()``.

    :param env: environment to evaluate on
    :param net: callable mapping an observation batch to per-action values
    :param Actions: enum with ``Buy`` and ``Close`` members, constructible from
                    an action index
    :param episodes: number of episodes to play
    :param device: torch device the observations are moved to
    :param epsilon: probability of replacing the greedy action with a random one
    :param commission: commission in percent, charged on both the opening and
                       the closing price of an order
    :return: dict with the means of ``episode_reward``, ``episode_steps``,
             ``order_profits`` and ``order_steps``; a statistic with no samples
             (e.g. no order was ever closed) is NaN
    """
    stats = {
        'episode_reward': [],
        'episode_steps': [],
        'order_profits': [],
        'order_steps': [],
    }

    for episode in range(episodes):
        obs = env.reset()

        total_reward = 0.0
        position = None         # entry price of the open order, if any
        position_steps = None   # steps the open order has been held
        episode_steps = 0

        while True:
            obs_v = torch.tensor(np.array([obs])).to(device)
            # Evaluation only: skip building the autograd graph.
            with torch.no_grad():
                out_v = net(obs_v)

            action_idx = out_v.max(dim=1)[1].item()
            if np.random.random() < epsilon:
                action_idx = env.action_space.sample()
            action = Actions(action_idx)

            # Price is sampled before the step, so it is the price the action
            # is taken at.
            close_price = env._state._cur_close()

            if action == Actions.Buy and position is None:
                position = close_price
                position_steps = 0
            elif action == Actions.Close and position is not None:
                stats['order_profits'].append(_order_profit_percent(position, close_price, commission))
                stats['order_steps'].append(position_steps)
                position = None
                position_steps = None

            obs, reward, done, _ = env.step(action_idx)
            total_reward += reward
            episode_steps += 1
            if position_steps is not None:
                position_steps += 1
            if done:
                # An order still open at the end of the episode is closed at
                # the last sampled price.
                if position is not None:
                    stats['order_profits'].append(_order_profit_percent(position, close_price, commission))
                    stats['order_steps'].append(position_steps)
                break

        stats['episode_reward'].append(total_reward)
        stats['episode_steps'].append(episode_steps)

    # np.mean([]) emits a RuntimeWarning; report NaN explicitly instead.
    return {key: (np.mean(vals) if vals else float("nan")) for key, vals in stats.items()}