Add files via upload
aelashkin authored Sep 1, 2024
1 parent ceab0db commit 49ab880
Showing 5 changed files with 287 additions and 0 deletions.
40 changes: 40 additions & 0 deletions project/README.md
@@ -0,0 +1,40 @@
# CLAI-HW1 Repository

<h1 align="center">
<br>
Technion Collaborative AI (Spring 2024)
<br>
<br>
<img src="https://github.com/CLAIR-LAB-TECHNION/CLAI/blob/main/tutorials/assets/CLAI_logo.png?raw=true">
</h1>

This repository contains all the auxiliary files needed for the final project in the Collaborative AI course (Course Number: 236203) offered at the Technion during the Spring 2024 semester. The resources provided here include datasets, utility functions, and other essential files required to complete your project.

## Repository Structure

The repository is organized as follows:

- **data/**: Contains all datasets necessary for your project work.

- **lib/**: Includes Python functions and modules designed to support your project development (a short usage sketch follows this list).
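
For orientation, here is a minimal sketch of how the helpers in `lib/data.py` can be used once the quotes archive in `data/` has been extracted, assuming your working directory is the cloned `project/` folder. The CSV filename is a placeholder and the extraction step is an assumption; the provided notebook performs the equivalent steps for you.

```python
import tarfile

from lib import data

# Extract the quotes archive shipped in data/ (the provided notebook normally does this).
with tarfile.open("data/ch08-small-quotes.tgz") as tar:
    tar.extractall("data")

# Placeholder filename -- substitute whichever CSV the archive actually contains.
prices = data.load_relative("data/quotes_example.csv")
print(prices.open[:5])   # absolute open prices
print(prices.close[:5])  # close prices relative to the open price
```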

## How to Use This Repository

You don't need to manually interact with the files in this repository. A Jupyter notebook provided separately will automatically clone this repository and utilize the datasets and functions provided here.

### Steps to Get Started

1. **Run the Provided Notebook**: Simply execute the Jupyter notebook that has been provided to you as part of your assignment. The notebook includes all necessary commands to clone this repository and set up your environment.

2. **Automatic Integration**: The notebook will automatically access the data in the `data` folder and the functions in the `lib` folder, so you can focus on completing your project without worrying about setup. A sketch of what such a setup cell typically looks like is shown below.
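
For context, the setup performed by the notebook typically amounts to a cell like the one below. This is an illustrative sketch only: the repository URL is a placeholder, and your assignment notebook contains the authoritative commands.

```python
# Illustrative setup cell -- your assignment notebook has the real version.
# The repository URL below is a placeholder; use the one given in your assignment.
!git clone https://github.com/<course-org>/<project-repo>.git project_repo

import sys
sys.path.append("project_repo/project")   # make the lib/ package importable

from lib import common, data, validation  # helper modules provided in this repository
```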

## Integration with the Main CLAI Repository

This repository complements the main [CLAI repository](https://github.com/CLAIR-LAB-TECHNION/CLAI), which contains additional resources, tutorials, and installation instructions.

## Important Notes

- **No Manual Setup Required**: All necessary steps are handled by the provided notebook, so there’s no need to manually clone the repository or install additional dependencies.
- **Version Control**: If you make any changes to files within this repository after cloning, remember to keep track of those changes for your project documentation.

For further resources, visit the main CLAI repository.
Binary file added project/data/ch08-small-quotes.tgz
Binary file not shown.
103 changes: 103 additions & 0 deletions project/lib/common.py
@@ -0,0 +1,103 @@
import sys
import time
import numpy as np

import torch
import torch.nn as nn


class RewardTracker:
    """Tracks per-episode rewards and step counts, logs progress to stdout and a
    TensorBoard writer, and reports when the mean reward crosses ``stop_reward``."""

    def __init__(self, writer, stop_reward, group_rewards=1):
        self.writer = writer
        self.stop_reward = stop_reward
        self.reward_buf = []
        self.steps_buf = []
        self.group_rewards = group_rewards

def __enter__(self):
self.ts = time.time()
self.ts_frame = 0
self.total_rewards = []
self.total_steps = []
return self

def __exit__(self, *args):
self.writer.close()

    def reward(self, reward_steps, frame, epsilon=None):
        """Record one (reward, steps) result. Returns True once the mean reward
        over the last 100 recorded groups exceeds ``stop_reward``."""
        reward, steps = reward_steps
        self.reward_buf.append(reward)
        self.steps_buf.append(steps)
        # Wait until `group_rewards` results have accumulated before reporting.
        if len(self.reward_buf) < self.group_rewards:
            return False
reward = np.mean(self.reward_buf)
steps = np.mean(self.steps_buf)
self.reward_buf.clear()
self.steps_buf.clear()
self.total_rewards.append(reward)
self.total_steps.append(steps)
speed = (frame - self.ts_frame) / (time.time() - self.ts)
self.ts_frame = frame
self.ts = time.time()
mean_reward = np.mean(self.total_rewards[-100:])
mean_steps = np.mean(self.total_steps[-100:])
epsilon_str = "" if epsilon is None else ", eps %.2f" % epsilon
print("%d: done %d games, mean reward %.3f, mean steps %.2f, speed %.2f f/s%s" % (
frame, len(self.total_rewards)*self.group_rewards, mean_reward, mean_steps, speed, epsilon_str
))
sys.stdout.flush()
if epsilon is not None:
self.writer.add_scalar("epsilon", epsilon, frame)
self.writer.add_scalar("speed", speed, frame)
self.writer.add_scalar("reward_100", mean_reward, frame)
self.writer.add_scalar("reward", reward, frame)
self.writer.add_scalar("steps_100", mean_steps, frame)
self.writer.add_scalar("steps", steps, frame)
if mean_reward > self.stop_reward:
print("Solved in %d frames!" % frame)
return True
return False


def calc_values_of_states(states, net, device="cpu"):
    """Return the mean of the best action-values predicted by ``net`` over ``states``,
    evaluated in 64 chunks to limit memory use."""
    mean_vals = []
for batch in np.array_split(states, 64):
states_v = torch.tensor(batch).to(device)
action_values_v = net(states_v)
best_action_values_v = action_values_v.max(1)[0]
mean_vals.append(best_action_values_v.mean().item())
return np.mean(mean_vals)


def unpack_batch(batch):
    """Convert a batch of experience tuples into numpy arrays of states, actions,
    rewards, done flags and last states (terminal transitions reuse the current state)."""
    states, actions, rewards, dones, last_states = [], [], [], [], []
for exp in batch:
state = np.array(exp.state, copy=False)
states.append(state)
actions.append(exp.action)
rewards.append(exp.reward)
dones.append(exp.last_state is None)
if exp.last_state is None:
last_states.append(state) # the result will be masked anyway
else:
last_states.append(np.array(exp.last_state, copy=False))
return np.array(states, copy=False), np.array(actions), np.array(rewards, dtype=np.float32), \
np.array(dones, dtype=np.uint8), np.array(last_states, copy=False)


def calc_loss(batch, net, tgt_net, gamma, device="cpu"):
    """Double DQN loss: the online net selects the next action, the target net evaluates it."""
    states, actions, rewards, dones, next_states = unpack_batch(batch)

    states_v = torch.tensor(states).to(device)
    next_states_v = torch.tensor(next_states).to(device)
    actions_v = torch.tensor(actions).to(device)
    rewards_v = torch.tensor(rewards).to(device)
    done_mask = torch.tensor(dones, dtype=torch.bool).to(device)  # explicitly bool for masked assignment

    # Q-values of the actions actually taken.
    state_action_values = net(states_v).gather(1, actions_v.unsqueeze(-1)).squeeze(-1)
    # Double DQN: pick the best next action with the online net...
    next_state_actions = net(next_states_v).max(1)[1]
    # ...but evaluate it with the target net, and zero out terminal states.
    next_state_values = tgt_net(next_states_v).gather(1, next_state_actions.unsqueeze(-1)).squeeze(-1)
    next_state_values[done_mask] = 0.0

    # One-step TD target; detach so gradients don't flow into the target values.
    expected_state_action_values = next_state_values.detach() * gamma + rewards_v
    return nn.MSELoss()(state_action_values, expected_state_action_values)
85 changes: 85 additions & 0 deletions project/lib/data.py
@@ -0,0 +1,85 @@
import os
import csv
import glob
import numpy as np
import collections


Prices = collections.namedtuple('Prices', field_names=['open', 'high', 'low', 'close', 'volume'])


def read_csv(file_name, sep=',', filter_data=True, fix_open_price=False):
    """Read a quotes CSV with <OPEN>/<HIGH>/<LOW>/<CLOSE>/<VOL> columns into a Prices tuple.
    Falls back to ';' as the separator if the header is not found with ','."""
    print("Reading", file_name)
    with open(file_name, 'rt', encoding='utf-8') as fd:
        reader = csv.reader(fd, delimiter=sep)
        h = next(reader)
        if '<OPEN>' not in h and sep == ',':
            return read_csv(file_name, ';')
indices = [h.index(s) for s in ('<OPEN>', '<HIGH>', '<LOW>', '<CLOSE>', '<VOL>')]
o, h, l, c, v = [], [], [], [], []
count_out = 0
count_filter = 0
count_fixed = 0
prev_vals = None
for row in reader:
vals = list(map(float, [row[idx] for idx in indices]))
            # Skip bars with no price movement (open == high == low == close).
            if filter_data and all(map(lambda v: abs(v-vals[0]) < 1e-8, vals[:-1])):
count_filter += 1
continue

po, ph, pl, pc, pv = vals

# fix open price for current bar to match close price for the previous bar
if fix_open_price and prev_vals is not None:
ppo, pph, ppl, ppc, ppv = prev_vals
if abs(po - ppc) > 1e-8:
count_fixed += 1
po = ppc
pl = min(pl, po)
ph = max(ph, po)
count_out += 1
o.append(po)
c.append(pc)
h.append(ph)
l.append(pl)
v.append(pv)
prev_vals = vals
print("Read done, got %d rows, %d filtered, %d open prices adjusted" % (
count_filter + count_out, count_filter, count_fixed))
return Prices(open=np.array(o, dtype=np.float32),
high=np.array(h, dtype=np.float32),
low=np.array(l, dtype=np.float32),
close=np.array(c, dtype=np.float32),
volume=np.array(v, dtype=np.float32))


def prices_to_relative(prices):
"""
Convert prices to relative in respect to open price
:param ochl: tuple with open, close, high, low
:return: tuple with open, rel_close, rel_high, rel_low
"""
assert isinstance(prices, Prices)
rh = (prices.high - prices.open) / prices.open
rl = (prices.low - prices.open) / prices.open
rc = (prices.close - prices.open) / prices.open
return Prices(open=prices.open, high=rh, low=rl, close=rc, volume=prices.volume)


def load_relative(csv_file):
return prices_to_relative(read_csv(csv_file))


def price_files(dir_name):
result = []
for path in glob.glob(os.path.join(dir_name, "*.csv")):
result.append(path)
return result


def load_year_data(year, basedir='data'):
    """Load all CSV files in ``basedir`` whose names contain the two-digit year,
    returning a dict mapping file path to relative Prices."""
    y = str(year)[-2:]
result = {}
for path in glob.glob(os.path.join(basedir, "*_%s*.csv" % y)):
result[path] = load_relative(path)
return result
59 changes: 59 additions & 0 deletions project/lib/validation.py
@@ -0,0 +1,59 @@
import numpy as np

import torch

def validation_run(env, net, Actions, episodes=100, device="cpu", epsilon=0.02, commission=0.1):
    """Evaluate ``net`` on ``env`` for ``episodes`` episodes with an epsilon-greedy policy.
    Returns the mean episode reward, episode length, per-order profit (%) and order duration."""
    stats = {
        'episode_reward': [],
        'episode_steps': [],
        'order_profits': [],
        'order_steps': [],
    }

for episode in range(episodes):
obs = env.reset()

total_reward = 0.0
position = None
position_steps = None
episode_steps = 0

while True:
obs_v = torch.tensor(np.array([obs])).to(device)
out_v = net(obs_v)

action_idx = out_v.max(dim=1)[1].item()
if np.random.random() < epsilon:
action_idx = env.action_space.sample()
action = Actions(action_idx)

close_price = env._state._cur_close()

if action == Actions.Buy and position is None:
position = close_price
position_steps = 0
            elif action == Actions.Close and position is not None:
                # Profit of the closed position, charging `commission` percent on both the buy and the sell.
                profit = close_price - position - (close_price + position) * commission / 100
                profit = 100.0 * profit / position  # express as a percentage of the entry price
                stats['order_profits'].append(profit)
                stats['order_steps'].append(position_steps)
                position = None
                position_steps = None

obs, reward, done, _ = env.step(action_idx)
total_reward += reward
episode_steps += 1
if position_steps is not None:
position_steps += 1
if done:
if position is not None:
profit = close_price - position - (close_price + position) * commission / 100
profit = 100.0 * profit / position
stats['order_profits'].append(profit)
stats['order_steps'].append(position_steps)
break

stats['episode_reward'].append(total_reward)
stats['episode_steps'].append(episode_steps)

return {key: np.mean(vals) for key, vals in stats.items()}
