+from __future__ import print_function
 import numpy as np
-import backtrader as bt
+import multiprocessing as mp
 
-from evostra import EvolutionStrategy
-from keras.models import Model, Input, Sequential
-from keras.layers import Dense, Activation
 
+class EvolutionStrategy(object):
 
-class ESStrategy(bt.Strategy):
-    params = {
-        'model': None
-    }
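+    # get_reward_func scores one candidate set of weights; higher is better.
+    # sigma scales the exploration noise, learning_rate scales the update step.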
+    def __init__(self, weights, get_reward_func, population_size=50, sigma=0.1, learning_rate=0.001):
+        np.random.seed(0)
+        self.weights = weights
+        self.get_reward = get_reward_func
+        self.POPULATION_SIZE = population_size
+        self.SIGMA = sigma
+        self.LEARNING_RATE = learning_rate
 
-    def __init__(self):
-        self.order = None
-        self.dataclose = self.datas[0].close
-        self.datavol = self.datas[0].volume
 
-    def stop(self):
-        cash = self.broker.getvalue()
-        print('Result cash: {}'.format(cash))
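+    # Build one candidate: add sigma-scaled noise to every weight tensor.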
+    def _get_weights_try(self, w, p):
+        weights_try = []
+        for index, i in enumerate(p):
+            jittered = self.SIGMA * i
+            weights_try.append(w[index] + jittered)
+        return weights_try
 
-    def notify_order(self, order):
-        if order.status in [order.Submitted, order.Accepted]:
-            return
 
-        self.order = None
+    def get_weights(self):
+        return self.weights
 
-    def next(self):
-        if self.order:
-            return
 
-        input_data = []
-        for i in range(7):
-            input_data.append(self.dataclose[i - 6])
-            input_data.append(self.datavol[i - 6])
-        inp = np.asanyarray(input_data)
-        inp = np.expand_dims(inp, 0)
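+    # Main loop: sample noise, score perturbed candidates, move the weights uphill.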
+    def run(self, iterations, print_step=10):
+        for iteration in range(iterations):
 
-        predict = self.p.model.predict(inp)[0]
-        predict = np.argmax(predict)
+            if iteration % print_step == 0:
+                print('iter %d. reward: %f' % (iteration, self.get_reward(self.weights)))
 
-        if not self.position:
-            if predict == 0:
-                self.order = self.buy()
-        else:
-            if predict == 1:
-                self.order = self.sell()
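+            # Sample a population of Gaussian noise tensors, one set per candidate.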
+            population = []
+            rewards = np.zeros(self.POPULATION_SIZE)
+            for i in range(self.POPULATION_SIZE):
+                x = []
+                for w in self.weights:
+                    x.append(np.random.randn(*w.shape))
+                population.append(x)
 
-        if not self.position:
-            if predict == 1:
-                self.order = self.sell()
-        else:
-            if predict == 0:
-                self.order = self.buy()
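+            # Score each perturbed candidate with the user-supplied reward function.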
+            for i in range(self.POPULATION_SIZE):
+                weights_try = self._get_weights_try(self.weights, population[i])
+                rewards[i] = self.get_reward(weights_try)
 
-model = Sequential()
-model.add(Dense(128, input_dim=14, activation='relu'))
-model.add(Dense(256, activation='relu'))
-model.add(Dense(512, activation='relu'))
-model.add(Dense(1024, activation='relu'))
-model.add(Dense(2, activation='relu'))
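+            # Standardize rewards to zero mean and unit variance before the update.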
+            rewards = (rewards - np.mean(rewards)) / np.std(rewards)
 
-model.compile(optimizer='Adam', loss='mse')
-
-data = bt.feeds.GenericCSVData(
-    dataname='eur_usd_1d.csv',
-    separator=',',
-    dtformat=('%Y%m%d'),
-    tmformat=('%H%M00'),
-    datetime=0,
-    time=1,
-    open=2,
-    high=3,
-    low=4,
-    close=5,
-    volume=6,
-    openinterest=-1
-)
-
-def get_reward(weights):
-    model.set_weights(weights)
-    cerebro = bt.Cerebro()
-    cerebro.addstrategy(ESStrategy, model=model)
-    cerebro.adddata(data)
-    cerebro.broker.setcash(1000)
-    cerebro.addsizer(bt.sizers.FixedSize, stake=50)
-
-    cerebro.run()
-    return cerebro.broker.getvalue()
-
-es = EvolutionStrategy(model.get_weights(), get_reward, population_size=50, sigma=0.1, learning_rate=0.001)
-es.run(1000, print_step=100)
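+            # ES gradient estimate: step each weight tensor along the reward-weighted
+            # average of its noise, scaled by LEARNING_RATE / (POPULATION_SIZE * SIGMA).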
+            for index, w in enumerate(self.weights):
+                A = np.array([p[index] for p in population])
+                self.weights[index] = w + self.LEARNING_RATE / (self.POPULATION_SIZE * self.SIGMA) * np.dot(A.T, rewards).T
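Usage sketch (not part of the commit): the deleted script shows how this class is driven -- build a set of weights, pass them with a reward callback to the constructor, then call run(). The get_reward below is a hypothetical stand-in for the deleted backtrader-based one, and the weight shapes are purely illustrative.

import numpy as np

# Hypothetical reward: prefers weights near zero (stand-in for a backtest score).
def get_reward(weights):
    return -sum(np.sum(w ** 2) for w in weights)

# Illustrative starting weights: two "layers" of arbitrary shape.
start_weights = [np.random.randn(14, 128), np.random.randn(128, 2)]

es = EvolutionStrategy(start_weights, get_reward, population_size=50, sigma=0.1, learning_rate=0.001)
es.run(1000, print_step=100)
trained = es.get_weights()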