Better than simple Gaussian Approximation? #2

Open
julienrueter opened this issue Jan 18, 2018 · 1 comment
julienrueter commented Jan 18, 2018

Hi Siraj,
I've experimented with RNNs/NNs/CNNs and crypto price prediction in the past, and most of the time these models just learned the global statistical distribution of the dataset, because in the short term nothing more than a random walk model applies.
To illustrate this, I built a simple Python script that fits a Gaussian distribution to the data and draws samples from it to evaluate this simple price prediction model.
The result is that this simple "model" has a precision, recall, and F1 score similar (within mean + 2*std) to those of the RNN that uses just the price as input and nothing else.
Best regards,
Julien

Result of the simple Gaussian model:
Mean Precision: 0.5823 | Mean Recall: 0.5561 | Mean F1 Score: 0.5686
STD Precision: 0.0227 | STD Recall: 0.0348 | STD F1 Score: 0.0266
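
To make the "within mean + 2*std" comparison concrete, here is a small sketch that checks whether a model's F1 score escapes the Gaussian baseline's band, using the numbers reported above; the RNN score below is a placeholder, not a measured result:

# Baseline F1 statistics as reported above; rnn_f1 is a hypothetical placeholder
baseline_mean_f1, baseline_std_f1 = 0.5686, 0.0266
rnn_f1 = 0.57  # placeholder value for illustration only, not a measured RNN score
if rnn_f1 > baseline_mean_f1 + 2 * baseline_std_f1:
    print("Model clearly beats the Gaussian baseline")
else:
    print("Model is within the baseline's mean + 2*std band (no real edge)")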

julienrueter (Author) commented:
import numpy as np
import pylab
# Daily Bitcoin price over the last 365 days
btc_price = np.array([901.794, 888.635, 890.932, 922.625, 930.491, 923.679, 909.375, 898.577, 906.981, 920.549, 922.622, 922.771, 921.125, 941.198, 974.957, 993.804, 1013.64, 1025.74, 1034.65, 1039.16, 1058.07, 1050.5, 1074.72, 970.852, 1002.01, 1004.24, 992.074, 1003.21, 1003.65, 1017.93, 1034.6, 1056.87, 1044.41, 1055.49, 1103.93, 1107.3, 1138.92, 1159.65, 1170.05, 1160.19, 1166.51, 1175.4, 1194.09, 1224.9, 1279.5, 1266.24, 1252.98, 1273.7, 1219.51, 1203.96, 1172.79, 1192.81, 1184.95, 1179.9, 1235.38, 1232.11, 1251.13, 1227.21, 1159.92, 1088.41, 1007.05, 1056.94, 1098.27, 1065.67, 1055.08, 982.209, 925.862, 982.121, 1011.44, 1064.16, 1053.31, 1025.39, 1041.74, 1083.86, 1082.16, 1128.58, 1135.66, 1124.41, 1159.78, 1173.72, 1183.98, 1178.89, 1186.64, 1196.99, 1198.42, 1185.01, 1163.22, 1185.53, 1177.18, 1173.4, 1216.91, 1208.31, 1216.64, 1224.21, 1218.76, 1220.78, 1222.4, 1261.74, 1277.44, 1293.52, 1310.59, 1315.48, 1316.88, 1372.39, 1466.32, 1465.37, 1543.12, 1606.32, 1570.62, 1569.09, 1661.17, 1827.47, 1741.45, 1838.73, 1809.64, 1762.47, 1802.03, 1756.81, 1742.49, 1826.54, 1839.17, 1957.02, 1994.14, 2058.74, 2212.54, 2258.93, 2458.2, 2729.71, 2457.24, 2007.24, 2234.27, 2197.8, 2286.44, 2201.21, 2435.48, 2412.8, 2505.07, 2526.52, 2576.8, 2898.3, 2838.14, 2749.44, 2850.36, 2796.87, 2914.06, 2794.05, 2736.73, 2608.5, 2264.09, 2480.87, 2592.96, 2561.17, 2584.1, 2643.21, 2729.53, 2712.12, 2744.13, 2694.57, 2658.48, 2520.31, 2520.75, 2532.09, 2549.52, 2508.69, 2435.93, 2494.07, 2613.7, 2557.76, 2581.69, 2559.6, 2514.31, 2579.24, 2496.37, 2404.25, 2410.17, 2348.28, 2195.55, 2042.68, 1892.59, 2178.64, 2352.79, 2280.52, 2699.62, 2645.96, 2785.17, 2758.94, 2757.5, 2571.07, 2527.5, 2671.57, 2809.01, 2729.69, 2752.85, 2872.31, 2713.63, 2711.93, 2802.24, 2899.33, 3253.94, 3218.02, 3374.01, 3419.01, 3345.37, 3391.23, 3654.33, 3883.17, 4062.87, 4306.43, 4181.96, 4374.59, 4349.1, 4128.86, 4193.7, 4087.66, 4001.74, 4100.52, 4151.52, 4334.68, 4371.6, 4352.4, 4383.28, 4387.51, 4555.86, 4557.24, 4718.37, 4910.47, 4631.97, 4539.47, 4157.95, 4453.47, 4533.17, 4575.35, 4145.38, 4203.61, 4209.58, 4207.6, 4079.7, 3912.01, 3343.47, 3716.3, 3564.06, 3645.36, 4000.31, 3912.91, 3858.69, 3670.53, 3608.36, 3782.44, 3700.48, 3935.72, 3909.23, 4162.55, 4170.8, 4212.62, 4335.09, 4417.06, 4420.15, 4312.44, 4247.14, 4320.53, 4362.94, 4431.99, 4614.8, 4826.02, 4778.94, 4846.95, 5509.22, 5618.17, 5835.97, 5669.7, 5606.32, 5521.39, 5634.66, 5697.31, 6074.33, 6058.24, 6040.02, 5696.78, 5430.46, 5745.22, 5956.62, 5840.19, 5739.39, 6129.01, 6144.72, 6423.43, 6883.19, 7042.21, 7073.37, 7404.87, 7384.71, 7119.88, 7331.84, 7391.52, 7276.36, 6837.83, 6172.27, 5959.35, 6689.06, 6796.43, 7203.09, 7894.08, 7542.92, 7841.41, 8012.17, 8221.34, 8126.75, 8231.85, 7953.04, 8208.21, 8841.21, 9671.47, 9750.23, 10271.1, 10618.3, 9895.34, 11157.8, 11054.5, 11420.8, 11716.3, 12269.2, 14286.0, 17839.9, 16065.0, 14228.7, 16556.2, 17059.3, 16855.2, 16255.9, 17766.3, 17672.2, 19345.6, 18621.3, 19053.0, 17040.4, 16674.1, 14895.1, 14427.6, 13732.5, 13565.8, 14140.3, 16282.9, 14750.1, 15167.8, 13538.0, 13529.8, 13760.1, 13795.7, 15288.6, 15396.8, 15809.6, 17355.6, 17151.9, 16141.8, 15362.3, 14434.0, 14521.7, 13608.5, 14285.9, 14360.4, 13747.8, 13390.0, 10954.9, 11887.5, 11644.1])

# Price ratio of Bitcoin at time t: price[t]/price[t-1]
change_in_price = btc_price[1:]/btc_price[:-1]

# Assumption: the log of the price change can be described by a normal distribution
log_price_change = np.log(change_in_price)
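
# Optional sanity check (not part of the original script; assumes scipy is installed):
# D'Agostino's normality test gives a rough idea of how well a normal distribution
# describes the log price changes (a small p-value suggests a poor fit).
from scipy import stats
normality_stat, normality_p = stats.normaltest(log_price_change)
print("Normality test statistic: %.3f | p-value: %.4f" % (normality_stat, normality_p))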


def gauss(x,m,sig):
    # Gaussian probability density; m = mean, sig = variance
    y = np.exp(-(x-m)**2/(2*sig))/(2*np.pi*sig)**0.5
    return y


def simple_model(m,sig,log_price_change):
    # Draw a random sample from the Gaussian: np.random.normal needs the mean, the standard deviation (sqrt of sig) and the sample length
    random_sample = np.random.normal(m,sig**0.5,len(log_price_change))
    return random_sample

def calculate_scores(log_price_change,random_sample):
    # Confusion matrix: rows = predicted sign (up/down), columns = actual sign (up/down)
    mat = np.zeros((2,2))
    
    for i in range(len(random_sample)):
        if random_sample[i]>0:
            if log_price_change[i] > 0:
                mat[0,0] += 1
            else:
                mat[0,1] += 1
        else:
            if log_price_change[i] > 0:
                mat[1,0] += 1
            else:
                mat[1,1] += 1
    precision = mat[0,0]/(mat[0,0]+mat[0,1])
    recall = mat[0,0]/(mat[0,0]+mat[1,0])
    f1_score = 2*precision*recall/(precision+recall)
    return precision,recall,f1_score


# x range for plotting the fitted Gaussian
x = np.linspace(min(log_price_change)-0.1,max(log_price_change)+0.1,100)
# Mean of log_price_change
mean_log_price_change = np.mean(log_price_change)
# Variance of log_price_change
var_log_price_change = np.var(log_price_change)

# Gaussian distribution
distribution = gauss(x,mean_log_price_change,var_log_price_change)

# Plot Gaussian vs Histogram plot of Real Data
pylab.hist(log_price_change,20,density=True,label="Hist of Log Price Change")
pylab.plot(x,distribution,label="Gaussian")
pylab.legend()
pylab.show()


# Plot simple_model vs real data | these plots are a bit misleading because each prediction starts from the true price of the previous timestep, so the previous prediction's error is erased at every step
random_sample = simple_model(mean_log_price_change,var_log_price_change,log_price_change)
pylab.subplot(211)
pylab.plot(btc_price[1:],label="Bitcoin Price")
# invert the log of the model sample and multiply by the price at time t-1
pylab.plot(btc_price[:-1]*np.exp(random_sample),label="Model Price")
pylab.legend()
# Log price change vs model sample | a more honest graphical representation of the prediction accuracy
pylab.subplot(212)
pylab.plot(log_price_change,label="Log Price Change")
pylab.plot(random_sample,label="Model Sample Data")
pylab.legend()
pylab.show()

# Calculate Scores
template = "Precision: %.4f | Recall: %.4f | F1 Score: %.4f"
scores = calculate_scores(log_price_change,random_sample)
print(template%scores)
print("-"*20)
# Now test multiple samples to get a range of scores
for i in range(10):
    random_sample = simple_model(mean_log_price_change,var_log_price_change,log_price_change)
    scores = calculate_scores(log_price_change,random_sample)
    print("Sample",i,template%scores)
# Mean and standard deviation of the scores over many samples
template_mean = "Mean Precision: %.4f | Mean Recall: %.4f | Mean F1 Score: %.4f"
template_std = "STD Precision: %.4f | STD Recall: %.4f | STD F1 Score: %.4f"
sample_scores = []
for i in range(1000):
    random_sample = simple_model(mean_log_price_change,var_log_price_change,log_price_change)
    scores = calculate_scores(log_price_change,random_sample)
    sample_scores.append(list(scores))

sample_scores = np.array(sample_scores)
print(template_mean%tuple(np.mean(sample_scores,axis=0).tolist()))
print(template_std%tuple(np.std(sample_scores,axis=0).tolist()))

# Sample prediction with accumulating model error vs reality (each step builds on the previous model price instead of the true price)
prediction = [btc_price[0]]
for i in np.exp(random_sample):
    prediction.append(i*prediction[-1])
pylab.plot(btc_price,label="Bitcoin Price")
pylab.plot(prediction,label="Model Price with accumulating Error")
pylab.legend()
pylab.show()
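
As a cross-check on calculate_scores (not part of the original script), the same precision, recall, and F1 numbers can also be computed with scikit-learn, assuming it is installed and the script above has already been run so that log_price_change, random_sample and template exist:

from sklearn.metrics import precision_recall_fscore_support

# "Price goes up" is the positive class; compare the sign of the model sample
# against the sign of the actual log price change.
y_true = (log_price_change > 0).astype(int)
y_pred = (random_sample > 0).astype(int)
precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="binary", zero_division=0)
print("sklearn cross-check | " + template % (precision, recall, f1))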
