Better than simple Gaussian Approximation? #2

Open
julienrueter opened this issue Jan 18, 2018 · 1 comment
julienrueter commented Jan 18, 2018

Hi Siraj,
I've experimented with RNNs/NNs/CNNs and crypto price prediction in the past, and most of the time these models just learned the global statistical distribution of the dataset, because in the short term nothing more than a random walk model applies.
To illustrate this, I built a simple Python script that fits a Gaussian distribution to the data and draws samples from it to evaluate this simple price prediction model.
The result is that this simple "model" has a precision, recall, and F1 score similar (within mean + 2*std) to those of the RNN that uses just the price as input and nothing else.
Best regards,
Julien

Result of the simple Gaussian model:
Mean Precision: 0.5823 | Mean Recall: 0.5561 | Mean F1 Score: 0.5686
STD Precision: 0.0227 | STD Recall: 0.0348 | STD F1 Score: 0.0266
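
To make the "within mean + 2*std" comparison concrete, here is a small sketch that checks whether a model's F1 score escapes the Gaussian baseline's band, using the numbers reported above; the RNN score below is a placeholder, not a measured result:

# Baseline F1 statistics as reported above; rnn_f1 is a hypothetical placeholder
baseline_mean_f1, baseline_std_f1 = 0.5686, 0.0266
rnn_f1 = 0.57  # placeholder value for illustration only, not a measured RNN score
if rnn_f1 > baseline_mean_f1 + 2 * baseline_std_f1:
    print("Model clearly beats the Gaussian baseline")
else:
    print("Model is within the baseline's mean + 2*std band (no real edge)")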

julienrueter (Author) commented:
import numpy as np
import pylab
# Daily Bitcoin price over the last 365 days
btc_price = np.array([901.794, 888.635, 890.932, 922.625, 930.491, 923.679, 909.375, 898.577, 906.981, 920.549, 922.622, 922.771, 921.125, 941.198, 974.957, 993.804, 1013.64, 1025.74, 1034.65, 1039.16, 1058.07, 1050.5, 1074.72, 970.852, 1002.01, 1004.24, 992.074, 1003.21, 1003.65, 1017.93, 1034.6, 1056.87, 1044.41, 1055.49, 1103.93, 1107.3, 1138.92, 1159.65, 1170.05, 1160.19, 1166.51, 1175.4, 1194.09, 1224.9, 1279.5, 1266.24, 1252.98, 1273.7, 1219.51, 1203.96, 1172.79, 1192.81, 1184.95, 1179.9, 1235.38, 1232.11, 1251.13, 1227.21, 1159.92, 1088.41, 1007.05, 1056.94, 1098.27, 1065.67, 1055.08, 982.209, 925.862, 982.121, 1011.44, 1064.16, 1053.31, 1025.39, 1041.74, 1083.86, 1082.16, 1128.58, 1135.66, 1124.41, 1159.78, 1173.72, 1183.98, 1178.89, 1186.64, 1196.99, 1198.42, 1185.01, 1163.22, 1185.53, 1177.18, 1173.4, 1216.91, 1208.31, 1216.64, 1224.21, 1218.76, 1220.78, 1222.4, 1261.74, 1277.44, 1293.52, 1310.59, 1315.48, 1316.88, 1372.39, 1466.32, 1465.37, 1543.12, 1606.32, 1570.62, 1569.09, 1661.17, 1827.47, 1741.45, 1838.73, 1809.64, 1762.47, 1802.03, 1756.81, 1742.49, 1826.54, 1839.17, 1957.02, 1994.14, 2058.74, 2212.54, 2258.93, 2458.2, 2729.71, 2457.24, 2007.24, 2234.27, 2197.8, 2286.44, 2201.21, 2435.48, 2412.8, 2505.07, 2526.52, 2576.8, 2898.3, 2838.14, 2749.44, 2850.36, 2796.87, 2914.06, 2794.05, 2736.73, 2608.5, 2264.09, 2480.87, 2592.96, 2561.17, 2584.1, 2643.21, 2729.53, 2712.12, 2744.13, 2694.57, 2658.48, 2520.31, 2520.75, 2532.09, 2549.52, 2508.69, 2435.93, 2494.07, 2613.7, 2557.76, 2581.69, 2559.6, 2514.31, 2579.24, 2496.37, 2404.25, 2410.17, 2348.28, 2195.55, 2042.68, 1892.59, 2178.64, 2352.79, 2280.52, 2699.62, 2645.96, 2785.17, 2758.94, 2757.5, 2571.07, 2527.5, 2671.57, 2809.01, 2729.69, 2752.85, 2872.31, 2713.63, 2711.93, 2802.24, 2899.33, 3253.94, 3218.02, 3374.01, 3419.01, 3345.37, 3391.23, 3654.33, 3883.17, 4062.87, 4306.43, 4181.96, 4374.59, 4349.1, 4128.86, 4193.7, 4087.66, 4001.74, 4100.52, 4151.52, 4334.68, 4371.6, 4352.4, 4383.28, 4387.51, 4555.86, 4557.24, 4718.37, 4910.47, 4631.97, 4539.47, 4157.95, 4453.47, 4533.17, 4575.35, 4145.38, 4203.61, 4209.58, 4207.6, 4079.7, 3912.01, 3343.47, 3716.3, 3564.06, 3645.36, 4000.31, 3912.91, 3858.69, 3670.53, 3608.36, 3782.44, 3700.48, 3935.72, 3909.23, 4162.55, 4170.8, 4212.62, 4335.09, 4417.06, 4420.15, 4312.44, 4247.14, 4320.53, 4362.94, 4431.99, 4614.8, 4826.02, 4778.94, 4846.95, 5509.22, 5618.17, 5835.97, 5669.7, 5606.32, 5521.39, 5634.66, 5697.31, 6074.33, 6058.24, 6040.02, 5696.78, 5430.46, 5745.22, 5956.62, 5840.19, 5739.39, 6129.01, 6144.72, 6423.43, 6883.19, 7042.21, 7073.37, 7404.87, 7384.71, 7119.88, 7331.84, 7391.52, 7276.36, 6837.83, 6172.27, 5959.35, 6689.06, 6796.43, 7203.09, 7894.08, 7542.92, 7841.41, 8012.17, 8221.34, 8126.75, 8231.85, 7953.04, 8208.21, 8841.21, 9671.47, 9750.23, 10271.1, 10618.3, 9895.34, 11157.8, 11054.5, 11420.8, 11716.3, 12269.2, 14286.0, 17839.9, 16065.0, 14228.7, 16556.2, 17059.3, 16855.2, 16255.9, 17766.3, 17672.2, 19345.6, 18621.3, 19053.0, 17040.4, 16674.1, 14895.1, 14427.6, 13732.5, 13565.8, 14140.3, 16282.9, 14750.1, 15167.8, 13538.0, 13529.8, 13760.1, 13795.7, 15288.6, 15396.8, 15809.6, 17355.6, 17151.9, 16141.8, 15362.3, 14434.0, 14521.7, 13608.5, 14285.9, 14360.4, 13747.8, 13390.0, 10954.9, 11887.5, 11644.1])

# Price ratio of Bitcoin at time t: price[t]/price[t-1]
change_in_price = btc_price[1:]/btc_price[:-1]

# Assumption: the log of the price change can be described by a normal distribution
log_price_change = np.log(change_in_price)
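
# Optional sanity check (not part of the original script; assumes scipy is installed):
# D'Agostino's normality test gives a rough idea of how well a normal distribution
# describes the log price changes (a small p-value suggests a poor fit).
from scipy import stats
normality_stat, normality_p = stats.normaltest(log_price_change)
print("Normality test statistic: %.3f | p-value: %.4f" % (normality_stat, normality_p))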


def gauss(x,m,sig):
    # Gaussian probability density; m = mean, sig = variance
    y = np.exp(-(x-m)**2/(2*sig))/(2*np.pi*sig)**0.5
    return y


def simple_model(m,sig,log_price_change):
    # Draw a random sample from the Gaussian: np.random.normal needs the mean, the standard deviation (sqrt of sig) and the sample length
    random_sample = np.random.normal(m,sig**0.5,len(log_price_change))
    return random_sample

def calculate_scores(log_price_change,random_sample):
    # Confusion matrix: rows = predicted sign (up/down), columns = actual sign (up/down)
    mat = np.zeros((2,2))
    
    for i in range(len(random_sample)):
        if random_sample[i]>0:
            if log_price_change[i] > 0:
                mat[0,0] += 1
            else:
                mat[0,1] += 1
        else:
            if log_price_change[i] > 0:
                mat[1,0] += 1
            else:
                mat[1,1] += 1
    precision = mat[0,0]/(mat[0,0]+mat[0,1])
    recall = mat[0,0]/(mat[0,0]+mat[1,0])
    f1_score = 2*precision*recall/(precision+recall)
    return precision,recall,f1_score


# x range for plotting the fitted Gaussian
x = np.linspace(min(log_price_change)-0.1,max(log_price_change)+0.1,100)
# Mean of log_price_change
mean_log_price_change = np.mean(log_price_change)
# Variance of log_price_change
var_log_price_change = np.var(log_price_change)

# Gaussian distribution
distribution = gauss(x,mean_log_price_change,var_log_price_change)

# Plot Gaussian vs Histogram plot of Real Data
pylab.hist(log_price_change,20,density=True,label="Hist of Log Price Change")
pylab.plot(x,distribution,label="Gaussian")
pylab.legend()
pylab.show()


# Plot simple_model vs real data | these plots are a bit misleading because each prediction starts from the true price of the previous timestep, so the previous prediction's error is erased at every step
random_sample = simple_model(mean_log_price_change,var_log_price_change,log_price_change)
pylab.subplot(211)
pylab.plot(btc_price[1:],label="Bitcoin Price")
# invert the log of the model sample and multiply by the price at time t-1
pylab.plot(btc_price[:-1]*np.exp(random_sample),label="Model Price")
pylab.legend()
# Log price change vs model sample | a more honest graphical representation of the prediction accuracy
pylab.subplot(212)
pylab.plot(log_price_change,label="Log Price Change")
pylab.plot(random_sample,label="Model Sample Data")
pylab.legend()
pylab.show()

# Calculate Scores
template = "Precision: %.4f | Recall: %.4f | F1 Score: %.4f"
scores = calculate_scores(log_price_change,random_sample)
print(template%scores)
print("-"*20)
# Now test multiple samples to get a range of scores
for i in range(10):
    random_sample = simple_model(mean_log_price_change,var_log_price_change,log_price_change)
    scores = calculate_scores(log_price_change,random_sample)
    print("Sample",i,template%scores)
# Mean and standard deviation of the scores over many samples
template_mean = "Mean Precision: %.4f | Mean Recall: %.4f | Mean F1 Score: %.4f"
template_std = "STD Precision: %.4f | STD Recall: %.4f | STD F1 Score: %.4f"
sample_scores = []
for i in range(1000):
    random_sample = simple_model(mean_log_price_change,var_log_price_change,log_price_change)
    scores = calculate_scores(log_price_change,random_sample)
    sample_scores.append(list(scores))

sample_scores = np.array(sample_scores)
print(template_mean%tuple(np.mean(sample_scores,axis=0).tolist()))
print(template_std%tuple(np.std(sample_scores,axis=0).tolist()))

# Sample prediction with accumulating model error vs reality (each step builds on the previous model price instead of the true price)
prediction = [btc_price[0]]
for i in np.exp(random_sample):
    prediction.append(i*prediction[-1])
pylab.plot(btc_price,label="Bitcoin Price")
pylab.plot(prediction,label="Model Price with accumulating Error")
pylab.legend()
pylab.show()
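
As a cross-check on calculate_scores (not part of the original script), the same precision, recall, and F1 numbers can also be computed with scikit-learn, assuming it is installed and the script above has already been run so that log_price_change, random_sample and template exist:

from sklearn.metrics import precision_recall_fscore_support

# "Price goes up" is the positive class; compare the sign of the model sample
# against the sign of the actual log price change.
y_true = (log_price_change > 0).astype(int)
y_pred = (random_sample > 0).astype(int)
precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="binary", zero_division=0)
print("sklearn cross-check | " + template % (precision, recall, f1))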
