Skip to content

Python 3.x support #75

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 32 additions & 32 deletions Examples/Basic/numpy-tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,33 +35,33 @@
#
#
## Let's get started!
print "Importing numpy"
print("Importing numpy")
import numpy as np

## This loads the numpy library and lets us refer to it by the shorthand "np",
## which is the convention used in the numpy documentation and in many
## online tutorials/examples

print "Creating arrays"
print("Creating arrays")
## Now let's make an array to play around with. You can make numpy arrays in
## a number of ways,
## Filled with zeros:
zeroArray = np.zeros( (2,3) ) # [[ 0. 0. 0.]
print zeroArray # [ 0. 0. 0.]]
print(zeroArray) # [ 0. 0. 0.]]

## Or ones:
oneArray = np.ones( (2,3) ) # [[ 1. 1. 1.]
print oneArray # [ 1. 1. 1.]]
print(oneArray) # [ 1. 1. 1.]]

## Or filled with junk:
emptyArray = np.empty( (2,3) )
print emptyArray
print(emptyArray)

## Note, emptyArray might look random, but it's just uninitialized which means
## you shouldn't count on it having any particular data in it, even random
## data! If you do want random data you can use random():
randomArray = np.random.random( (2,3) )
print randomArray
print(randomArray)

## If you're following along and trying these commands out, you should have
## noticed that making randomArray took a lot longer than emptyArray. That's
Expand All @@ -74,29 +74,29 @@
[4,5,6]]

myArray = np.array(foo) # [[1 2 3]
print myArray # [4 5 6]]
print(myArray) # [4 5 6]]


print "Reshaping arrays"
print("Reshaping arrays")
## Of course, if you're typing out a range for a larger matrix, it's easier to
## use arange(...):
rangeArray = np.arange(6,12).reshape( (2,3) ) # [[ 6 7 8]
print rangeArray # [ 9 10 11]]
print(rangeArray) # [ 9 10 11]]

## There are two things going on here. First, the arange(...) function returns a
## 1D array similar to what you'd get from using the built-in python function
## range(...) with the same arguments, except it returns a numpy array
## instead of a list.
print np.arange(6,12) # [ 6 7 8 9 10 11]
print(np.arange(6,12)) # [ 6 7 8 9 10 11]

## the reshape method takes the data in an existing array, and stuffs it into
## an array with the given shape and returns it.
print rangeArray.reshape( (3,2) ) # [[ 6 7]
print(rangeArray.reshape( (3,2) )) # [[ 6 7]
# [ 8 9]
# [10 11]]

#The original array doesn't change though.
print rangeArray # [[ 6 7 8]
print(rangeArray) # [[ 6 7 8]
# [ 9 10 11]]

## When you use reshape(...) the total number of things in the array must stay
Expand All @@ -106,69 +106,69 @@
squareArray = np.arange(1,10).reshape( (3,3) ) #this is fine, 9 elements


print "Accessing array elements"
print("Accessing array elements")
## Accessing an array is also pretty straightforward. You access a specific
## spot in the table by referring to its row and column inside square braces
## after the array:
print rangeArray[0,1] #7
print(rangeArray[0,1]) #7

## Note that row and column numbers start from 0, not 1! Numpy also lets you
## refer to ranges inside an array:
print rangeArray[0,0:2] #[6 7]
print squareArray[0:2,0:2] #[[1 2] # the top left corner of squareArray
print(rangeArray[0,0:2]) #[6 7]
print(squareArray[0:2,0:2]) #[[1 2] # the top left corner of squareArray
# [4 5]]

## These ranges work just like slices of Python lists. n:m:t specifies a range
## that starts at n, and stops before m, in steps of size t. If any of these
## are left off, they're assumed to be the start, the end+1, and 1 respectively
print squareArray[:,0:3:2] #[[1 3] #skip the middle column
print(squareArray[:,0:3:2]) #[[1 3] #skip the middle column
# [4 6]
# [7 9]]

## Also like python lists, you can assign values to specific positions, or
## ranges of values to slices
squareArray[0,:] = np.array(range(1,4)) #set the first row to 1,2,3
squareArray[0,:] = np.array(list(range(1,4))) #set the first row to 1,2,3
squareArray[1,1] = 0 # set the middle spot to zero
squareArray[2,:] = 1 # set the last row to ones
print squareArray # [[1 2 3]
print(squareArray) # [[1 2 3]
# [4 0 6]
# [1 1 1]]

## Something new to numpy arrays is indexing using an array of indices:
fibIndices = np.array( [1, 1, 2, 3] )
randomRow = np.random.random( (10,1) ) # an array of 10 random numbers
print randomRow
print randomRow[fibIndices] # the second, second, third and fourth element of
print(randomRow)
print(randomRow[fibIndices]) # the second, second, third and fourth element of
# randomRow (indices are 0-based)

## You can also use an array of true/false values to index:
boolIndices = np.array( [[ True, False, True],
[False, True, False],
[ True, False, True]] )
print squareArray[boolIndices] # a 1D array with the selected values
print(squareArray[boolIndices]) # a 1D array with the selected values
# [1 3 0 1 1]

## It gets a little more complicated with 2D (and higher) arrays. You need
## two index arrays for a 2D array:
rows = np.array( [[0,0],[2,2]] ) #get the corners of our square array
cols = np.array( [[0,2],[0,2]] )
print squareArray[rows,cols] #[[1 3]
print(squareArray[rows,cols]) #[[1 3]
# [1 1]]
boolRows = np.array( [False, True, False] ) # just the middle row
boolCols = np.array( [True, False, True] ) # Not the middle column
print squareArray[boolRows,boolCols] # [4 6]
print(squareArray[boolRows,boolCols]) # [4 6]

print "Operations on arrays"
print("Operations on arrays")
## One useful trick is to create a boolean matrix based on some test and use
## that as an index in order to get the elements of a matrix that pass the
## test:
sqAverage = np.average(squareArray) # average(...) returns the average of all
# the elements in the given array
betterThanAverage = squareArray > sqAverage
print betterThanAverage #[[False False True]
print(betterThanAverage) #[[False False True]
# [ True False True]
# [False False False]]
print squareArray[betterThanAverage] #[3 4 6]
print(squareArray[betterThanAverage]) #[3 4 6]

## Indexing like this can also be used to assign values to elements of the
## array. This is particularly useful if you want to filter an array, say by
Expand All @@ -188,32 +188,32 @@
# truncate them down to integers.
clampedSqArray[ (squareArray-sqAverage) > sqStdDev ] = sqAverage+sqStdDev
clampedSqArray[ (squareArray-sqAverage) < -sqStdDev ] = sqAverage-sqStdDev
print clampedSqArray # [[ 1. 2. 3. ]
print(clampedSqArray) # [[ 1. 2. 3. ]
# [ 3.90272394 0.31949828 3.90272394]
# [ 1. 1. 1. ]]


## Multiplying and dividing arrays by numbers does what you'd expect. It
## multiplies/divides element-wise
print squareArray * 2 # [[ 2 4 6]
print(squareArray * 2) # [[ 2 4 6]
# [ 8 0 12]
# [ 2 2 2]]

## Addition works similarly:
print squareArray + np.ones( (3,3) ) #[[2 3 4]
print(squareArray + np.ones( (3,3) )) #[[2 3 4]
# [5 1 7]
# [2 2 2]]

## Multiplying two arrays together (of the same size) is also element-wise
print squareArray * np.arange(1,10).reshape( (3,3) ) #[[ 1 4 9]
print(squareArray * np.arange(1,10).reshape( (3,3) )) #[[ 1 4 9]
# [16 0 36]
# [ 7 8 9]]

## Unless you use the dot(...) function, which does matrix multiplication
## from linear algebra:
matA = np.array( [[1,2],[3,4]] )
matB = np.array( [[5,6],[7,8]] )
print np.dot(matA,matB) #[[19 22]
print(np.dot(matA,matB)) #[[19 22]
# [43 50]]

## And that's it! There's a lot more to the numpy library, and there are a few
Expand Down
80 changes: 40 additions & 40 deletions Examples/Basic/pandas-tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,67 +20,67 @@
for i in range(1, 6):
ldt_timestamps.append(dt.datetime(2011, 1, i, 16))

print "The index we created has the following dates : "
print ldt_timestamps
print
print("The index we created has the following dates : ")
print(ldt_timestamps)
print()

## TimeSeries
ts_single_value = pd.TimeSeries(0.0, index=ldt_timestamps)
print "A timeseries initialized to one single value : "
## Series
ts_single_value = pd.Series(0.0, index=ldt_timestamps)
print("A timeseries initialized to one single value : ")

na_vals = np.arange(len(ldt_timestamps))
print "Dummy initialized array : "
print na_vals
print
print("Dummy initialized array : ")
print(na_vals)
print()

ts_array = pd.TimeSeries(na_vals, index=ldt_timestamps)
print "A timeseries initialized using a numpy array : "
print ts_array
print
ts_array = pd.Series(na_vals, index=ldt_timestamps)
print("A timeseries initialized using a numpy array : ")
print(ts_array)
print()

print "Reading the timeseries for a particular date"
print "Date : ", ldt_timestamps[1]
print "Value : ", ts_array[ldt_timestamps[1]]
print
print("Reading the timeseries for a particular date")
print("Date : ", ldt_timestamps[1])
print("Value : ", ts_array[ldt_timestamps[1]])
print()

print "Initializing a list of symbols : "
print("Initializing a list of symbols : ")
ls_symbols = ['AAPL', 'GOOG', 'MSFT', 'IBM']
print ls_symbols
print
print(ls_symbols)
print()

print "Initializing a dataframe with one value : "
print("Initializing a dataframe with one value : ")
df_single = pd.DataFrame(index=ldt_timestamps, columns=ls_symbols)
df_single = df_single.fillna(0.0)
print df_single
print
print(df_single)
print()

print "Initializing a dataframe with a numpy array : "
print("Initializing a dataframe with a numpy array : ")
na_vals_2 = np.random.randn(len(ldt_timestamps), len(ls_symbols))
df_vals = pd.DataFrame(na_vals_2, index=ldt_timestamps, columns=ls_symbols)
print df_vals
print
print(df_vals)
print()

print "Access the timeseries of a particular symbol : "
print df_vals[ls_symbols[1]]
print
print("Access the timeseries of a particular symbol : ")
print(df_vals[ls_symbols[1]])
print()

print "Access the timeseries of a particular date : "
print df_vals.ix[ldt_timestamps[1]]
print
print("Access the timeseries of a particular date : ")
print(df_vals.ix[ldt_timestamps[1]])
print()

print "Access the value for a specific symbol on a specific date: "
print df_vals[ls_symbols[1]].ix[ldt_timestamps[1]]
print
print("Access the value for a specific symbol on a specific date: ")
print(df_vals[ls_symbols[1]].ix[ldt_timestamps[1]])
print()

print "Reindexing the dataframe"
print("Reindexing the dataframe")
ldt_new_dates = [dt.datetime(2011, 1, 3, 16),
dt.datetime(2011, 1, 5, 16),
dt.datetime(2011, 1, 7, 16)]
ls_new_symbols = ['AAPL', 'IBM', 'XOM']
df_new = df_vals.reindex(index=ldt_new_dates, columns=ls_new_symbols)
print df_new
print "Observe that reindex carried over whatever values it could find and set the rest to NAN"
print
print(df_new)
print("Observe that reindex carried over whatever values it could find and set the rest to NAN")
print()

print "For pandas rolling statistics please refer : http://pandas.pydata.org/pandas-docs/dev/computation.html#moving-rolling-statistics-moments"
print("For pandas rolling statistics please refer : http://pandas.pydata.org/pandas-docs/dev/computation.html#moving-rolling-statistics-moments")

4 changes: 2 additions & 2 deletions Examples/Basic/tutorial1.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import matplotlib.pyplot as plt
import pandas as pd

print "Pandas Version", pd.__version__
print("Pandas Version", pd.__version__)


def main():
Expand Down Expand Up @@ -49,7 +49,7 @@ def main():
# Reading the data, now d_data is a dictionary with the keys above.
# Timestamps and symbols are the ones that were specified before.
ldf_data = c_dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
d_data = dict(zip(ls_keys, ldf_data))
d_data = dict(list(zip(ls_keys, ldf_data)))

# Filling the data for NAN
for s_key in ls_keys:
Expand Down
10 changes: 5 additions & 5 deletions Examples/Basic/tutorial2.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ def main():
ls_symbols = ['$SPX', 'XOM', 'GOOG', 'GLD']

# Printing the first 5 rows
print "First 5 rows of Price Data:"
print na_price[:5, :]
print
print "First 5 rows of Dates:"
print na_dates[:5, :]
print("First 5 rows of Price Data:")
print(na_price[:5, :])
print()
print("First 5 rows of Dates:")
print(na_dates[:5, :])

# Creating the timestamps from dates read
ldt_timestamps = []
Expand Down
Loading