import numpy as np
__author__ = 'ZengDong'
""" In this chapter, we shall cover the following topics: functions working on arrays; loading arrays from files; writing arrays to files; simple mathematical and statistical functions. """
""" 1. File I/O """
""" 1.1 savetxt As an example of file I/O, we will create an identity matrix and store its contents in a file. """
# Build a 2x2 identity matrix, echo it, then persist it as plain text.
i2 = np.eye(N=2)
print(i2)
np.savetxt(fname="eye.txt", X=i2)
""" 当前目录中出现文件: eye.txt 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 """
""" 2. CSV files (comma-separated values) 逗号分隔值 """
""" 2.1 loadtxt How do we deal with CSV files? Luckily, the loadtxt function can conveniently read CSV files, split up the fields, and load the data into NumPy arrays data.csv 填入以下内容: AAPL,28-01-2011, ,344.17,344.4,333.53,336.1,21144800 总结: We have set the delimiter to ','(comma), since we are dealing with a comma-separated value file. The usecols parameter is set through a tuple to get the seventh and eighth fields, which correspond to the close price and volume. unpack is set to True, which means that data will be unpacked and assigned to the c and v variables that will hold the close price and volume, respectively. """
# Fields 6 and 7 (zero-based) of each CSV row are read; unpack=True hands
# each selected column back separately, here as c (close) and v (volume).
c, v = np.loadtxt(
    fname="data.csv",
    delimiter=",",
    usecols=(6, 7),
    unpack=True,
)
print(c, v)
""" 3. Volume-weighted average price (VWAP) 加权求和然后除以权值之和 eg: 100 3 200 1 即:(100*3 + 200*1)/ (3+1) """
# Re-read close prices (c) and volumes (v) so this section runs on its own.
c, v = np.loadtxt(
    fname="data.csv",
    delimiter=",",
    usecols=(6, 7),
    unpack=True,
)
# VWAP: the close prices averaged with the volumes as weights.
vwap = np.average(a=c, weights=v)
print("VWAP = ", vwap)
""" 4. mean function """
# Arithmetic mean of the integers 0..9.
m = np.arange(10)
mm = m.mean()
print(mm)
# 5. Time-weighted average price (TWAP)
# The idea is that recent price quotes are more important, so we should give
# recent prices higher weights.
# Weights are 0, 1, ..., len(c) - 1, so later quotes count more
# (the very first quote gets weight zero).
t = np.arange(len(c))
print("twap = ", np.average(a=c, weights=t))
""" 6. Value range min max ptp: 即: peak-to-peak distance returns the difference between the maximum and minimum values of an array. """
# Fields 4 and 5 (zero-based) are loaded as h (high) and l (low).
h, l = np.loadtxt(
    fname="data.csv",
    delimiter=",",
    usecols=(4, 5),
    unpack=True,
)
print("highest = ", h.max())
print("lowest = ", l.min())
# ptp ("peak to peak") is the maximum minus the minimum of the array.
print("spread high price = ", np.ptp(h))
print("spread low price = ", np.ptp(l))
""" 输出: ('highest = ', 364.89999999999998) ('lowest = ', 333.52999999999997) ('spread high price = ', 24.859999999999957) ('spread low price = ', 26.970000000000027) """
""" 7. Statistics median msort var variance """
# Field 6 (zero-based) of each CSV row is loaded as c (close prices).
c = np.loadtxt("data.csv", delimiter=",", usecols=(6, ), unpack=True)
print("median = ", np.median(c))
# np.msort was deprecated in NumPy 1.24 and removed in 2.0;
# np.sort(a, axis=0) is its documented replacement.
sorted_close = np.sort(c, axis=0)
print("sorted_clost = ", sorted_close)
N = len(sorted_close)
# Floor division keeps the positions integral: in Python 3 "/" yields a float,
# which NumPy rejects as an array index.
lower_middle = (N - 1) // 2
upper_middle = N // 2
print("middle = ", sorted_close[lower_middle])
# For odd N both positions coincide; for even N this averages the two
# central values, which matches the median.
print("average middle = ", (sorted_close[upper_middle] + sorted_close[lower_middle]) / 2)
print("variance = ", np.var(c))
print("variance from definition = ", np.mean((c - c.mean()) ** 2))
# Note on variance: np.var computes the population variance, i.e. the mean of
# the squared deviations from the mean (the sum of squared deviations divided
# by the number of elements N, not by N - 1).
print("888888888888888888888888888888888888888888888888888888888888")
# 8. Stock returns
# In academic literature it is more common to base analysis on stock returns
# and log returns of the close price.
# (1) Simple returns are the rate of change from one value to the next:
#     the next value minus the previous one, relative to the previous one.
# (2) Logarithmic returns (log returns) take the log of all the prices and
#     then the differences between them, since log(a) - log(b) = log(a / b).
# diff:  returns an array of the differences between consecutive elements;
#        the result is one element shorter than the close prices array.
# log:   element-wise natural logarithm.
# where: returns the indices of the array elements that satisfy a condition.
c = np.loadtxt(
    "data.csv",
    delimiter=",",
    usecols=(6,),
    unpack=True,
)
# Simple returns: each difference divided by the earlier of the two prices.
returns = np.diff(c) / c[:-1]
print("Standard deviation = ", returns.std())
logreturns = np.diff(np.log(c))
posretindices = np.where(returns > 0)
print("Indices with positive returns", posretindices)
# Ratio of std to mean of the log returns, then divided by sqrt(1/252)
# (252 is presumably the number of trading days per year -- confirm).
annual_volatility = (logreturns.std() / logreturns.mean()) / np.sqrt(1./252.)
print("Annual volatility", annual_volatility)
print("Monthly volatility", annual_volatility * np.sqrt(1./12.))
print("99999999999999999999999999999999999999999999999999999999999")
# 9. Dates
# where:  returns the indices of the array elements that conform to a
#         specified condition.
# take:   uses these indices to take the values of the corresponding
#         array items.
# argmax: returns the index of the highest value in the array.
from datetime import datetime
# Loading the date column as-is fails with
#     ValueError: invalid literal for float(): 28-01-2011
# because NumPy tries to convert the dates into floats. The loadtxt function
# has a special parameter for this purpose: converters.
def datestr2num(s):
    """Convert a ``DD-MM-YYYY`` date to its weekday number.

    Args:
        s: The date text, either ``str`` or ``bytes``. Some NumPy versions
            hand raw bytes to ``loadtxt`` converters, so both are accepted.

    Returns:
        int: 0 for Monday through 6 for Sunday.

    Raises:
        ValueError: If the text does not match the ``%d-%m-%Y`` format.
    """
    if isinstance(s, bytes):
        # strptime only accepts str; decode what loadtxt passed in.
        s = s.decode("latin1")
    return datetime.strptime(s, "%d-%m-%Y").date().weekday()
# Field 1 is turned into a weekday number by datestr2num; field 6 is loaded
# alongside it as close.
dates, close = np.loadtxt(
    "data.csv",
    delimiter=",",
    usecols=(1, 6),
    converters={1: datestr2num},
    unpack=True,
)
print(dates)
# One slot per weekday number 0..4.
averages = np.zeros(5)
for i in range(5):
    # np.where returns a tuple of index arrays, so take() yields a 2-D result.
    indices = np.where(dates == i)
    prices = close.take(indices)
    avg = prices.mean()
    print("Day", i, "prices", prices, "Average", avg)
    averages[i] = avg
""" 输出: ('Day', 0, 'prices', array([[ 339.32, 351.88, 359.18, 353.21, 355.36]]), 'Average', 351.79000000000008) ('Day', 1, 'prices', array([[ 345.03, 355.2 , 359.9 , 338.61, 349.31, 355.76]]), 'Average', 350.63500000000005) ('Day', 2, 'prices', array([[ 344.32, 358.16, 363.13, 342.62, 352.12, 352.47]]), 'Average', 352.1366666666666) ('Day', 3, 'prices', array([[ 343.44, 354.54, 358.3 , 342.88, 359.56, 346.67]]), 'Average', 350.89833333333331) ('Day', 4, 'prices', array([[ 336.1 , 346.5 , 356.85, 350.56, 348.16, 360. , 351.99]]), 'Average', 350.02285714285711) """
# Largest per-weekday average and the weekday number it belongs to.
top = averages.max()
print("Highest average", top)
print("Top day in the week", averages.argmax())
# Smallest per-weekday average and the weekday number it belongs to.
bootom = averages.min()
print("Lowest average", bootom)
print("Bottom day of the week", averages.argmin())
""" 输出: ('Highest average', 352.1366666666666) ('Top day in the week', 2) ('Lowest average', 350.02285714285711) ('Bottom day of the week', 4) """
print("10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 ")
# 10. Weekly summary
# If you are interested in the cotton market and have decades of data, you
# might want to summarize and compress the data even further.
# ravel:            flattens an array.
# apply_along_axis: applies a function to the 1-D slices of an array along
#                   the given axis.
# savetxt:          takes a filename, the array to store, a delimiter (here a
#                   comma), and the format used to store floating point numbers:
#     c       character
#     d or i  signed decimal integer
#     e or E  scientific notation with e or E
#     f       decimal floating point
#     g or G  use the shorter of e, E, or f
#     o       signed octal
#     s       string of characters
#     u       unsigned decimal integer
#     x or X  unsigned hexadecimal integer
# NOTE: the name "open" shadows the builtin from here on; it is kept because
# later statements in this script read it.
dates, open, high, low, close = np.loadtxt(
    "data.csv",
    delimiter=",",
    usecols=(1, 3, 4, 5, 6),
    converters={1: datestr2num},
    unpack=True,
)
# Only dates and close are truncated to the first 16 rows.
dates = dates[:16]
close = close[:16]
# Position of the first weekday-0 (Monday) entry.
first_monday = np.flatnonzero(dates == 0)[0]
print("The first Monday index is", first_monday)
# Position of the last weekday-4 (Friday) entry.
last_friday = np.flatnonzero(dates == 4)[-1]
print("The last Friday index is", last_friday)
weeks_indices = np.arange(first_monday, last_friday + 1)
print("Weeks indices initial", weeks_indices)
# np.split with an integer requires the range to divide into 3 equal parts.
weeks_indices = np.split(weeks_indices, 3)
print("Weeks indices after split", weeks_indices)
def summarize(a, o, h, l,c):
    """Summarize one run of row indices.

    Args:
        a: Sequence of row indices.
        o: Values indexed by the first entry of ``a``.
        h: Values whose maximum over ``a`` is taken.
        l: Values whose minimum over ``a`` is taken.
        c: Values indexed by the last entry of ``a``.

    Returns:
        tuple: ``("Appl", o[a[0]], max of h over a, min of l over a, c[a[-1]])``.
    """
    first_idx, last_idx = a[0], a[-1]
    highs = np.take(h, a)
    lows = np.take(l, a)
    return ("Appl", o[first_idx], highs.max(), lows.min(), c[last_idx])
# Run summarize over every row (axis 1) of the stacked index groups.
weeksummary = np.apply_along_axis(
    summarize, 1, weeks_indices, open, high, low, close
)
print("Week summary", weeksummary)
# fmt="%s" writes every entry via its string form.
np.savetxt(fname="weekssummary.csv", X=weeksummary, delimiter=",", fmt="%s")