# Notes on "NumPy Beginner's Guide", Chapter 3, Part 1

# -*- coding: utf-8 -*-
import numpy as np
__author__ = 'ZengDong'
# Date = 17:51
"""
In this chapter, we shall cover the following topics:
  - Functions working on arrays
  - Loading arrays from files
  - Writing arrays to files
  - Simple mathematical and statistical functions
"""




""" 1. File I/O """
""" 1.1 savetxt As an example of file I/O, we will create an identity matrix and store its contents in a file. """
# Build a 2x2 identity matrix, echo it, and persist it as plain text.
i2 = np.eye(2)
print(i2)
np.savetxt(fname="eye.txt", X=i2)
# After this runs, eye.txt appears in the current directory and contains:
#   1.000000000000000000e+00 0.000000000000000000e+00
#   0.000000000000000000e+00 1.000000000000000000e+00


""" 2. CSV files (comma-separated values) 逗号分隔值 """
""" 2.1 loadtxt How do we deal with CSV files? Luckily, the loadtxt function can conveniently read CSV files, split up the fields, and load the data into NumPy arrays data.csv 填入以下内容: AAPL,28-01-2011, ,344.17,344.4,333.53,336.1,21144800 总结: We have set the delimiter to ','(comma), since we are dealing with a comma-separated value file. The usecols parameter is set through a tuple to get the seventh and eighth fields, which correspond to the close price and volume. unpack is set to True, which means that data will be unpacked and assigned to the c and v variables that will hold the close price and volume, respectively. """
# Read the close price and the volume (zero-based columns 6 and 7, i.e. the
# seventh and eighth comma-separated fields). unpack=True hands back one
# array per requested column, so they can be bound to c and v directly.
c, v = np.loadtxt(fname="data.csv", delimiter=",", usecols=(6, 7), unpack=True)
# Output recorded by the author: (336.10000000000002, 21144800.0)
print(c, v)



""" 3. Volume-weighted average price (VWAP) 加权求和然后除以权值之和 eg: 100 3 200 1 即:(100*3 + 200*1)/ (3+1) """
# Volume-weighted average price: the close prices averaged with the traded
# volume as weights, i.e. sum(c * v) / sum(v).
# Example: prices 100 and 200 with volumes 3 and 1 give (100*3 + 200*1) / (3+1).
c, v = np.loadtxt(fname="data.csv", delimiter=",", usecols=(6, 7), unpack=True)
vwap = np.average(a=c, weights=v)
print("VWAP = ", vwap)


""" 4. mean function """
# Arithmetic mean of the integers 0..9.
m = np.arange(10)
mm = m.mean()
print(mm)    # prints 4.5



""" 5. Time-weithted average price The idea is that recent price quotes are more important, so we should give recent prices higher weights """
# Time-weighted average price: weight each close price by its position in
# the series (0, 1, 2, ...), so later quotes count more; the very first
# quote gets weight 0. Uses the close-price array `c` loaded earlier.
t = np.arange(len(c))
twap = np.average(c, weights=t)
print("twap = ", twap)



""" 6. Value range min max ptp: 即: peak-to-peak distance returns the difference between the maximum and minimum values of an array. """
# Read the file again, this time keeping the daily high and low prices
# (zero-based columns 4 and 5).
h, l = np.loadtxt("data.csv", delimiter=",", usecols=(4, 5), unpack=True)

# Extremes of the price range: the largest high and the smallest low.
highest = h.max()
lowest = l.min()
print("highest = ", highest)
print("lowest = ", lowest)

# ptp ("peak to peak") is the difference between an array's max and min.
print("spread high price = ", np.ptp(h))
print("spread low price = ", np.ptp(l))

# Output recorded by the author:
#   ('highest = ', 364.89999999999998)
#   ('lowest = ', 333.52999999999997)
#   ('spread high price = ', 24.859999999999957)
#   ('spread low price = ', 26.970000000000027)




""" 7. Statistics median msort var variance """
# Median and variance of the close price, each cross-checked by hand.
c = np.loadtxt("data.csv", delimiter=",", usecols=(6, ), unpack=True)
print("median = ", np.median(c))  # author's output: ('median = ', 352.05500000000001)

# Cross-check: mimic the median algorithm by sorting the close prices and
# printing the middle value of the sorted array.
# np.sort(..., axis=0) is the documented replacement for np.msort, which is
# deprecated and no longer available in recent NumPy releases.
sorted_close = np.sort(c, axis=0)
print("sorted_clost = ", sorted_close)
N = len(sorted_close)
# Floor division keeps the index an integer; true division would produce a
# float, which is not a valid array index on Python 3.
print("middle = ", sorted_close[(N - 1) // 2])  # author's output: ('middle = ', 351.99000000000001)

# That differs from what np.median printed: the naive "middle element" only
# works for odd-length arrays. For even-length arrays the median is the
# average of the two middle values.
print("average middle = ", (sorted_close[N // 2] + sorted_close[(N - 1) // 2]) / 2)
# author's output: ('average middle = ', 352.05500000000001)


# Another statistical measure of interest is the variance.
print("variance = ", np.var(c))  # author's output: ('variance = ', 50.126517888888884)
print("variance from definition = ", np.mean((c - c.mean()) ** 2))  # same value
# The variance computed here is the mean of the squared deviations from the
# mean, i.e. the sum of squared deviations divided by the number of elements
# N (not by N - 1).

print("888888888888888888888888888888888888888888888888888888888888")
# 8. Stock returns
# Academic literature usually bases analysis on the returns of the close
# price rather than on the price itself:
#   (1) Simple returns: the rate of change from one value to the next,
#       (c[i+1] - c[i]) / c[i].
#   (2) Log returns: take the log of every price and difference neighbours;
#       log(a) - log(b) = log(a / b).
# Differencing consecutive elements yields an array one element shorter than
# the close-price array, which is why the divisor below drops the last price.
# np.where returns the indices of the elements that satisfy a condition.
c = np.loadtxt("data.csv", delimiter=",", usecols=(6,), unpack=True)
previous_close = c[:-1]
returns = (c[1:] - previous_close) / previous_close
print("Standard deviation = ", returns.std())


# Log returns; strictly, the input should first be checked for zeros or
# negative numbers, for which the logarithm is not usable.
log_prices = np.log(c)
logreturns = log_prices[1:] - log_prices[:-1]
posretindices = np.where(returns > 0)
print("Indices with positive returns", posretindices)  # indices of the positive returns

# In investing, volatility measures the price variation of a financial security.
# NOTE(review): the ratio std/mean of the log returns is scaled by
# 1/sqrt(1/252) for the annual figure and then by sqrt(1/12) for the monthly
# one, exactly as the original notes do - confirm this is the intended definition.
annual_volatility = logreturns.std() / logreturns.mean()
annual_volatility /= np.sqrt(1. / 252.)
print("Annual volatility", annual_volatility)
print("Monthly volatility", annual_volatility * np.sqrt(1. / 12.))








print("99999999999999999999999999999999999999999999999999999999999")
# 9. Dates
#   where : returns the indices of the array elements that satisfy a condition.
#   take  : uses such indices to pick the corresponding array items.
#   argmax: returns the index of the highest value in the array.
from datetime import datetime

# Loading the date column without a converter, e.g.
#     dates, close = np.loadtxt("data.csv", delimiter=",", usecols=(1, 6), unpack=True)
# fails:
#   1. error: ValueError: invalid literal for float(): 28-01-2011
#   2. NumPy tried to convert the dates into floats.
#   3. loadtxt has a special parameter (converters) for this purpose.
def datestr2num(s):
    """Convert a ``DD-MM-YYYY`` date string to its weekday number.

    Intended as a ``converters`` callback for ``np.loadtxt``.

    Args:
        s: The date field, either as text or as bytes. Some NumPy versions
            hand converters raw bytes on Python 3, so bytes are decoded
            before parsing.

    Returns:
        The weekday as an int, where Monday is 0 and Sunday is 6.

    Raises:
        ValueError: If the text does not match the ``%d-%m-%Y`` format.
    """
    if isinstance(s, bytes):
        # strptime only accepts text; date fields are plain ASCII digits/dashes.
        s = s.decode("ascii")
    return datetime.strptime(s, "%d-%m-%Y").date().weekday()
# Hook the date converter into loadtxt: column 1 (the date) is mapped to a
# weekday number, column 6 is the close price.
dates, close = np.loadtxt("data.csv", delimiter=",", usecols=(1, 6), converters={1: datestr2num}, unpack=True)
# Author's output:
#   [ 4. 0. 1. 2. 3. 4. 0. 1. 2. 3. 4. 0. 1. 2. 3. 4. 1. 2. 3. 4. 0. 1. 2. 3. 4. 0. 1. 2. 3. 4.]
print(dates)

# One slot per working day (0 = Monday ... 4 = Friday) for the average close.
averages = np.zeros(5)

# For every weekday: find the matching positions with where, pull the
# corresponding close prices with take, and store their mean.
# np.where returns a tuple, so the taken prices print as a 2-D array with a
# single row (see the recorded output below).
for weekday in range(5):
    day_prices = np.take(close, np.where(dates == weekday))
    day_average = np.mean(day_prices)
    print("Day", weekday, "prices", day_prices, "Average", day_average)
    averages[weekday] = day_average
# Output recorded by the author:
#   ('Day', 0, 'prices', array([[ 339.32, 351.88, 359.18, 353.21, 355.36]]), 'Average', 351.79000000000008)
#   ('Day', 1, 'prices', array([[ 345.03, 355.2 , 359.9 , 338.61, 349.31, 355.76]]), 'Average', 350.63500000000005)
#   ('Day', 2, 'prices', array([[ 344.32, 358.16, 363.13, 342.62, 352.12, 352.47]]), 'Average', 352.1366666666666)
#   ('Day', 3, 'prices', array([[ 343.44, 354.54, 358.3 , 342.88, 359.56, 346.67]]), 'Average', 350.89833333333331)
#   ('Day', 4, 'prices', array([[ 336.1 , 346.5 , 356.85, 350.56, 348.16, 360. , 351.99]]), 'Average', 350.02285714285711)

# Which weekday has the highest average close, and which the lowest?
# Uses the per-weekday `averages` array filled in earlier in the script.
top = averages.max()
print("Highest average", top)
print("Top day in the week", averages.argmax())
bootom = averages.min()
print("Lowest average", bootom)
print("Bottom day of the week", averages.argmin())

# Output recorded by the author:
#   ('Highest average', 352.1366666666666)
#   ('Top day in the week', 2)
#   ('Lowest average', 350.02285714285711)
#   ('Bottom day of the week', 4)



print("10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 ")
# 10. Weekly summary
# If you are interested in the cotton market and have decades of data, you
# might want to summarize and compress the data even further.
#   ravel            : flattens an array.
#   apply_along_axis : calls another function on each 1-D slice of an array.
#   savetxt          : takes a filename, the array to store, a delimiter
#                      (here a comma) and the format for the stored values.
# Format specifiers:
#   c       character
#   d or i  signed decimal integer
#   e or E  scientific notation with e or E
#   f       decimal floating point
#   g or G  use the shorter of e, E, or f
#   o       signed octal
#   s       string of characters
#   u       unsigned decimal integer
#   x or X  unsigned hexadecimal integer
# NOTE(review): `open` shadows the builtin of the same name; it is kept
# because the later apply_along_axis call passes the array by this name.
dates, open, high, low, close = np.loadtxt(
    "data.csv",
    delimiter=",",
    usecols=(1, 3, 4, 5, 6),
    converters={1: datestr2num},
    unpack=True,
)

# Only the first three weeks of the sample are considered here (a later
# refinement could handle the whole file).
close, dates = close[:16], dates[:16]

# Position of the first Monday (weekday 0) in the sample.
first_monday = np.where(dates == 0)[0][0]
print("The first Monday index is", first_monday)   # author's output: ('The first Monday index is', 1)

# Position of the last Friday (weekday 4) in the sample.
last_friday = np.where(dates == 4)[0][-1]
print("The last Friday index is", last_friday)    # author's output: ('The last Friday index is', 15)

# Indices of every day from the first Monday through the last Friday.
weeks_indices = np.arange(first_monday, last_friday + 1)
print("Weeks indices initial", weeks_indices)

# Cut the index range into three equal pieces, one per week (five days each
# for the recorded data).
weeks_indices = np.split(weeks_indices, 3)
# Author's output:
#   ('Weeks indices after split', [array([1, 2, 3, 4, 5], dtype=int64),
#    array([ 6, 7, 8, 9, 10], dtype=int64), array([11, 12, 13, 14, 15], dtype=int64)])
print("Weeks indices after split", weeks_indices)


#call the apply_along_axis function, supplying the name of our own function, called summarize
def summarize(a, o, h, l, c):
    """Summarize one week of price data.

    Args:
        a: Indices of the days that make up the week, in order.
        o: Open prices, indexable by the entries of ``a``.
        h: High prices.
        l: Low prices.
        c: Close prices.

    Returns:
        A tuple ``("Appl", open at the first index, highest high over the
        week, lowest low over the week, close at the last index)``.
    """
    # NOTE(review): the label is spelled "Appl" in the original notes; it is
    # reproduced unchanged because it ends up in the written summary.
    opening = o[a[0]]
    highest = np.take(h, a).max()
    lowest = np.take(l, a).min()
    closing = c[a[-1]]
    return ("Appl", opening, highest, lowest, closing)
# Run summarize over every row of week indices (axis 1); the price arrays are
# forwarded to summarize as extra positional arguments.
weeksummary = np.apply_along_axis(summarize, 1, weeks_indices, open, high, low, close)
print("Week summary", weeksummary)


# Store the weekly summary as comma-separated text; "%s" writes each entry
# as a string.
np.savetxt("weekssummary.csv", weeksummary, delimiter=",", fmt="%s")


# You may also be interested in: (numpy)