《深入浅出Pandas》第一章代码
数据来源:https://www.gairuo.com/file/data/dataset/team.xlsx
import numpy as np
import pandas as pd
file_path = 'E:\\Data_python\\anconda_code\\Dive_into_Pandas\\data_files\\'
team_path = 'team.xlsx'
team_df = pd.read_excel(file_path + team_path)
team_df.head(5)
|
name |
team |
Q1 |
Q2 |
Q3 |
Q4 |
0 |
Liver |
E |
89 |
21 |
24 |
64 |
1 |
Arry |
C |
36 |
37 |
37 |
57 |
2 |
Ack |
A |
57 |
60 |
18 |
84 |
3 |
Eorge |
C |
93 |
96 |
71 |
78 |
4 |
Oah |
D |
65 |
49 |
61 |
86 |
team_df.tail(5)
|
name |
team |
Q1 |
Q2 |
Q3 |
Q4 |
95 |
Gabriel |
C |
48 |
59 |
87 |
74 |
96 |
Austin7 |
C |
21 |
31 |
30 |
43 |
97 |
Lincoln4 |
C |
98 |
93 |
1 |
20 |
98 |
Eli |
E |
11 |
74 |
58 |
91 |
99 |
Ben |
E |
21 |
43 |
41 |
74 |
team_df.sample(6)
|
name |
team |
Q1 |
Q2 |
Q3 |
Q4 |
33 |
Adam |
C |
90 |
32 |
47 |
39 |
54 |
Ollie3 |
C |
10 |
76 |
30 |
36 |
82 |
Finn |
E |
4 |
1 |
55 |
32 |
85 |
Liam |
B |
2 |
80 |
24 |
25 |
58 |
Lewis |
B |
4 |
34 |
77 |
28 |
27 |
Finley |
D |
62 |
73 |
84 |
68 |
team_df.shape
(100, 6)
team_df.info()
RangeIndex: 100 entries, 0 to 99
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 name 100 non-null object
1 team 100 non-null object
2 Q1 100 non-null int64
3 Q2 100 non-null int64
4 Q3 100 non-null int64
5 Q4 100 non-null int64
dtypes: int64(4), object(2)
memory usage: 4.8+ KB
team_df.describe()
|
Q1 |
Q2 |
Q3 |
Q4 |
count |
100.000000 |
100.000000 |
100.000000 |
100.000000 |
mean |
49.200000 |
52.550000 |
52.670000 |
52.780000 |
std |
29.962603 |
29.845181 |
26.543677 |
27.818524 |
min |
1.000000 |
1.000000 |
1.000000 |
2.000000 |
25% |
19.500000 |
26.750000 |
29.500000 |
29.500000 |
50% |
51.500000 |
49.500000 |
55.000000 |
53.000000 |
75% |
74.250000 |
77.750000 |
76.250000 |
75.250000 |
max |
98.000000 |
99.000000 |
99.000000 |
99.000000 |
team_df.dtypes
name object
team object
Q1 int64
Q2 int64
Q3 int64
Q4 int64
dtype: object
team_df.axes
[RangeIndex(start=0, stop=100, step=1),
Index(['name', 'team', 'Q1', 'Q2', 'Q3', 'Q4'], dtype='object')]
team_df.columns
Index(['name', 'team', 'Q1', 'Q2', 'Q3', 'Q4'], dtype='object')
team_df.set_index('name', inplace=True)
team_df
|
team |
Q1 |
Q2 |
Q3 |
Q4 |
name |
|
|
|
|
|
Liver |
E |
89 |
21 |
24 |
64 |
Arry |
C |
36 |
37 |
37 |
57 |
Ack |
A |
57 |
60 |
18 |
84 |
Eorge |
C |
93 |
96 |
71 |
78 |
Oah |
D |
65 |
49 |
61 |
86 |
... |
... |
... |
... |
... |
... |
Gabriel |
C |
48 |
59 |
87 |
74 |
Austin7 |
C |
21 |
31 |
30 |
43 |
Lincoln4 |
C |
98 |
93 |
1 |
20 |
Eli |
E |
11 |
74 |
58 |
91 |
Ben |
E |
21 |
43 |
41 |
74 |
100 rows × 5 columns
team_df['Q1']
name
Liver 89
Arry 36
Ack 57
Eorge 93
Oah 65
..
Gabriel 48
Austin7 21
Lincoln4 98
Eli 11
Ben 21
Name: Q1, Length: 100, dtype: int64
team_df.Q1
name
Liver 89
Arry 36
Ack 57
Eorge 93
Oah 65
..
Gabriel 48
Austin7 21
Lincoln4 98
Eli 11
Ben 21
Name: Q1, Length: 100, dtype: int64
team_df[['Q1','Q3']]
|
Q1 |
Q3 |
name |
|
|
Liver |
89 |
24 |
Arry |
36 |
37 |
Ack |
57 |
18 |
Eorge |
93 |
71 |
Oah |
65 |
61 |
... |
... |
... |
Gabriel |
48 |
87 |
Austin7 |
21 |
30 |
Lincoln4 |
98 |
1 |
Eli |
11 |
58 |
Ben |
21 |
41 |
100 rows × 2 columns
team_df.loc[:,['Q1','Q2']]
|
Q1 |
Q2 |
name |
|
|
Liver |
89 |
21 |
Arry |
36 |
37 |
Ack |
57 |
60 |
Eorge |
93 |
96 |
Oah |
65 |
49 |
... |
... |
... |
Gabriel |
48 |
59 |
Austin7 |
21 |
31 |
Lincoln4 |
98 |
93 |
Eli |
11 |
74 |
Ben |
21 |
43 |
100 rows × 2 columns
team_df[team_df.index == 'Liver']
|
team |
Q1 |
Q2 |
Q3 |
Q4 |
name |
|
|
|
|
|
Liver |
E |
89 |
21 |
24 |
64 |
team_df[0:3]
|
team |
Q1 |
Q2 |
Q3 |
Q4 |
name |
|
|
|
|
|
Liver |
E |
89 |
21 |
24 |
64 |
Arry |
C |
36 |
37 |
37 |
57 |
Ack |
A |
57 |
60 |
18 |
84 |
team_df[0:10:2]
|
team |
Q1 |
Q2 |
Q3 |
Q4 |
name |
|
|
|
|
|
Liver |
E |
89 |
21 |
24 |
64 |
Ack |
A |
57 |
60 |
18 |
84 |
Oah |
D |
65 |
49 |
61 |
86 |
Acob |
B |
61 |
95 |
94 |
8 |
Reddie |
D |
64 |
93 |
57 |
72 |
team_df.iloc[:10, :]
|
team |
Q1 |
Q2 |
Q3 |
Q4 |
name |
|
|
|
|
|
Liver |
E |
89 |
21 |
24 |
64 |
Arry |
C |
36 |
37 |
37 |
57 |
Ack |
A |
57 |
60 |
18 |
84 |
Eorge |
C |
93 |
96 |
71 |
78 |
Oah |
D |
65 |
49 |
61 |
86 |
Harlie |
C |
24 |
13 |
87 |
43 |
Acob |
B |
61 |
95 |
94 |
8 |
Lfie |
A |
9 |
10 |
99 |
37 |
Reddie |
D |
64 |
93 |
57 |
72 |
Oscar |
A |
77 |
9 |
26 |
67 |
team_df.loc['Ben', 'Q1':'Q4']
team_df.loc['Eorge':'Alexander', 'team':'Q4']
|
team |
Q1 |
Q2 |
Q3 |
Q4 |
name |
|
|
|
|
|
Eorge |
C |
93 |
96 |
71 |
78 |
Oah |
D |
65 |
49 |
61 |
86 |
Harlie |
C |
24 |
13 |
87 |
43 |
Acob |
B |
61 |
95 |
94 |
8 |
Lfie |
A |
9 |
10 |
99 |
37 |
Reddie |
D |
64 |
93 |
57 |
72 |
Oscar |
A |
77 |
9 |
26 |
67 |
Leo |
B |
17 |
4 |
33 |
79 |
Logan |
B |
9 |
89 |
35 |
65 |
Archie |
C |
83 |
89 |
59 |
68 |
Theo |
C |
51 |
86 |
87 |
27 |
Thomas |
B |
80 |
48 |
56 |
41 |
James |
E |
48 |
77 |
52 |
11 |
Joshua |
A |
63 |
4 |
80 |
30 |
Henry |
A |
91 |
15 |
75 |
17 |
William |
C |
80 |
68 |
3 |
26 |
Max |
E |
97 |
75 |
41 |
3 |
Lucas |
A |
60 |
41 |
77 |
62 |
Ethan |
D |
79 |
45 |
89 |
88 |
Arthur |
A |
44 |
53 |
42 |
40 |
Mason |
D |
80 |
96 |
26 |
49 |
Isaac |
E |
74 |
23 |
28 |
65 |
Harrison |
B |
89 |
13 |
18 |
75 |
Teddy |
E |
71 |
91 |
21 |
48 |
Finley |
D |
62 |
73 |
84 |
68 |
Daniel |
C |
50 |
50 |
72 |
61 |
Riley |
E |
35 |
26 |
59 |
83 |
Edward |
B |
57 |
38 |
86 |
87 |
Joseph |
E |
67 |
87 |
87 |
93 |
Alexander |
C |
91 |
76 |
26 |
79 |
team_df[team_df.team == 'C']
team_df[team_df.Q1 > 90]
team_df[team_df.index=='Qsar']
|
team |
Q1 |
Q2 |
Q3 |
Q4 |
name |
|
|
|
|
|
Oscar |
A |
77 |
9 |
26 |
67 |
team_df[(team_df.Q1 > 90) & (team_df.team == 'C')]
|
team |
Q1 |
Q2 |
Q3 |
Q4 |
name |
|
|
|
|
|
Eorge |
C |
93 |
96 |
71 |
78 |
Alexander |
C |
91 |
76 |
26 |
79 |
Lincoln4 |
C |
98 |
93 |
1 |
20 |
team_df[team_df['team']=='C'].loc[team_df.Q1 > 90]
|
team |
Q1 |
Q2 |
Q3 |
Q4 |
name |
|
|
|
|
|
Eorge |
C |
93 |
96 |
71 |
78 |
Alexander |
C |
91 |
76 |
26 |
79 |
Lincoln4 |
C |
98 |
93 |
1 |
20 |
排序
team_df.sort_values(by='Q1')
|
team |
Q1 |
Q2 |
Q3 |
Q4 |
name |
|
|
|
|
|
Sebastian |
C |
1 |
14 |
68 |
48 |
Harley |
B |
2 |
99 |
12 |
13 |
Liam |
B |
2 |
80 |
24 |
25 |
Lewis |
B |
4 |
34 |
77 |
28 |
Finn |
E |
4 |
1 |
55 |
32 |
... |
... |
... |
... |
... |
... |
Eorge |
C |
93 |
96 |
71 |
78 |
Aaron |
A |
96 |
75 |
55 |
8 |
Elijah |
B |
97 |
89 |
15 |
46 |
Max |
E |
97 |
75 |
41 |
3 |
Lincoln4 |
C |
98 |
93 |
1 |
20 |
100 rows × 5 columns
team_df.sort_values(by='Q3', ascending=False)
|
team |
Q1 |
Q2 |
Q3 |
Q4 |
name |
|
|
|
|
|
David |
B |
21 |
47 |
99 |
2 |
Lfie |
A |
9 |
10 |
99 |
37 |
Luke6 |
D |
15 |
97 |
95 |
99 |
Acob |
B |
61 |
95 |
94 |
8 |
Blake |
A |
78 |
23 |
93 |
9 |
... |
... |
... |
... |
... |
... |
Harley |
B |
2 |
99 |
12 |
13 |
Jake3 |
C |
69 |
23 |
11 |
40 |
Jayden6 |
D |
64 |
21 |
10 |
21 |
William |
C |
80 |
68 |
3 |
26 |
Lincoln4 |
C |
98 |
93 |
1 |
20 |
100 rows × 5 columns
team_df.sort_values(['team','Q2'], ascending=[True, False])
|
team |
Q1 |
Q2 |
Q3 |
Q4 |
name |
|
|
|
|
|
Dylan |
A |
86 |
87 |
65 |
20 |
Nathan |
A |
87 |
77 |
62 |
13 |
Aaron |
A |
96 |
75 |
55 |
8 |
Stanley |
A |
69 |
71 |
39 |
97 |
Ack |
A |
57 |
60 |
18 |
84 |
... |
... |
... |
... |
... |
... |
Liver |
E |
89 |
21 |
24 |
64 |
Rory9 |
E |
8 |
12 |
58 |
27 |
Jackson5 |
E |
6 |
10 |
15 |
33 |
Roman |
E |
73 |
1 |
25 |
44 |
Finn |
E |
4 |
1 |
55 |
32 |
100 rows × 5 columns
分组聚合
team_df.groupby('team').sum()
|
Q1 |
Q2 |
Q3 |
Q4 |
team |
|
|
|
|
A |
1066 |
639 |
875 |
783 |
B |
975 |
1218 |
1202 |
1136 |
C |
1056 |
1194 |
1068 |
1127 |
D |
860 |
1191 |
1241 |
1199 |
E |
963 |
1013 |
881 |
1033 |
team_df.groupby('team').mean()
|
Q1 |
Q2 |
Q3 |
Q4 |
team |
|
|
|
|
A |
62.705882 |
37.588235 |
51.470588 |
46.058824 |
B |
44.318182 |
55.363636 |
54.636364 |
51.636364 |
C |
48.000000 |
54.272727 |
48.545455 |
51.227273 |
D |
45.263158 |
62.684211 |
65.315789 |
63.105263 |
E |
48.150000 |
50.650000 |
44.050000 |
51.650000 |
team_df.groupby('team').agg({'Q1': sum,
'Q2': 'count',
'Q3': 'mean',
'Q4': max})
|
Q1 |
Q2 |
Q3 |
Q4 |
team |
|
|
|
|
A |
1066 |
17 |
51.470588 |
97 |
B |
975 |
22 |
54.636364 |
99 |
C |
1056 |
22 |
48.545455 |
98 |
D |
860 |
19 |
65.315789 |
99 |
E |
963 |
20 |
44.050000 |
98 |
数据转换
team_df.groupby('team').sum()
|
Q1 |
Q2 |
Q3 |
Q4 |
team |
|
|
|
|
A |
1066 |
639 |
875 |
783 |
B |
975 |
1218 |
1202 |
1136 |
C |
1056 |
1194 |
1068 |
1127 |
D |
860 |
1191 |
1241 |
1199 |
E |
963 |
1013 |
881 |
1033 |
team_df.groupby('team').sum().T
team |
A |
B |
C |
D |
E |
Q1 |
1066 |
975 |
1056 |
860 |
963 |
Q2 |
639 |
1218 |
1194 |
1191 |
1013 |
Q3 |
875 |
1202 |
1068 |
1241 |
881 |
Q4 |
783 |
1136 |
1127 |
1199 |
1033 |
team_df.groupby('team').sum().stack()
team
A Q1 1066
Q2 639
Q3 875
Q4 783
B Q1 975
Q2 1218
Q3 1202
Q4 1136
C Q1 1056
Q2 1194
Q3 1068
Q4 1127
D Q1 860
Q2 1191
Q3 1241
Q4 1199
E Q1 963
Q2 1013
Q3 881
Q4 1033
dtype: int64
team_df.groupby('team').sum().unstack()
team
Q1 A 1066
B 975
C 1056
D 860
E 963
Q2 A 639
B 1218
C 1194
D 1191
E 1013
Q3 A 875
B 1202
C 1068
D 1241
E 881
Q4 A 783
B 1136
C 1127
D 1199
E 1033
dtype: int64
添加列
team_df['grade'] = 'Grade 1'
team_df.head(5)
|
team |
Q1 |
Q2 |
Q3 |
Q4 |
grade |
name |
|
|
|
|
|
|
Liver |
E |
89 |
21 |
24 |
64 |
Grade 1 |
Arry |
C |
36 |
37 |
37 |
57 |
Grade 1 |
Ack |
A |
57 |
60 |
18 |
84 |
Grade 1 |
Eorge |
C |
93 |
96 |
71 |
78 |
Grade 1 |
Oah |
D |
65 |
49 |
61 |
86 |
Grade 1 |
team_df['total'] = team_df['Q1'] + team_df['Q2'] + team_df['Q3'] + team_df['Q4']
team_df.head(5)
|
team |
Q1 |
Q2 |
Q3 |
Q4 |
grade |
total |
name |
|
|
|
|
|
|
|
Liver |
E |
89 |
21 |
24 |
64 |
Grade 1 |
198 |
Arry |
C |
36 |
37 |
37 |
57 |
Grade 1 |
167 |
Ack |
A |
57 |
60 |
18 |
84 |
Grade 1 |
219 |
Eorge |
C |
93 |
96 |
71 |
78 |
Grade 1 |
338 |
Oah |
D |
65 |
49 |
61 |
86 |
Grade 1 |
261 |
team_df['total1'] = team_df.loc[:,'Q1':'Q4'].apply(lambda x : sum(x), axis=1)
team_df['total2'] = team_df.sum(axis=1)
team_df['avg'] = team_df.total/4
team_df
|
team |
Q1 |
Q2 |
Q3 |
Q4 |
grade |
total |
total1 |
total2 |
avg |
name |
|
|
|
|
|
|
|
|
|
|
Liver |
E |
89 |
21 |
24 |
64 |
Grade 1 |
198 |
198 |
1188 |
49.50 |
Arry |
C |
36 |
37 |
37 |
57 |
Grade 1 |
167 |
167 |
1002 |
41.75 |
Ack |
A |
57 |
60 |
18 |
84 |
Grade 1 |
219 |
219 |
1314 |
54.75 |
Eorge |
C |
93 |
96 |
71 |
78 |
Grade 1 |
338 |
338 |
2028 |
84.50 |
Oah |
D |
65 |
49 |
61 |
86 |
Grade 1 |
261 |
261 |
1566 |
65.25 |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
Gabriel |
C |
48 |
59 |
87 |
74 |
Grade 1 |
268 |
268 |
1608 |
67.00 |
Austin7 |
C |
21 |
31 |
30 |
43 |
Grade 1 |
125 |
125 |
750 |
31.25 |
Lincoln4 |
C |
98 |
93 |
1 |
20 |
Grade 1 |
212 |
212 |
1272 |
53.00 |
Eli |
E |
11 |
74 |
58 |
91 |
Grade 1 |
234 |
234 |
1404 |
58.50 |
Ben |
E |
21 |
43 |
41 |
74 |
Grade 1 |
179 |
179 |
1074 |
44.75 |
100 rows × 10 columns