# Python 打卡 DAY 6

import seaborn as sns

import matplotlib.pyplot as plt

import pandas as pd

# Load the dataset used by the plots below.
data = pd.read_csv(r'data.csv')

# Box plot of the 'Annual Income' column.
# The figure is created BEFORE plotting so that the box plot, the title and
# the axis label all end up on the same 6x4 figure. Opening the figure after
# sns.boxplot() would put the labels on a second, empty figure.
plt.figure(figsize=(6, 4))
sns.boxplot(x=data['Annual Income'])
plt.title('annual income photo')
plt.xlabel('annual income')
# tight_layout has to be called (a bare `plt.tight_layout` is a no-op) and
# runs last so it can account for the title and label.
plt.tight_layout()
# Display is deferred: the figure stays open until plt.show() is called.

# Select a font with CJK glyphs so the Chinese title and label below render.
# NOTE(review): STHeiti is presumably only installed on macOS; confirm or
# choose another CJK font on other systems.
plt.rcParams['font.sans-serif'] = ['STHeiti']
# Draw negative signs with the ASCII hyphen instead of U+2212, which CJK
# fonts frequently lack. (True is matplotlib's default, so setting it to True
# had no effect.)
plt.rcParams['axes.unicode_minus'] = False

# Same box plot of 'Annual Income', this time with Chinese labels.
# Create the figure first so the plot and its labels share one figure.
plt.figure(figsize=(6, 4))
sns.boxplot(x=data['Annual Income'])
plt.title('年收入箱线图')
plt.xlabel('年收入')
# Call tight_layout (with parentheses) once all labels are in place.
plt.tight_layout()
# Display is deferred: the figure stays open until plt.show() is called.

# Print the column names available in the dataset.
print(data.columns)

# Histogram of the 'Years in current job' column.
plt.figure(figsize=(6, 4))
sns.histplot(data['Years in current job'])
# Rotate the tick labels so they do not overlap.
plt.xticks(rotation=45, ha='right')
plt.title('在当前工作年限直方图')
plt.xlabel('在当前工作年限')
plt.ylabel('员工数量')
# tight_layout must actually be called, and after the rotated ticks and
# labels exist, so they are not clipped at the figure edge.
plt.tight_layout()
# Display is deferred: the figure stays open until plt.show() is called.

# Box plot of 'Annual Income' split by the 'Credit Default' column.
plt.figure(figsize=(6, 4))
sns.boxplot(x='Credit Default', y='Annual Income', data=data)
plt.title('Annual income vs.credit default')
plt.xlabel('credit default')
plt.ylabel('annual income')
# tight_layout must be called (the bare attribute reference did nothing);
# it runs last so the title and axis labels are taken into account.
plt.tight_layout()
# Display is deferred: the figure stays open until plt.show() is called.

# Histogram of 'Annual Income', coloured by 'Credit Default', drawn as step
# outlines with a KDE curve overlaid.
plt.figure(figsize=(6, 4))
sns.histplot(
    x='Annual Income',
    hue='Credit Default',
    data=data,
    kde=True,
    element='step',
)
plt.title('Annual Income vs.credit default')
plt.xlabel('annual income')
plt.ylabel('count')
# tight_layout must be called (the bare attribute reference did nothing);
# it runs last so the title and axis labels are taken into account.
plt.tight_layout()
# Display is deferred: the figure stays open until plt.show() is called.

# Count plot of 'Number of Open Accounts', split by 'Credit Default'.
plt.figure(figsize=(6, 4))
# x and hue are column names of the DataFrame passed as data=; seaborn looks
# the columns up in that DataFrame itself.
sns.countplot(x='Number of Open Accounts', hue='Credit Default', data=data)
# Rotate the tick labels so they do not overlap.
plt.xticks(rotation=45, ha='right')
plt.title('number of open account vs.credit default')
plt.xlabel('number of open account')
plt.ylabel('count')
# Both calls need parentheses; the bare `plt.tight_layout` / `plt.show`
# references were no-ops, so nothing was laid out or displayed here.
plt.tight_layout()
plt.show()

# Bucket 'Number of Open Accounts' into labelled ranges.
# pd.cut builds right-closed intervals by default, so without
# include_lowest=True the first bin would be (0, 5] and a value of exactly 0
# would become NaN even though the label is '0-5'.
data['Open Accounts Group'] = pd.cut(
    data['Number of Open Accounts'],
    bins=[0, 5, 10, 15, 20, float('inf')],
    labels=['0-5', '6-10', '11-15', '16-20', '20+'],
    include_lowest=True,
)

# Count plot of the grouped account counts, split by 'Credit Default'.
plt.figure(figsize=(6, 4))
sns.countplot(x='Open Accounts Group', hue='Credit Default', data=data)
plt.title('Number of Open Accounts (Grouped) vs.Credit Default')
plt.xlabel('number of open accounts group')
plt.ylabel('count')
plt.show()


 

import pandas as pd

# Reload the raw dataset for the preprocessing steps.
data = pd.read_csv(r'data.csv')

# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would emit an extra "None" line.
data.info()

# Number of missing values per column.
print(data.isnull().sum())

# First rows of the raw data.
print(data.head())

# Fill missing values column by column: numeric columns get their mean,
# every other column gets its most frequent value (mode).
for column in data.columns:
    # is_numeric_dtype is used instead of comparing the dtype with 'object'
    # so text columns stored under a dedicated string dtype are still
    # treated as non-numeric.
    if pd.api.types.is_numeric_dtype(data[column]):
        fill_value = data[column].mean()
    else:
        modes = data[column].mode()
        if modes.empty:
            # Column holds no non-missing value, so there is no mode to use.
            continue
        fill_value = modes[0]
    # Assign the result back instead of calling fillna(inplace=True) on the
    # selected column: the in-place call acts on an intermediate object and
    # is not guaranteed to update `data`.
    data[column] = data[column].fillna(fill_value)

# Remember the column names that exist before encoding, so the columns added
# by get_dummies can be identified without reading the CSV a second time.
original_columns = set(data.columns)

# One-hot encode the non-numeric columns; drop_first=True drops the first
# level of each encoded column.
data = pd.get_dummies(data, drop_first=True)

# Every column that did not exist before encoding is a new dummy column.
discrete_list = [
    column for column in data.columns if column not in original_columns
]

# Cast the dummy columns to plain integers (0/1).
for column in discrete_list:
    data[column] = data[column].astype(int)

print(data.head())

# 你可能感兴趣的:python、pandas、数据分析