02.心电图心跳信号多分类预测挑战赛【数据分析】

1. 导入数据科学及可视化包

#coding:utf-8
# Install a catch-all 'ignore' filter so warnings raised by the libraries below
# are not printed during the analysis.
import warnings
warnings.filterwarnings('ignore')
# Data handling (pandas / numpy) and plotting (matplotlib / seaborn).
import pandas as pd
from pandas import DataFrame,Series
import matplotlib.pyplot as plt 
import seaborn as sns
import numpy as np

2.加载数据,检查数据头尾

# Load the training set and preview its first and last five rows.
# pd.concat is used instead of DataFrame.append, which was removed in
# pandas 2.0. The preview is printed explicitly because a bare expression's
# value is discarded when this code runs as a script.
Train_data = pd.read_csv('data/train.csv')
print(pd.concat([Train_data.head(), Train_data.tail()]))
print(Train_data.shape)

# Load the test set and preview it the same way.
Test_data = pd.read_csv('data/testA.csv')
print(pd.concat([Test_data.head(), Test_data.tail()]))
print(Test_data.shape)

3.使用 describe() 和 info() 查看数据的统计概况与基本信息

# Summary statistics and column overview for both datasets.
# describe() only returns a frame, so it is printed explicitly; info() writes
# its report to stdout by itself and returns None.
print(Train_data.describe())
Train_data.info()
print(Test_data.describe())
Test_data.info()

4.检查空值

# Count missing values per column in each dataset. The counts are printed
# explicitly so that both results are visible, not just the last expression.
print(Train_data.isnull().sum())
print(Test_data.isnull().sum())

5.label分布情况(概率密度,峰度,偏度,频数)

# Number of training rows per value of the 'label' column, printed so the
# result is visible when run as a script as well as interactively.
print(Train_data['label'].value_counts())

可视化

# 1) Overall distribution of the label: default KDE view, then normal and
#    log-normal fits.
# NOTE: sns.distplot is deprecated in seaborn >= 0.11. It is kept here because
# its replacements (histplot / displot) have no `fit=` option.
import scipy.stats as st

y = Train_data['label']

plt.figure(1)
plt.title('Default')
sns.distplot(y, rug=True, bins=20)

plt.figure(2)
plt.title('Normal')
sns.distplot(y, kde=False, fit=st.norm)

plt.figure(3)
plt.title('Log Normal')
sns.distplot(y, kde=False, fit=st.lognorm)

# 2) Skewness and kurtosis.
# Every remaining plot opens its own figure; otherwise they would all be drawn
# on top of the log-normal plot in figure 3.
plt.figure(4)
sns.distplot(y)
print("Skewness: %f" % y.skew())
print("Kurtosis: %f" % y.kurt())

# numeric_only=True keeps the frame-wide statistics working on pandas >= 2.0,
# where a non-numeric column would otherwise raise instead of being skipped.
column_skew = Train_data.skew(numeric_only=True)
column_kurt = Train_data.kurt(numeric_only=True)
print(column_skew, column_kurt)

plt.figure(5)
sns.distplot(column_kurt, color='orange', axlabel='Kurtness')

# 3) Raw frequency of each label value.
plt.figure(6)
plt.hist(y, orientation='vertical', histtype='bar', color='red')
plt.show()

6.使用pandas_profiling生成报告

# Build a profiling report of the training set and save it as HTML.
# NOTE(review): pandas_profiling has been renamed ydata-profiling upstream;
# the import is left unchanged to avoid adding a new dependency.
import os

import pandas_profiling

# Make sure the output directory exists so that writing the report cannot fail
# on a missing 'explore/' folder.
os.makedirs("explore", exist_ok=True)
pfr = pandas_profiling.ProfileReport(Train_data)
pfr.to_file("explore/example.html")

你可能感兴趣的:(02.心电图心跳信号多分类预测挑战赛【数据分析】)