# Import modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing as ps
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
# Load the raw training table from "train.csv" (resolved against the current
# working directory) and echo it for a quick visual check.
first_data = pd.read_csv(filepath_or_buffer="train.csv")
print(first_data)

# Data cleaning
# Sanity check: report whether every passenger name is distinct.
name_count = len(first_data["Name"].unique())
print(name_count == first_data.shape[0])

# Drop the identifier-like / free-text columns that are not used as features.
first_data = first_data.drop(columns=["Cabin","Name",'PassengerId','Ticket'])

# Fill gaps: Age with the mean age truncated to a whole number, Embarked with
# its most frequent value.
age = float(int(first_data["Age"].mean()))
embarked = first_data["Embarked"].value_counts().idxmax()
first_data.fillna(value={"Age":age,"Embarked":embarked},inplace=True)

# Encode the two categorical columns as 1-based integer codes, numbered in
# order of first appearance in the column.
sex = first_data["Sex"].unique().tolist()
emb = first_data["Embarked"].unique().tolist()
sex_codes = {value: code for code, value in enumerate(sex, start=1)}
emb_codes = {value: code for code, value in enumerate(emb, start=1)}
first_data["Sex"] = first_data["Sex"].apply(lambda value: sex_codes[value])
first_data["Embarked"] = first_data["Embarked"].apply(lambda value: emb_codes[value])

# Remove duplicated rows and renumber the index 0..n-1 so that positional and
# label-based row selection agree afterwards.
first_data = first_data.drop_duplicates().reset_index(drop=True)
import random
# Hold out a random test subset and shuffle the remaining rows for training.
# Row positions are drawn from the table's actual row count rather than a
# hard-coded upper bound: the count after drop_duplicates depends on the data,
# and a fixed bound either indexes past the end or never samples the last rows.
n_rows = first_data.shape[0]
test_size = min(277, n_rows)  # never ask for more test rows than exist
test_set = set(random.sample(range(n_rows), test_size))
test_list = sorted(test_set)
# Membership is tested against the set so building the complement stays linear.
train_list = [x for x in range(n_rows) if x not in test_set]
random.shuffle(train_list)
print(test_list)
print(train_list)
print(len(test_list),len(train_list))
# Sanity check: the two index lists partition the table.
print(len(test_list) + len(train_list) == first_data.shape[0])
# Separate the feature columns from the "Survived" target.
X = first_data[[x for x in first_data.columns if x != "Survived"]]
y = first_data["Survived"]
# Standardize the feature columns with sklearn's preprocessing.scale.
# NOTE(review): scaling happens before the split, so the statistics include
# the held-out test rows.
X = ps.scale(X)

# Materialize the train/test splits as float32 tensors.
x_train = torch.from_numpy(X[train_list]).type(torch.float32)
y_train = torch.from_numpy(y[train_list].values).type(torch.float32)
x_test = torch.from_numpy(X[test_list]).type(torch.float32)
y_test = torch.from_numpy(y[test_list].values).type(torch.float32)

batch = 64
# Fall back to the CPU when CUDA is unavailable. The fallback string must be
# "cpu": "gpu" is not a device type torch.device accepts.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Loaders iterate in fixed order; train_list was already shuffled once above.
train_td = TensorDataset(x_train,y_train)
train_dl = DataLoader(train_td,batch_size=batch,shuffle=False)
test_td = TensorDataset(x_test,y_test)
test_dl = DataLoader(test_td,batch_size=batch,shuffle=False)
# Define the network and train it (loss function: BCELoss())
class Net(nn.Module):
    """Single linear unit followed by a sigmoid.

    Args:
        inp: Number of input features per sample.
    """

    def __init__(self, inp):
        super().__init__()
        self.input = nn.Linear(inp, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Apply the linear layer, then the sigmoid; the last output dimension has size 1."""
        return self.sigmoid(self.input(x))
# Train the sigmoid model with binary cross-entropy, evaluating on the test
# loader after every epoch and recording the test accuracy.
net1 = Net(x_train.size()[1])
net1.to(device)
optimizer = torch.optim.SGD(net1.parameters(),lr=0.001)
loss_func = nn.BCELoss()
high_acc1 = []  # test accuracy after each epoch
for i in range(3000):
    loss_trains = 0.0
    for indexs,(datas,labels) in enumerate(train_dl):
        datas, labels = datas.to(device), labels.to(device)
        predict = net1(datas)
        # Drop only the trailing size-1 axis. A bare squeeze() would also
        # collapse a size-1 batch and no longer match the shape of `labels`.
        loss_train = loss_func(predict.squeeze(1),labels)
        loss_trains += loss_train.item()
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()
        if indexs % 2 == 0:
            print("batch_train_loss:",loss_train.item())
    # Report the mean batch loss so it is on the same scale as test_loss below.
    print("train_loss:{:.4f}".format(loss_trains/len(train_dl)))
    with torch.no_grad():
        loss_tests = 0.0
        accuracy = 0.0
        for data,label in test_dl:
            data, label = data.to(device), label.to(device)
            out = net1(data)
            loss_test = loss_func(out,label.view_as(out))
            loss_tests += loss_test.item()
            # Threshold the sigmoid output at 0.5 to get a 0/1 prediction.
            pred = out.gt(0.5).float().squeeze(1)
            accuracy += pred.eq(label).sum().item()
    print("epoch:{} test_loss:{:.4f} test_accuracy:{:.4f}".format(i,loss_tests/len(test_dl),accuracy/x_test.size()[0]))
    high_acc1.append(round(accuracy/x_test.size()[0],4))
# Best test accuracy seen across all epochs.
print(max(high_acc1))

# Define the network and train it (loss function: CrossEntropyLoss())
class Net2(nn.Module):
    """Single linear layer producing two raw scores (logits) per sample.

    Args:
        inp: Number of input features per sample.
    """

    def __init__(self, inp):
        super().__init__()
        self.input = nn.Linear(inp, 2)

    def forward(self, x):
        """Return the output of the linear layer; no activation is applied."""
        return self.input(x)
# Train the two-logit model with cross-entropy, evaluating on the test loader
# after every epoch and recording the test accuracy.
net2 = Net2(x_train.size()[1])
net2.to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net2.parameters(),lr=0.001)
high_acc2 = []  # test accuracy after each epoch
for i in range(3000):
    loss_trains = 0.0
    for indexs,(datas,labels) in enumerate(train_dl):
        # The labels are stored as float32; cast them to integer class indices.
        labels = labels.type(torch.long)
        datas, labels = datas.to(device), labels.to(device)
        predict = net2(datas)
        # Pass the (batch, 2) logits unchanged: squeeze() would collapse a
        # size-1 batch and break the pairing with `labels`.
        loss_train = loss_func(predict,labels)
        loss_trains += loss_train.item()
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()
        if indexs % 2 == 0:
            print("batch_train_loss:",loss_train.item())
    # Report the mean batch loss so it is on the same scale as test_loss below.
    print("train_loss:{:.4f}".format(loss_trains/len(train_dl)))
    with torch.no_grad():
        loss_tests = 0.0
        accuracy = 0.0
        for data,label in test_dl:
            label = label.type(torch.long)
            data, label = data.to(device), label.to(device)
            out = net2(data)
            # Score the logits against the true labels. The earlier code passed
            # `out` as its own target, so the reported loss ignored the labels.
            loss_test = loss_func(out,label)
            loss_tests += loss_test.item()
            # Predicted class is the index of the larger logit.
            pred = torch.argmax(out,dim=1)
            accuracy += pred.eq(label).sum().item()
    print("epoch:{} test_loss:{:.4f} test_accuracy:{:.4f}".format(i,loss_tests/len(test_dl),accuracy/x_test.size()[0]))
    high_acc2.append(round(accuracy/x_test.size()[0],4))
# Best test accuracy seen across all epochs.
print(max(high_acc2))
