Keras 是一个开源的深度学习框架,专为快速搭建和实验深度学习模型而设计。它是一个高层神经网络 API,用 Python 语言编写,强调易用性、模块化和可扩展性,允许开发者通过简单的代码快速实现各种深度学习模型(如 CNN、RNN、Transformer 等),尤其适合新手入门和快速验证算法原型。
# 安装(基于 TensorFlow 后端)
pip install tensorflow # 包含 Keras
# Verification: confirm TensorFlow (with its bundled Keras) imports cleanly
import tensorflow as tf
print(tf.keras.__version__)  # print the bundled Keras version
层通过 model.add() 依次添加,输入形状用 shape=( ) 指定:
- shape=(None,) 或 (batch_size,):一维向量输入;
- shape=(batch_size, features):二维特征输入;
- (batch, height, width, channels):图像数据(RGB 为 3 通道);
- (batch, timesteps, features):序列数据。
层的常用属性:
- input_shape:输入张量形状(不包含 batch 维度)。
- output_shape:输出张量形状。
- weights:层的可训练参数(如权重矩阵、偏置项)。
常用层类型:
- 基础层:Dense(全连接层)、Activation(激活函数层)、Dropout(随机失活层)、Flatten(展平层)。
- 卷积与池化:Conv2D(二维卷积)、MaxPooling2D(最大池化)、Conv1D(一维卷积,用于序列数据)。
- 循环层:LSTM、GRU(处理序列数据,如文本、时间序列)。
- 预处理层:TextVectorization(文本向量化)、Normalization(数值归一化)、Resizing(图像尺寸调整)。
- 注意力层:Attention、MultiHeadAttention(Transformer 架构核心)。
- 其他:Embedding(词嵌入层,用于 NLP)、Concatenate(张量拼接)、Add(张量相加)。
模型核心方法:
- compile():配置训练参数(优化器、损失函数、评估指标)。
- fit():执行训练过程(输入数据、epochs、批次大小等)。
- evaluate():评估模型在测试集上的性能。
- predict():对新数据进行预测。
from tensorflow.keras.models import Sequential
# MaxPooling2D is used below, so it must be imported as well
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D

# Sequential model: a linear stack of layers applied in order.
model = Sequential([
    # 32 filters of size 3x3; input is a 28x28 single-channel image
    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),  # downsample by 2 in each spatial dimension
    Flatten(),             # collapse feature maps into a 1-D vector
    Dense(10, activation='softmax')  # 10-way classification output layer
])
model.summary()  # print a layer-by-layer summary of the model
from tensorflow.keras.layers import Input, Dense, Concatenate
from tensorflow.keras.models import Model
# Functional API: define two independent inputs
input_a = Input(shape=(10,))
input_b = Input(shape=(20,))
# One Dense branch per input
branch_a = Dense(64, activation='relu')(input_a)
branch_b = Dense(64, activation='relu')(input_b)
# Merge the branches and produce a single sigmoid output
merged = Concatenate()([branch_a, branch_b])
output = Dense(1, activation='sigmoid')(merged)
# Build the two-input, one-output model
model = Model(inputs=[input_a, input_b], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy')
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, LSTM


class CustomModel(Model):
    """Subclassing API: an LSTM encoder followed by a 1-unit Dense head."""

    def __init__(self, units):
        super().__init__()
        # return_sequences=True keeps the output of every timestep
        self.lstm = LSTM(units, return_sequences=True)
        self.dense = Dense(1)

    def call(self, inputs):
        # inputs shape: (batch, timesteps, features)
        x = self.lstm(inputs)
        # predict from the last timestep's output only
        return self.dense(x[:, -1, :])


model = CustomModel(64)
model.compile(optimizer='rmsprop', loss='mse')
常用损失函数:
- sparse_categorical_crossentropy:标签为整数(未独热编码)。
- categorical_crossentropy:标签为独热编码(需配合 softmax 激活)。
- binary_crossentropy:二分类任务(配合 sigmoid 激活)。
- mean_squared_error(MSE):均方误差。
- mean_absolute_error(MAE):平均绝对误差。
def custom_loss(y_true, y_pred):
    """Custom loss: mean of squared differences between targets and predictions."""
    return tf.reduce_mean(tf.square(y_true - y_pred))

model.compile(loss=custom_loss, optimizer='adam')
常用优化器:
- Adam:自适应学习率(默认参数:lr=0.001,beta_1=0.9,beta_2=0.999)。
- SGD:随机梯度下降(可搭配 Momentum 或 Nesterov 加速)。
- RMSprop:均方根传播(适合循环神经网络)。
from tensorflow.keras.optimizers import Adam

# NOTE(review): the legacy `decay` argument was removed from Keras optimizers
# (Keras 3 / recent TF); for time-based decay use a LearningRateSchedule
# (e.g. tf.keras.optimizers.schedules.ExponentialDecay) — confirm target version.
optimizer = Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss='mse')
常用评估指标:
- 分类指标:accuracy、precision、recall、AUC。
- 回归指标:mae、mse、rmse(需自定义,如 tf.sqrt(mse))。
# Mix built-in metric names with metric objects
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy', tf.keras.metrics.Precision()]
)
# Assumes X_train/y_train are the training data and X_test/y_test the test data
history = model.fit(
X_train, y_train,
epochs=10,
batch_size=32,
validation_split=0.2, # hold out the last 20% of the data as a validation set
shuffle=True
)
# Evaluate on the held-out test set (verbose=0 silences the progress bar)
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test accuracy: {accuracy}")
常用回调函数:
- ModelCheckpoint:保存最优模型(如 save_best_only=True)。
- EarlyStopping:监控验证指标,提前终止训练(如 patience=3)。
- TensorBoard:可视化训练过程(需安装 tensorboard 插件)。
- LearningRateScheduler:动态调整学习率。
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Save only the weights that achieve the lowest validation loss
checkpoint = ModelCheckpoint(
    'best_model.h5',
    monitor='val_loss',
    save_best_only=True,
    mode='min'
)
# Stop once val_accuracy has not improved for 5 epochs,
# then roll back to the best weights observed
early_stop = EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True
)
model.fit(X_train, y_train, callbacks=[checkpoint, early_stop])
使用 TextVectorization 层(将文本转为 token 序列或嵌入索引)。
from tensorflow.keras.layers import TextVectorization

text_vectorizer = TextVectorization(
    max_tokens=10000,   # keep only the 10,000 most frequent tokens
    output_mode='int'   # emit integer token indices
)
# Learn the vocabulary from the (string) dataset before using the layer
text_vectorizer.adapt(text_dataset)
使用 Normalization 层(标准化输入数据)。
from tensorflow.keras.layers import Normalization

normalizer = Normalization(axis=-1)  # normalize along the last (feature) axis
normalizer.adapt(X_train)            # learn per-feature mean and variance
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# NOTE(review): ImageDataGenerator is deprecated in recent Keras releases in
# favor of tf.data pipelines and preprocessing layers — confirm target version.
datagen = ImageDataGenerator(
rotation_range=20, # random rotation range in degrees
width_shift_range=0.2, # horizontal shift as a fraction of width
height_shift_range=0.2, # vertical shift as a fraction of height
shear_range=0.2, # shear transform intensity
zoom_range=0.2, # random zoom range
horizontal_flip=True # randomly flip images horizontally
)
# Stream augmented batches from a directory tree (one subfolder per class)
train_generator = datagen.flow_from_directory(
'data/train',
target_size=(256, 256),
batch_size=32,
class_mode='categorical'
)
from tensorflow.keras.layers import Layer
import tensorflow as tf


class MyDense(Layer):
    """A from-scratch Dense layer: y = activation(x @ w + b)."""

    def __init__(self, units, activation=None):
        super().__init__()
        self.units = units
        # resolve a string name ('relu', ...) or None into a callable
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        # Weights are created lazily, once the input feature size is known
        self.w = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer='glorot_uniform',
            trainable=True
        )
        self.b = self.add_weight(
            shape=(self.units,),
            initializer='zeros',
            trainable=True
        )
        super().build(input_shape)  # mark the layer as built

    def call(self, inputs):
        return self.activation(tf.matmul(inputs, self.w) + self.b)


# Use the custom layer like any built-in layer
model = Sequential([MyDense(64, activation='relu'), Dense(10)])
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
# Load the pretrained model without its classification head
base_model = ResNet50(
weights='imagenet',
include_top=False,
input_shape=(224, 224, 3)
)
base_model.trainable = False  # freeze the pretrained weights
# Build the new classifier on top of the frozen base
inputs = tf.keras.Input(shape=(224, 224, 3))
x = base_model(inputs, training=False)  # inference mode: do not update frozen layers
x = GlobalAveragePooling2D()(x)
outputs = Dense(10, activation='softmax')(x)
# NOTE(review): relies on `tf` and `Model` imported by earlier snippets
model = Model(inputs, outputs)
model.compile(optimizer='adam', loss='categorical_crossentropy')
import tensorflow as tf

# MirroredStrategy: synchronous data-parallel training across local GPUs
strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    # model creation and compilation must happen inside the strategy scope
    model = Sequential([Dense(64, activation='relu'), Dense(10)])
    model.compile(optimizer='adam', loss='mse')
# Scale the global batch size with the number of replicas;
# data is automatically sharded across GPUs
model.fit(X_train, y_train, batch_size=64 * strategy.num_replicas_in_sync)
model.save('model.h5')  # legacy single-file HDF5 format
loaded_model = tf.keras.models.load_model('model.h5')
# SavedModel directory format.
# NOTE(review): the `save_format` argument was removed in Keras 3, where the
# format is inferred from the file extension — confirm target version.
model.save('saved_model', save_format='tf')
# Wrap the trained model as a named inference-only model
inference_model = tf.keras.Model(
inputs=model.input,
outputs=model.output,
name="inference_model"
)
inference_model.save('inference_model', save_format='tf')
正则化方法:
- 使用 Dropout 层(如 Dropout(0.2))。
- 权重正则化(l1/l2):
from tensorflow.keras.layers import Dense
from tensorflow.keras import regularizers

# L2 weight penalty (0.01 * sum of squared weights) added to the training loss
Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01))
使用 LearningRateScheduler 动态调整学习率:
def lr_schedule(epoch):
    """Exponential decay: start at 1e-3, shrink by 5% each epoch."""
    return 1e-3 * 0.95 ** epoch

# NOTE(review): an epoch-indexed function is not a valid `learning_rate` value —
# optimizers accept a float, a LearningRateSchedule, or a zero-argument callable.
# Apply this schedule via the callback instead:
#   model.fit(..., callbacks=[LearningRateScheduler(lr_schedule)])
model.compile(optimizer=Adam(learning_rate=1e-3), loss='mse')
# Multi-output training: per-output losses and weights, keyed by output name
model.compile(
optimizer='adam',
loss={'out1': 'mse', 'out2': 'binary_crossentropy'},
loss_weights={'out1': 1.0, 'out2': 0.5}
)
使用 tf.data.Dataset 加载数据(高效预处理):
dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
# shuffle with a 1024-element buffer, batch, and prefetch so the input
# pipeline overlaps with training
dataset = dataset.shuffle(1024).batch(32).prefetch(tf.data.AUTOTUNE)
model.fit(dataset, epochs=10)
# The `kerastuner` package was renamed; the current import is `keras_tuner`
from keras_tuner import RandomSearch


def build_model(hp):
    """Build a model whose layer width and learning rate are tunable."""
    model = Sequential()
    # hp.Int takes min_value/max_value keyword arguments (not min/max)
    model.add(Dense(hp.Int('units', min_value=32, max_value=256, step=32),
                    activation='relu'))
    model.add(Dense(10, activation='softmax'))
    model.compile(optimizer=Adam(hp.Choice('lr', [1e-3, 1e-4])),
                  loss='categorical_crossentropy')
    return model


tuner = RandomSearch(build_model, objective='val_accuracy', max_trials=5)
tuner.search(X_train, y_train, epochs=10, validation_split=0.2)
best_model = tuner.get_best_models()[0]
Keras 通过抽象底层细节,提供了从数据预处理到模型部署的全流程支持,尤其适合快速验证算法思路和原型开发。掌握其核心逻辑(层的组合、损失与优化器配置、回调机制)后,结合 TensorFlow 生态可进一步扩展至复杂场景(如分布式训练、自定义算子)。建议通过实战项目(如图像分类、NLP 情感分析)加深理解,并关注官方文档(Keras Documentation)获取最新特性。