近期肝爆,本文通过 AES-NI 处理器硬件指令集实现 AES-256-CFB 算法,并与 OpenSSL 1.1.x/3.x 库实现的 AES-256-CFB 加密结果进行对比以验证正确性,同时进行能效测试。
源实现已添加了大量的中文注释,如果对实现原理感兴趣的童鞋,可以详细参考阅读本文的C++代码实现,性能这块没得说,预期目的一直是为了 PPP PRIVATE NETWORK™ 2 性能继续提高服务的,否则难有过多的心气,去肝爆重写这个东西。
但我并不否认,本文的AES实现存在致命的平台处理器CPU局限性,但我个人认为,的确是可以通过C/CXX编译器宏,为特定平台提高更为极致的性能优化是有一定必要的,因为在绝大多数情况下,目标也均仅在X86/X64(i386/i686、AMD64)处理器平台上运行。
相互关联的博文:(Me)
通过高级处理器硬件指令集AES-NI实现AES-256-CFB算法。-CSDN博客
通过高级处理器硬件指令集AES-NI实现AES-256-CFB算法并通过OPENSSL加密验证算法正确性。-CSDN博客
OpenSSL 的 AES-NI 支持机制-CSDN博客
C/C++ 检测CPU是否支持AES-NI硬件指令集-CSDN博客
关于AES-256-CFB密钥扩展实现-CSDN博客
AES测试能效对比:
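本文实现与 OpenSSL 两者之间,为了更为严谨,OpenSSL 与本文实现均重复运行十万次(当然大家也可以修改为百万次、千万次进行对比)。总体而言,本文实现的 AES-256-CFB 能效收益最好,前提是你要以 Release 模式开启 -O3 编译;DEBUG 模式下本文代码跑不过 Release 的 OpenSSL 实现。需要注意:测试代码中 OpenSSL 一侧每次加密都会新建 EVP_CIPHER_CTX 并重新初始化密钥,而本文实现复用了预先扩展好的轮密钥,因此对比结果中包含了 OpenSSL 的上下文创建与密钥初始化开销。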
AES测试打印内容:
root@liulilte:~/dev/aes_test/bin# ./aes_test
=== AES-256-CFB 实现验证 ===
=== 加密结果对比 ===
自定义实现加密 (显示前35字节): c6 c7 db a6 3e 4b 65 fe af af 64 a2 7e 7f 4d ca fc df 7e 5f 5b 77 d5 48 74 11 d7 80 7c 6b 97 27 0d 7c 2a
OpenSSL 加密 (显示前35字节): c6 c7 db a6 3e 4b 65 fe af af 64 a2 7e 7f 4d ca fc df 7e 5f 5b 77 d5 48 74 11 d7 80 7c 6b 97 27 0d 7c 2a
加密结果匹配: 是
测试通过: 所有结果匹配!
=== 性能测试 (10万次加密) ===
数据长度: 346 字节
总数据量: 33.00 MB
自定义实现:
总时间: 0.02163 秒
平均时间: 0.00022 毫秒/次
吞吐量: 1525.67 MB/s
OpenSSL 实现:
总时间: 0.06241 秒
平均时间: 0.00062 毫秒/次
吞吐量: 528.74 MB/s
性能比较:
自定义实现比OpenSSL快 65.34%
吞吐量比率: 2.89x
补充(测试程序改成1000万次可更好得体现,两者能效差异,可见本文实现能效是非常好的):
截图:
输出:
root@liulilte:~/dev/aes_test/bin# ./aes_test
=== AES-256-CFB 实现验证 ===
=== 加密结果对比 ===
自定义实现加密 (显示前35字节): c6 c7 db a6 3e 4b 65 fe af af 64 a2 7e 7f 4d ca fc df 7e 5f 5b 77 d5 48 74 11 d7 80 7c 6b 97 27 0d 7c 2a
OpenSSL 加密 (显示前35字节): c6 c7 db a6 3e 4b 65 fe af af 64 a2 7e 7f 4d ca fc df 7e 5f 5b 77 d5 48 74 11 d7 80 7c 6b 97 27 0d 7c 2a
加密结果匹配: 是
测试通过: 所有结果匹配!
=== 性能测试 (1000万次加密) ===
数据长度: 346 字节
总数据量: 3299.71 MB
自定义实现:
总时间: 2.14650 秒
平均时间: 0.00021 毫秒/次
吞吐量: 1537.25 MB/s
OpenSSL 实现:
总时间: 6.21900 秒
平均时间: 0.00062 毫秒/次
吞吐量: 530.59 MB/s
性能比较:
自定义实现比OpenSSL快 65.48%
吞吐量比率: 2.90x
本文源编译注意事项:
如:CMakeLists.txt
1、删除 -fvisibility=hidden
利用SSE/AES/SIMD指令集进行硬件加速时,添加该选项会导致编译不过。
2、对于GCC 8.0以下老编译器(例如默认的GCC 7.X)需要添加以下额外选项。
# 针对旧版GCC的额外选项
IF(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8.0)
ADD_COMPILE_OPTIONS(-march=westmere)
ADD_COMPILE_OPTIONS(-fno-stack-protector)
ENDIF()
3、 利用 CMake 检查当前CXX平台编译器是否支持AES-NI,并启用依赖指令集选项。
# 在编译器标志部分添加
ADD_COMPILE_OPTIONS(-maes -msse4 -mpclmul)
# 检查编译器是否支持AES-NI
INCLUDE(CheckCXXCompilerFlag)
CHECK_CXX_COMPILER_FLAG("-maes" COMPILER_SUPPORTS_AES)
IF(NOT COMPILER_SUPPORTS_AES)
MESSAGE(FATAL_ERROR "COMPILER DOES NOT SUPPORT AES-NI INSTRUCTIONS")
ENDIF()
好了不多说,以下为 C++ 代码算法及测试代码的实现(新建复制到 main.cpp 文件):
#include <wmmintrin.h> // AES-NI intrinsics
#include <emmintrin.h> // SSE2 intrinsics

#include <algorithm> // std::min
#include <cstddef>   // size_t
#include <cstdint>   // uint8_t
#include <cstring>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <memory>    // std::unique_ptr
#include <vector>    // storage for test data

#include <openssl/evp.h> // OpenSSL EVP cipher API
// AES-256 key schedule built on the AES-NI key-generation intrinsic.
//
// key       : 32 bytes of cipher key (read with unaligned loads).
// round_key : receives 15 round keys, round_key[0] .. round_key[14].
//
// The two halves of the cipher key become round keys 0 and 1. After that the
// schedule alternates: even-numbered round keys are derived from the previous
// even key plus an assist value computed from the latest odd key (with a round
// constant), and odd-numbered round keys are derived from the previous odd key
// plus an assist value computed from the latest even key (round constant 0).
void aes256_key_expansion(const uint8_t *key, __m128i *round_key) {
    __m128i even = _mm_loadu_si128(reinterpret_cast<const __m128i*>(key));      // low 128 bits
    __m128i odd = _mm_loadu_si128(reinterpret_cast<const __m128i*>(key + 16)); // high 128 bits
    round_key[0] = even;
    round_key[1] = odd;

    // Replaces each 32-bit word by the XOR of itself and all lower words.
    const auto prefix_xor = [](__m128i w) {
        w = _mm_xor_si128(w, _mm_slli_si128(w, 4));
        return _mm_xor_si128(w, _mm_slli_si128(w, 8));
    };

    // Even step: broadcast word 3 of the assist value (shuffle 0xFF) and fold
    // it into the prefix-XORed previous even key.
    const auto advance_even = [&](__m128i assist) {
        even = _mm_xor_si128(prefix_xor(even), _mm_shuffle_epi32(assist, 0xFF));
        return even;
    };

    // Odd step: same shape, but broadcasts word 2 of the assist value (0xAA).
    const auto advance_odd = [&](__m128i assist) {
        odd = _mm_xor_si128(prefix_xor(odd), _mm_shuffle_epi32(assist, 0xAA));
        return odd;
    };

    // The round constant must be a compile-time immediate, so the rounds stay
    // written out one by one. Each assist value is computed from the key
    // produced by the step directly before it.
    round_key[2] = advance_even(_mm_aeskeygenassist_si128(odd, 0x01));
    round_key[3] = advance_odd(_mm_aeskeygenassist_si128(even, 0x00));
    round_key[4] = advance_even(_mm_aeskeygenassist_si128(odd, 0x02));
    round_key[5] = advance_odd(_mm_aeskeygenassist_si128(even, 0x00));
    round_key[6] = advance_even(_mm_aeskeygenassist_si128(odd, 0x04));
    round_key[7] = advance_odd(_mm_aeskeygenassist_si128(even, 0x00));
    round_key[8] = advance_even(_mm_aeskeygenassist_si128(odd, 0x08));
    round_key[9] = advance_odd(_mm_aeskeygenassist_si128(even, 0x00));
    round_key[10] = advance_even(_mm_aeskeygenassist_si128(odd, 0x10));
    round_key[11] = advance_odd(_mm_aeskeygenassist_si128(even, 0x00));
    round_key[12] = advance_even(_mm_aeskeygenassist_si128(odd, 0x20));
    round_key[13] = advance_odd(_mm_aeskeygenassist_si128(even, 0x00));
    round_key[14] = advance_even(_mm_aeskeygenassist_si128(odd, 0x40));
}
// Encrypts one 128-bit block with AES-256.
//
// block     : the 16-byte input state.
// round_key : 15 round keys (indices 0..14).
// Returns the encrypted block.
__m128i aes256_encrypt_block(__m128i block, const __m128i *round_key) {
    // Initial AddRoundKey with round key 0.
    __m128i state = _mm_xor_si128(block, round_key[0]);

    // Thirteen full rounds using round keys 1..13.
    const __m128i *const last = round_key + 14;
    for (const __m128i *rk = round_key + 1; rk != last; ++rk) {
        state = _mm_aesenc_si128(state, *rk);
    }

    // Final round (no MixColumns) with round key 14.
    return _mm_aesenclast_si128(state, *last);
}
// AES-256-CFB (128-bit feedback) encryption.
//
// ciphertext : output buffer, receives exactly `len` bytes.
// plaintext  : input buffer of `len` bytes.
// len        : number of bytes to process; need not be a multiple of 16.
// iv         : 16-byte initialization vector.
// round_key  : 15 expanded round keys.
void aes256_cfb_encrypt(uint8_t *ciphertext, const uint8_t *plaintext, size_t len,
                        const uint8_t *iv, const __m128i *round_key) {
    // The feedback register starts as the IV and is replaced by every
    // ciphertext block produced.
    __m128i chain = _mm_loadu_si128(reinterpret_cast<const __m128i*>(iv));

    const uint8_t *in = plaintext;
    uint8_t *out = ciphertext;
    size_t left = len;

    // Whole 16-byte blocks: C_i = P_i XOR E(C_{i-1}).
    for (; left >= 16; left -= 16, in += 16, out += 16) {
        const __m128i pad = aes256_encrypt_block(chain, round_key);
        const __m128i plain = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in));
        chain = _mm_xor_si128(plain, pad);
        _mm_storeu_si128(reinterpret_cast<__m128i*>(out), chain);
    }

    if (left == 0) {
        return;
    }

    // Trailing partial block: only the first `left` keystream bytes are used.
    uint8_t pad_bytes[16];
    _mm_storeu_si128(reinterpret_cast<__m128i*>(pad_bytes),
                     aes256_encrypt_block(chain, round_key));
    for (size_t k = 0; k < left; ++k) {
        out[k] = in[k] ^ pad_bytes[k];
    }
}
// AES-256-CFB (128-bit feedback) decryption.
//
// plaintext  : output buffer, receives exactly `len` bytes.
// ciphertext : input buffer of `len` bytes.
// len        : number of bytes to process; need not be a multiple of 16.
// iv         : 16-byte initialization vector used for encryption.
// round_key  : 15 expanded round keys. CFB uses the block cipher in the
//              encrypt direction for decryption as well.
void aes256_cfb_decrypt(uint8_t *plaintext, const uint8_t *ciphertext, size_t len,
                        const uint8_t *iv, const __m128i *round_key) {
    // The feedback register starts as the IV and is replaced by every
    // ciphertext block consumed.
    __m128i chain = _mm_loadu_si128(reinterpret_cast<const __m128i*>(iv));

    const uint8_t *in = ciphertext;
    uint8_t *out = plaintext;
    size_t left = len;

    // Whole 16-byte blocks: P_i = C_i XOR E(C_{i-1}).
    for (; left >= 16; left -= 16, in += 16, out += 16) {
        const __m128i pad = aes256_encrypt_block(chain, round_key);
        // Load the ciphertext block before storing so the next feedback value
        // is the ciphertext, not the recovered plaintext.
        chain = _mm_loadu_si128(reinterpret_cast<const __m128i*>(in));
        _mm_storeu_si128(reinterpret_cast<__m128i*>(out), _mm_xor_si128(chain, pad));
    }

    if (left == 0) {
        return;
    }

    // Trailing partial block: only the first `left` keystream bytes are used.
    uint8_t pad_bytes[16];
    _mm_storeu_si128(reinterpret_cast<__m128i*>(pad_bytes),
                     aes256_encrypt_block(chain, round_key));
    for (size_t k = 0; k < left; ++k) {
        out[k] = in[k] ^ pad_bytes[k];
    }
}
// Prints a labelled hex dump of a byte buffer to std::cout.
//
// label       : text printed before the dump.
// data        : bytes to print.
// len         : number of bytes available in `data`.
// max_display : upper bound on how many bytes are printed (default 128).
//
// Output is the label, the number of bytes shown, then each byte as two
// lowercase hex digits followed by a space. "..." is appended when the buffer
// was truncated, and the line ends with a newline. The stream is left in
// decimal mode and its fill character is restored to what the caller had.
void print_hex(const char* label, const uint8_t* data, size_t len, size_t max_display = 128) {
    const size_t display_len = std::min(len, max_display);
    std::cout << label << " (显示前" << display_len << "字节): ";

    // fill() is sticky on the stream; remember the old one so the '0' padding
    // used for hex bytes does not leak into later output.
    const char saved_fill = std::cout.fill('0');
    std::cout << std::hex;
    for (size_t i = 0; i < display_len; i++) {
        // Cast to int so the byte is printed as a number, not as a character.
        std::cout << std::setw(2) << static_cast<int>(data[i]) << " ";
    }
    std::cout.fill(saved_fill);

    std::cout << (len > max_display ? "..." : "") << std::dec << "\n";
}
// 使用OpenSSL进行AES-256-CFB加密
bool openssl_aes256_cfb_encrypt(uint8_t *ciphertext, const uint8_t *plaintext, size_t len,
const uint8_t *key, const uint8_t *iv) {
EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();
if (!ctx) return false;
// 初始化加密上下文
if (1 != EVP_EncryptInit_ex(ctx, EVP_aes_256_cfb128(), NULL, key, iv)) {
EVP_CIPHER_CTX_free(ctx);
return false;
}
// 禁用填充 (CFB模式不需要填充)
EVP_CIPHER_CTX_set_padding(ctx, 0);
int out_len;
// 执行加密
if (1 != EVP_EncryptUpdate(ctx, ciphertext, &out_len, plaintext, len)) {
EVP_CIPHER_CTX_free(ctx);
return false;
}
// 结束加密流
int final_len;
if (1 != EVP_EncryptFinal_ex(ctx, ciphertext + out_len, &final_len)) {
EVP_CIPHER_CTX_free(ctx);
return false;
}
EVP_CIPHER_CTX_free(ctx);
return true;
}
// Self-test for the AES-NI implementation.
//
// Encrypts a fixed message with a fixed key/IV using both the custom
// implementation and OpenSSL, compares the two ciphertexts, then decrypts the
// custom ciphertext and compares it with the original message.
//
// Returns true only when the ciphertexts match and the round trip reproduces
// the plaintext. Diagnostics are written to std::cout / std::cerr.
bool test_aes256_cfb() {
    // Test vector.
    const uint8_t key[32] = {
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
        0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
        0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
    };
    const uint8_t iv[16] = {
        0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
        0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff
    };
    const char* plaintext = "Hello, AES-256-CFB! This is a test.";
    const size_t len = strlen(plaintext);

    // Expanded round keys.
    __m128i round_key[15];
    aes256_key_expansion(key, round_key);

    // Vectors own the buffers, so every return path releases them.
    std::vector<uint8_t> ciphertext(len);
    std::vector<uint8_t> openssl_ciphertext(len);
    std::vector<uint8_t> decrypted(len + 1); // one extra byte for the terminator

    // Encrypt with the custom implementation.
    aes256_cfb_encrypt(ciphertext.data(), (const uint8_t*)plaintext, len, iv, round_key);

    // Encrypt with OpenSSL.
    if (!openssl_aes256_cfb_encrypt(openssl_ciphertext.data(), (const uint8_t*)plaintext, len, key, iv)) {
        std::cerr << "OpenSSL加密失败!\n";
        return false;
    }

    // Compare the two ciphertexts.
    const bool encryption_match = memcmp(ciphertext.data(), openssl_ciphertext.data(), len) == 0;
    std::cout << "\n=== 加密结果对比 ===\n";
    print_hex("自定义实现加密", ciphertext.data(), len);
    print_hex("OpenSSL 加密 ", openssl_ciphertext.data(), len);
    std::cout << "加密结果匹配: " << (encryption_match ? "是" : "否") << "\n";

    // Decrypt with the custom implementation.
    aes256_cfb_decrypt(decrypted.data(), ciphertext.data(), len, iv, round_key);
    decrypted[len] = '\0'; // terminate so it can be printed as a C string

    // Check the round trip.
    const bool decryption_match = memcmp(plaintext, decrypted.data(), len) == 0;
    if (!encryption_match || !decryption_match) {
        std::cerr << "\n测试失败!\n";
        if (!encryption_match) {
            std::cerr << "错误: 加密结果与OpenSSL不一致!\n";
        }
        if (!decryption_match) {
            std::cerr << "错误: 解密结果与原始文本不匹配!\n";
            std::cerr << "原始文本: " << plaintext << "\n";
            std::cerr << "解密文本: " << decrypted.data() << "\n";
        }
        // Dump the raw bytes for inspection.
        print_hex("原始数据", (const uint8_t*)plaintext, len);
        print_hex("解密数据", decrypted.data(), len);
        return false;
    }

    std::cout << "\n测试通过: 所有结果匹配!\n";
    return true;
}
// 性能测试函数
void performance_test(const uint8_t* key, const uint8_t* iv,
const uint8_t* plaintext, size_t len,
const __m128i* round_key) {
const int TEST_ITERATIONS = 100000; // 10万次测试
// 准备输入输出缓冲区
std::vector input_buffer(plaintext, plaintext + len);
std::vector output_buffer(len);
// 预热缓存(运行几次以避免冷启动影响)
for (int i = 0; i < 10; i++) {
aes256_cfb_encrypt(output_buffer.data(), input_buffer.data(), len, iv, round_key);
openssl_aes256_cfb_encrypt(output_buffer.data(), input_buffer.data(), len, key, iv);
}
// 测试自定义实现性能
std::clock_t custom_start = std::clock();
for (int i = 0; i < TEST_ITERATIONS; i++) {
aes256_cfb_encrypt(output_buffer.data(), input_buffer.data(), len, iv, round_key);
}
double custom_time = (std::clock() - custom_start) / (double)CLOCKS_PER_SEC;
// 测试OpenSSL实现性能
std::clock_t openssl_start = std::clock();
for (int i = 0; i < TEST_ITERATIONS; i++) {
openssl_aes256_cfb_encrypt(output_buffer.data(), input_buffer.data(), len, key, iv);
}
double openssl_time = (std::clock() - openssl_start) / (double)CLOCKS_PER_SEC;
// 计算吞吐量 (MB/s)
double total_bytes = len * TEST_ITERATIONS;
double custom_throughput = (total_bytes / (1024.0 * 1024.0)) / custom_time;
double openssl_throughput = (total_bytes / (1024.0 * 1024.0)) / openssl_time;
// 输出性能报告
std::cout << "\n=== 性能测试 (10万次加密) ===";
std::cout << "\n数据长度: " << len << " 字节";
std::cout << "\n总数据量: " << std::fixed << std::setprecision(2)
<< total_bytes / (1024.0 * 1024.0) << " MB\n";
std::cout << "\n自定义实现:";
std::cout << "\n 总时间: " << std::fixed << std::setprecision(5)
<< custom_time << " 秒";
std::cout << "\n 平均时间: " << std::fixed << std::setprecision(5)
<< custom_time * 1000 / TEST_ITERATIONS << " 毫秒/次";
std::cout << "\n 吞吐量: " << std::fixed << std::setprecision(2)
<< custom_throughput << " MB/s\n";
std::cout << "\nOpenSSL 实现:";
std::cout << "\n 总时间: " << std::fixed << std::setprecision(5)
<< openssl_time << " 秒";
std::cout << "\n 平均时间: " << std::fixed << std::setprecision(5)
<< openssl_time * 1000 / TEST_ITERATIONS << " 毫秒/次";
std::cout << "\n 吞吐量: " << std::fixed << std::setprecision(2)
<< openssl_throughput << " MB/s\n";
// 性能比较
std::cout << "\n性能比较:";
if (custom_time < openssl_time) {
double faster = (openssl_time - custom_time) / openssl_time * 100;
std::cout << "\n 自定义实现比OpenSSL快 " << std::fixed << std::setprecision(2)
<< faster << "%";
} else {
double faster = (custom_time - openssl_time) / custom_time * 100;
std::cout << "\n OpenSSL比自定义实现快 " << std::fixed << std::setprecision(2)
<< faster << "%";
}
std::cout << "\n 吞吐量比率: " << std::fixed << std::setprecision(2)
<< custom_throughput / openssl_throughput << "x\n";
}
// Entry point: runs the correctness self-test first and, only if it passes,
// the encryption benchmark. Returns 1 when the self-test fails, 0 otherwise.
int main() {
    std::cout << "=== AES-256-CFB 实现验证 ===\n";

    // Self-test (includes the comparison against OpenSSL).
    const bool verified = test_aes256_cfb();
    if (!verified) {
        std::cerr << "\n实现验证失败,请检查代码!\n";
        return 1;
    }

    // Benchmark key (256 bits = 32 bytes).
    uint8_t bench_key[32] = {
        0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
        0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
        0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
        0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
    };

    // Benchmark IV (128 bits = 16 bytes).
    uint8_t bench_iv[16] = {
        0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
        0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff
    };

    // Benchmark message.
    const char* message =
        "Hello, AES-256-CFB! This is a test message for encryption and decryption. "
        "This additional text is to make the test data longer for more meaningful performance testing. "
        "AES (Advanced Encryption Standard) is a specification for the encryption of electronic data "
        "established by the U.S. National Institute of Standards and Technology (NIST) in 2001.";
    const size_t message_len = strlen(message);

    // Expand the key once; the benchmark reuses these round keys.
    __m128i schedule[15];
    aes256_key_expansion(bench_key, schedule);

    // Run the benchmark.
    performance_test(bench_key, bench_iv,
                     reinterpret_cast<const uint8_t*>(message), message_len, schedule);
    return 0;
}