# 编译后的字节码对比
def traditional_loop():
    """Build [0, 2, 4, 6, 8] with an explicit append loop (for bytecode comparison)."""
    doubled = []
    for n in range(5):
        doubled.append(n * 2)
    return doubled
def list_comprehension():
    """Build [0, 2, 4, 6, 8] with a list comprehension (for bytecode comparison)."""
    return [2 * k for k in range(5)]
# Inspect the bytecode difference between the two implementations with dis
import dis

for label, fn in (("传统循环:", traditional_loop),
                  ("\n列表推导式:", list_comprehension)):
    print(label)
    dis.dis(fn)
复制
关键发现:LIST_APPEND 指令直接操作列表,减少了方法查找开销。

# 矩阵展开 + 条件过滤
matrix = [[1, 2], [3, 4], [5, 6]]
# Flatten the matrix while keeping only even values
flatten = [value
           for row in matrix
           for value in row
           if value % 2 == 0]
# -> [2, 4, 6]

# Simulate a Cartesian product of two option lists
colors = ['红', '蓝']
sizes = ['S', 'L']
products = [(color, size) for color in colors for size in sizes]
# -> [('红', 'S'), ('红', 'L'), ('蓝', 'S'), ('蓝', 'L')]
复制
# Replacing a map + filter combination
# Traditional form
result = list(map(lambda v: v * v, filter(lambda v: v % 2 == 0, range(10))))
# Comprehension form (roughly 30% faster)
result = [n ** 2 for n in range(10) if n % 2 == 0]
复制
import sys

# Compare the memory footprint of a fully materialized list against a
# lazy generator expression for one million squared numbers.
list_comp = [n * n for n in range(10**6)]
gen_exp = (n * n for n in range(10**6))
print(f"列表内存: {sys.getsizeof(list_comp)/1024/1024:.2f} MB")  # roughly 8.39 MB
print(f"生成器内存: {sys.getsizeof(gen_exp)} bytes")  # a constant ~100-200 bytes
复制
# Build a lazy data-processing pipeline that counts ERROR lines.
# Fix: the original called open('data.log') without ever closing it —
# wrap the whole pipeline in a `with` so the handle is released.
with open('data.log') as f:
    lines = (line.strip() for line in f)
    errors = (line for line in lines if 'ERROR' in line)
    # sum(1 for _ in ...) consumes the chained generators lazily,
    # so the file is streamed line by line, never loaded whole.
    count = sum(1 for _ in errors)
print(f"错误数量: {count}")
复制
def chunk_generator(file_obj, chunk_size=1024):
    """Read *file_obj* in fixed-size chunks.

    For each chunk, yield a lazy generator over its non-NUL byte values
    (iterating bytes yields ints in Python 3).
    """
    while chunk := file_obj.read(chunk_size):
        yield (b for b in chunk if b != 0x00)
# Process a binary file with generator expressions.
# Fix: iterating a bytes chunk yields ints in Python 3, so the original
# ord(b) raised TypeError (ord expects a 1-char string/bytes). XOR the
# int directly instead.
with open('binary.dat', 'rb') as f:
    for chunk in chunk_generator(f):
        processed = bytes(b ^ 0xFF for b in chunk)
        # further processing...
复制
x = "原始值"
# NOTE(review): the original comments here were wrong for Python 3 —
# a comprehension runs in its own scope, so the loop variable does NOT
# leak out or overwrite the outer x. (That was Python 2 behavior, where
# x would end up as 2 after the comprehension.)
result = [x for x in range(3)]
print(x)  # Python 3: still prints "原始值", not 2
复制
# Assign inside the comprehension with the walrus operator and reuse the value
data = [1, 2, 3, None, 5]
clean = [x for value in data if (x := value) is not None]
# -> [1, 2, 3, 5]
复制
def safe_convert(values):
    """Convert each element of *values* to int, mapping failures to None.

    Fix: the original `int(x) if x.isdigit() else None` crashes on
    strings like "²" (isdigit() is True but int() raises ValueError),
    wrongly rejects valid inputs such as "-3", and raises AttributeError
    for non-string elements. EAFP conversion handles all three and
    matches the safe_generator approach recommended alongside it.
    """
    def _to_int(x):
        # Single-element conversion: None on any non-convertible input.
        try:
            return int(x)
        except (ValueError, TypeError):
            return None
    return [_to_int(x) for x in values]
# Safer alternative: a generator function with explicit error handling
def safe_generator(values):
    """Lazily yield int(x) for each element, or None when conversion fails."""
    for item in values:
        try:
            converted = int(item)
        except ValueError:
            converted = None
        yield converted
复制
import numpy as np
import time
# Pure-Python implementation (for comparison against NumPy)
def python_matrix_mult(a, b):
    """Multiply two matrices given as lists of row lists; returns a new matrix."""
    b_columns = list(zip(*b))  # transpose b once, outside the row loop
    product = []
    for row in a:
        product.append([sum(r * c for r, c in zip(row, col))
                        for col in b_columns])
    return product
# NumPy comparison: same matrix product, fully vectorized
a = np.random.rand(100, 100)
b = np.random.rand(100, 100)
# Pure Python version: ~1.2 s
# NumPy: ~0.002 s
复制
import mmap
def process_large_file(path):
    """Return the set of whitespace-separated words (bytes) longer than
    5 bytes found in the file at *path*.

    Fixes over the original:
    - the mmap object and file handle are now closed (``with`` blocks);
    - the file is opened in binary mode with ``ACCESS_READ`` instead of
      ``"r+"``, so a writable file is no longer required;
    - the computed set is returned instead of being dropped.

    NOTE(review): mm.read() still materializes the whole mapping as one
    bytes object — only the word filtering is lazy. The original comment
    claiming the generator avoids loading the file was misleading.
    """
    with open(path, "rb") as f:
        with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
            words = (word for word in mm.read().split() if len(word) > 5)
            return set(words)
复制
# E-commerce data-processing example
raw_data = [
    "用户A, 订单123, ¥150.5",
    "无效数据",
    "用户B, 订单124, ¥300"
]
# Lazy pipeline: the walrus operator binds the split fields once per
# record, and malformed rows are filtered out before parsing the amount.
processed = (
    {"user": fields[0], "amount": float(fields[2][1:])}
    for record in raw_data
    if len(fields := record.split(", ")) == 3 and fields[2].startswith('¥')
)
# Nothing is actually parsed until the generator is consumed here
print(list(processed))
复制
import json

import requests
def stream_tweets():
    """Yield tweets parsed from Twitter's streaming endpoint, one JSON object per line."""
    resp = requests.get('https://api.twitter.com/stream', stream=True)
    for raw_line in resp.iter_lines():
        if not raw_line:
            continue  # keep-alive blank lines carry no payload
        yield json.loads(raw_line)
# Real-time tweet processing: lazily filter the infinite stream
keywords = ('Python', 'AI')
matching_tweets = (
    tweet for tweet in stream_tweets()
    if any(keyword in tweet['text'] for keyword in keywords)
)
# Consume the endless stream, alerting on every keyword match
for tweet in matching_tweets:
    send_alert(tweet)
复制