34、习题:文件分块上传服务端-客户端实现

以下是为分块上传功能实现的完整服务器端代码,包含分块接收、临时存储和文件合并功能:

import os
import hashlib
from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename
import time

app = Flask(__name__)
# Root directory for temporary chunk storage (and the merged/ output dir).
app.config['UPLOAD_FOLDER'] = 'tmp_uploads'
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024  # 100 MB request-size limit
# Extension whitelist consumed by allowed_file().
# NOTE(review): allowed_file() is never called by the routes below, so this
# whitelist is not actually enforced — confirm whether that is intended.
ALLOWED_EXTENSIONS = {'txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif', 'mp4'}

# Ensure the temporary storage directory exists at import time.
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

def allowed_file(filename):
    """Return True if *filename* has an extension listed in ALLOWED_EXTENSIONS.

    A name with no dot (no extension) is rejected; matching is
    case-insensitive on the extension.
    """
    _, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS

def get_chunk_directory(file_id):
    """Return the per-upload chunk directory under UPLOAD_FOLDER.

    The file_id is passed through secure_filename so a hostile id
    (e.g. containing "../") cannot escape the upload folder.
    """
    safe_id = secure_filename(file_id)
    return os.path.join(app.config['UPLOAD_FOLDER'], safe_id)

def validate_chunks(file_id, total_chunks):
    """Check that chunks 0..total_chunks-1 all exist for this upload.

    Returns True only when the chunk directory holds exactly the
    expected set of indices (extra or missing chunks both fail).
    """
    chunk_dir = get_chunk_directory(file_id)
    # isdir (not exists): a plain file at this path would crash listdir below.
    if not os.path.isdir(chunk_dir):
        return False

    existing = set()
    for name in os.listdir(chunk_dir):
        if not name.startswith('chunk_'):
            continue
        # Filenames look like "chunk_{n}.part"; skip anything whose index
        # part is not numeric instead of raising ValueError on int().
        index_part = name[len('chunk_'):].split('.')[0]
        if index_part.isdigit():
            existing.add(int(index_part))
    return existing == set(range(total_chunks))

@app.route('/upload', methods=['POST'])
def upload_chunk():
    """Receive and store a single upload chunk.

    Expects multipart form data:
      file      -- the chunk payload
      file_id   -- client-chosen identifier for the whole file
      chunk_num -- zero-based index of this chunk

    Chunks are stored as tmp_uploads/{file_id}/chunk_{n}.part.
    Returns 400 on missing/invalid parameters, 200 JSON on success.
    """
    # Validate required parameters.
    if 'file' not in request.files:
        return jsonify({'error': 'No file part'}), 400

    file = request.files['file']
    chunk_num = request.form.get('chunk_num', type=int)
    file_id = request.form.get('file_id')

    # chunk_num may legitimately be 0, so compare against None explicitly.
    if not (file and file_id and chunk_num is not None):
        return jsonify({'error': 'Missing parameters'}), 400

    # Reject negative indices: validate_chunks/merge_chunks only consider
    # indices 0..total-1, so a negative chunk would be silently orphaned.
    if chunk_num < 0:
        return jsonify({'error': 'Invalid chunk number'}), 400

    # Create the per-upload chunk directory on first chunk.
    chunk_dir = get_chunk_directory(file_id)
    os.makedirs(chunk_dir, exist_ok=True)

    # Persist this chunk to disk.
    chunk_filename = f"chunk_{chunk_num}.part"
    chunk_path = os.path.join(chunk_dir, chunk_filename)
    file.save(chunk_path)

    return jsonify({
        'status': 'success',
        'file_id': file_id,
        'chunk_num': chunk_num
    })

@app.route('/merge', methods=['POST'])
def merge_chunks():
    """Assemble previously uploaded chunks into the final file.

    Expects a JSON body: {file_id, total_chunks, filename?}.
    Returns JSON with the final filename, size and MD5 on success;
    400 on missing/invalid parameters or incomplete chunks.
    """
    # silent=True: a missing or non-JSON body yields None instead of a 500.
    data = request.get_json(silent=True) or {}
    file_id = data.get('file_id')
    total_chunks = data.get('total_chunks')
    original_filename = secure_filename(data.get('filename', 'merged_file'))

    # total_chunks must be a positive int; a JSON string here would
    # previously crash range() below with a 500 instead of a clean 400.
    if not (file_id and isinstance(total_chunks, int) and total_chunks > 0):
        return jsonify({'error': 'Missing parameters'}), 400

    # Verify every chunk 0..total_chunks-1 is present.
    if not validate_chunks(file_id, total_chunks):
        return jsonify({'error': 'Incomplete chunks'}), 400

    # Create the destination directory for merged files.
    final_dir = os.path.join(app.config['UPLOAD_FOLDER'], 'merged')
    os.makedirs(final_dir, exist_ok=True)

    # Timestamp prefix keeps repeated uploads of the same name distinct.
    timestamp = int(time.time())
    final_filename = f"{timestamp}_{original_filename}"
    final_path = os.path.join(final_dir, final_filename)

    # Merge chunks in order, hashing as we write so the final file is
    # never re-read (the original read the whole file back into memory).
    chunk_dir = get_chunk_directory(file_id)
    hasher = hashlib.md5()
    with open(final_path, 'wb') as output_file:
        for i in range(total_chunks):
            chunk_path = os.path.join(chunk_dir, f'chunk_{i}.part')
            with open(chunk_path, 'rb') as chunk_file:
                # Stream in 64 KB pieces instead of loading a whole
                # (multi-MB) chunk into memory at once.
                while True:
                    piece = chunk_file.read(65536)
                    if not piece:
                        break
                    output_file.write(piece)
                    hasher.update(piece)

    # Remove the now-merged temporary chunks.
    for f in os.listdir(chunk_dir):
        os.remove(os.path.join(chunk_dir, f))
    os.rmdir(chunk_dir)

    return jsonify({
        'status': 'success',
        'filename': final_filename,
        'size': os.path.getsize(final_path),
        'md5': hasher.hexdigest()
    })

if __name__ == '__main__':
    # Development entry point only: debug=True enables the interactive
    # Werkzeug debugger and must never be exposed in production
    # (use gunicorn as described in the deployment notes instead).
    app.run(host='0.0.0.0', port=5000, debug=True)

服务器端功能说明

  1. 分块接收模块

    @app.route('/upload', methods=['POST'])
    def upload_chunk():
        # 验证并保存单个分块到临时目录
        # 目录结构:tmp_uploads/{file_id}/chunk_{n}.part
    
  2. 完整性验证机制

    def validate_chunks(file_id, total_chunks):
        # 检查所有分块是否完整存在
    
  3. 文件合并模块

    @app.route('/merge', methods=['POST'])
    def merge_chunks():
        # 按顺序合并分块并生成最终文件
        # 最终文件保存到 merged/ 目录
    
  4. 安全防护措施

    • 使用secure_filename过滤危险字符
    • 限制文件扩展名(ALLOWED_EXTENSIONS)
    • 设置最大上传限制(100MB)

配套客户端增强版

import os
import hashlib
import requests
from tqdm import tqdm

class EnhancedUploader:
    """Chunked-upload client.

    Splits a local file into fixed-size chunks, POSTs each to the
    server's /upload endpoint with a tqdm progress bar, then asks
    /merge to reassemble them.
    """

    CHUNK_SIZE = 5 * 1024 * 1024  # 5 MB per chunk
    REQUEST_TIMEOUT = 60  # seconds; a dead server no longer hangs the client forever

    def __init__(self, server_url):
        """server_url: base URL of the upload server, e.g. http://host:5000"""
        self.server_url = server_url

    def calculate_file_hash(self, file_path):
        """Return the MD5 hex digest of *file_path*.

        Reads incrementally (4 KB at a time) so large files are not
        loaded into memory.
        """
        hash_md5 = hashlib.md5()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    def upload_file(self, file_path):
        """Upload *file_path* in chunks and trigger the server-side merge.

        Returns the parsed JSON response of the /merge call.
        Raises RuntimeError if any chunk upload or the merge itself fails.
        """
        # NOTE(review): basename as file_id collides if two clients upload
        # same-named files concurrently — consider a UUID per upload.
        file_id = os.path.basename(file_path)
        total_size = os.path.getsize(file_path)
        # Ceiling division: the last chunk may be smaller than CHUNK_SIZE.
        total_chunks = (total_size + self.CHUNK_SIZE - 1) // self.CHUNK_SIZE

        # Upload every chunk in order.
        with open(file_path, 'rb') as f, tqdm(total=total_size, unit='B', unit_scale=True) as pbar:
            for chunk_num in range(total_chunks):
                chunk_data = f.read(self.CHUNK_SIZE)
                response = requests.post(
                    f"{self.server_url}/upload",
                    files={'file': (file_id, chunk_data)},
                    data={'file_id': file_id, 'chunk_num': chunk_num},
                    timeout=self.REQUEST_TIMEOUT,
                )
                if response.status_code != 200:
                    # Fail before advancing the bar so progress reflects
                    # only confirmed uploads; include the server's reply.
                    raise RuntimeError(
                        f"Chunk {chunk_num} upload failed: "
                        f"{response.status_code} {response.text}"
                    )
                pbar.update(len(chunk_data))

        # Ask the server to assemble the chunks.
        merge_data = {
            'file_id': file_id,
            'total_chunks': total_chunks,
            'filename': os.path.basename(file_path),
        }
        response = requests.post(
            f"{self.server_url}/merge",
            json=merge_data,
            timeout=self.REQUEST_TIMEOUT,
        )
        # Previously unchecked: a failed merge silently returned the
        # error JSON as if the upload had succeeded.
        if response.status_code != 200:
            raise RuntimeError(
                f"Merge failed: {response.status_code} {response.text}"
            )
        return response.json()

# Usage example: upload a local file to the chunk-upload server.
if __name__ == "__main__":
    uploader = EnhancedUploader("http://localhost:5000")
    result = uploader.upload_file("bigfile.zip")  # file must exist locally
    print("Final Result:", result)

服务器部署建议

  1. 使用Gunicorn生产服务器:

    gunicorn -w 4 -b 0.0.0.0:5000 app:app
    
  2. 配置Nginx反向代理

  3. 添加HTTPS支持(Let’s Encrypt)

  4. 设置定期清理临时文件的定时任务

扩展功能建议

  1. 断点续传支持:

    @app.route('/status/<file_id>')
    def check_upload_status(file_id):
        # 返回已上传的分块列表
    
  2. 分块哈希校验:

    # 上传时携带分块哈希
    headers = {'X-Chunk-Hash': chunk_hash}
    
  3. 分布式存储支持:

    # 将分块存储到S3/MinIO等对象存储
    
  4. 上传限速控制:

    from flask_limiter import Limiter
    limiter = Limiter(app=app)
    

该实现方案具备生产级文件上传服务的基础能力,可根据实际需求扩展校验机制、存储后端和监控功能。

你可能感兴趣的:(python,python,开发语言,网络)