找到pdb中的边界

  1. 识别行开头为ATOM的行。
  2. [30:38]为x列,[38:46]为y列,[46:54]为z列,[17:21]为resname列
  3. 分别识别x,y,z列中最大值和最小值,输出为整体最大最小值
  4. 对resname不属于TIP3,POPC,SOD,CLA的原子,分别识别x,y,z列中最大值和最小值,输出为结构最大最小值
  5. 计算结构最大最小值的边界(结构最小值减 5,最大值加 5)
  6. 输出保存为一个文件
import sys

# Residue names excluded from the "structure" extrema (the report header
# below lists the same four names).
_EXCLUDED_RESNAMES = frozenset({'TIP3', 'POPC', 'SOD', 'CLA'})


def _parse_atom_record(line, line_number):
    """Return ``((x, y, z), resname)`` parsed from one fixed-width ATOM line.

    Raises:
        ValueError: if a coordinate field is blank or not a number. The
            message carries the line number so the bad record can be found.
    """
    try:
        # float() already ignores surrounding blanks, so no strip() is needed.
        point = (float(line[30:38]), float(line[38:46]), float(line[46:54]))
    except ValueError as err:
        raise ValueError(
            f"第 {line_number} 行的坐标无法解析: {line.rstrip()!r}"
        ) from err
    # A 4-character slice is used so names such as TIP3 / POPC fit.
    return point, line[17:21].strip()


def _widen(bounds, point):
    """Return *bounds* grown to contain *point*; ``None`` means "no point yet"."""
    if bounds is None:
        return point, point
    lower, upper = bounds
    return tuple(map(min, lower, point)), tuple(map(max, upper, point))


def _format_xyz(label, point):
    """Format one report line: the label followed by X/Y/Z with 3 decimals."""
    return f"{label}: X={point[0]:.3f}, Y={point[1]:.3f}, Z={point[2]:.3f}\n"


def analyze_pdb_file(input_file, output_file, *, padding=5.0):
    """Write the coordinate extrema of a PDB file's ATOM records to a report.

    Three sections are written: the extrema over every ATOM record, the
    extrema over records whose residue name is not TIP3/POPC/SOD/CLA, and
    those structure extrema pushed outwards by ``padding`` on every axis
    (minimum - padding, maximum + padding).

    Args:
        input_file: Path of the PDB file to read.
        output_file: Path of the text report to create (written as UTF-8).
        padding: Margin added around the structure extrema. Defaults to 5,
            which reproduces the original fixed margin.

    Returns:
        None. Success and failure are both reported with ``print``; on
        failure no report file is written.
    """
    overall = None     # (min_xyz, max_xyz) over every ATOM record
    structure = None   # same, restricted to non-excluded residues

    # Reading and writing are guarded separately so that a problem with the
    # output path is never reported as a missing input file.
    try:
        with open(input_file, 'r') as f:
            # Track running extrema instead of storing every coordinate;
            # memory use stays constant however many atoms the file holds.
            for line_number, line in enumerate(f, start=1):
                if not line.startswith('ATOM'):
                    continue
                point, resname = _parse_atom_record(line, line_number)
                overall = _widen(overall, point)
                if resname not in _EXCLUDED_RESNAMES:
                    structure = _widen(structure, point)
    except FileNotFoundError:
        print(f"错误: 文件 {input_file} 不存在")
        return
    except (OSError, ValueError) as e:
        print(f"发生未知错误: {e}")
        return

    if overall is None:
        # Without this guard the failure would be an opaque min() error.
        print(f"错误: 文件 {input_file} 中没有找到 ATOM 记录")
        return

    overall_min, overall_max = overall
    report = [
        "整体最大最小值:\n",
        _format_xyz("最小值", overall_min),
        _format_xyz("最大值", overall_max) + "\n",
        "结构最大最小值 (排除 TIP3, POPC, SOD, CLA):\n",
    ]
    if structure is None:
        report.append("没有找到符合条件的残基\n")
    else:
        struct_min, struct_max = structure
        # Boundary box: minimum minus padding, maximum plus padding.
        boundary_min = tuple(value - padding for value in struct_min)
        boundary_max = tuple(value + padding for value in struct_max)
        report += [
            _format_xyz("最小值", struct_min),
            _format_xyz("最大值", struct_max) + "\n",
            "结构最大最小值 边界:\n",
            _format_xyz(f"最小值-{padding:g}", boundary_min),
            _format_xyz(f"最大值+{padding:g}", boundary_max),
        ]

    try:
        # The report is non-ASCII, so the encoding is fixed rather than left
        # to the platform's locale default.
        with open(output_file, 'w', encoding='utf-8') as f:
            f.writelines(report)
    except OSError as e:
        print(f"错误: 无法写入文件 {output_file}: {e}")
        return

    print(f"分析完成,结果已保存到 {output_file}")

if __name__ == "__main__":
    # Command-line entry point: exactly two positional arguments are
    # expected after the script name (input PDB path, output report path).
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("使用方法: python pdb_analyzer.py <输入PDB文件> <输出结果文件>")
        sys.exit(1)

    input_file, output_file = cli_args
    analyze_pdb_file(input_file, output_file)

你可能感兴趣的:(Python脚本,算法,数据结构)