Python_计算两个省市之间的直线距离_2506

更新代码

上一版链接

import pandas as pd
import time
import pickle
import os
import sys
from geopy.geocoders import Nominatim
from geopy.distance import geodesic
from tqdm import tqdm

# Configuration
INPUT_FILE = r"距离.xlsx"  # path of the input Excel workbook
OUTPUT_FILE = r"output.xlsx"  # path of the Excel workbook the results are written to
CACHE_FILE = r"coord_cache.pkl"  # pickle file used to persist geocoded coordinates
USER_AGENT = "geo_distance_calculator"  # passed to Nominatim as its user_agent

# Initialise the geocoder (shared by every lookup in this script)
geolocator = Nominatim(user_agent=USER_AGENT, timeout=10)

# Load the coordinate cache, or start with an empty one
def load_cache(path):
    """Load the pickled coordinate cache stored at *path*.

    Args:
        path: Location of the pickle file written by ``save_cache``.

    Returns:
        The cached dictionary, or an empty dict when the file does not
        exist, cannot be unpickled, or does not contain a dict.

    Note:
        SECURITY: ``pickle.load`` can execute arbitrary code. Only point
        this at a cache file that this script created itself.
    """
    try:
        with open(path, 'rb') as f:
            cache = pickle.load(f)
    except FileNotFoundError:
        # First run: no cache yet, which is not worth a warning.
        return {}
    except Exception as e:
        # Unpickling corrupt data can raise many different exception types,
        # so the cache is treated as best-effort and we start fresh.
        print(f"Warning: failed to load cache: {e}")
        return {}
    if not isinstance(cache, dict):
        # A non-dict payload would break later key lookups and assignments.
        print(f"Warning: failed to load cache: expected dict, got {type(cache).__name__}")
        return {}
    return cache

# In-memory cache shared by the whole script: maps a stripped (province, city)
# tuple to a (latitude, longitude) tuple, or to None when no result was stored.
coordinate_cache = load_cache(CACHE_FILE)

# Persist the coordinate cache
def save_cache(path, cache):
    """Pickle *cache* to *path* without ever leaving a half-written file.

    The data is first written to ``<path>.tmp`` and then moved over the
    target with ``os.replace``, so an interrupted or failed dump cannot
    truncate a previously saved cache.

    Args:
        path: Destination file for the pickled cache.
        cache: The object to persist (the coordinate dictionary).

    Returns:
        None. Failures are reported with a warning instead of raising,
        because the cache is only an optimisation.
    """
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, 'wb') as f:
            pickle.dump(cache, f)
        os.replace(tmp_path, path)
    except Exception as e:
        print(f"Warning: failed to save cache: {e}")
        # Best-effort cleanup of the partial temp file; it may not exist.
        try:
            os.remove(tmp_path)
        except OSError:
            pass

# Look up latitude/longitude for a province/city pair
def geocode_pair(province: str, city: str):
    """Return ``(latitude, longitude)`` for a province/city pair, or ``None``.

    Results are memoised in the module-level ``coordinate_cache`` under the
    stripped ``(province, city)`` key and persisted with ``save_cache`` after
    every completed lookup, so an interrupted run can resume.

    Args:
        province: Province name; surrounding whitespace is ignored.
        city: City name; surrounding whitespace is ignored.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``None`` when the geocoder
        finds nothing, the request fails, or either argument is not a
        string (e.g. NaN from an empty spreadsheet cell).
    """
    if not isinstance(province, str) or not isinstance(city, str):
        # Nothing meaningful to geocode; do not touch the cache or the network.
        return None

    key = (province.strip(), city.strip())
    if key in coordinate_cache:
        return coordinate_cache[key]

    try:
        location = geolocator.geocode(f"{key[0]} {key[1]}")
    except Exception as e:
        print(f"Error geocoding {province} {city}: {e}")
        # A failed request (timeout, network error, ...) is deliberately NOT
        # cached, otherwise a transient error would be persisted as a
        # permanent "not found" and never retried on later runs.
        time.sleep(1)  # still throttle before the next request
        return None

    coord = (location.latitude, location.longitude) if location else None
    coordinate_cache[key] = coord
    save_cache(CACHE_FILE, coordinate_cache)
    time.sleep(1)  # pause between requests to comply with the usage policy
    return coord

# Straight-line (geodesic) distance between two coordinates
def compute_distance(coords1, coords2):
    """Return the geodesic distance in kilometres, rounded to 3 decimals.

    Gives ``None`` when either coordinate is missing (falsy).
    """
    if not coords1 or not coords2:
        return None
    separation = geodesic(coords1, coords2)
    return round(separation.kilometers, 3)

# Main workflow
def _pair_key(province, city):
    """Return the stripped ``(province, city)`` cache key, or ``None`` if either value is not a string."""
    if isinstance(province, str) and isinstance(city, str):
        return (province.strip(), city.strip())
    return None


def main():
    """Read the input workbook, geocode every place, and write the distances.

    Steps: validate the input file and its required columns, geocode each
    unique province/city pair (cached), attach origin/destination coordinates
    to every row, compute the straight-line distance, and save the result to
    ``OUTPUT_FILE``. Exits with status 1 on any unrecoverable I/O or schema
    error. Rows with blank province/city cells get no coordinates and an
    empty distance instead of aborting the run.
    """
    # Check that the input file exists
    if not os.path.exists(INPUT_FILE):
        print(f"Error: input file not found at {INPUT_FILE}")
        sys.exit(1)

    # Read the data
    try:
        df = pd.read_excel(INPUT_FILE)
    except Exception as e:
        print(f"Error reading Excel: {e}")
        sys.exit(1)
    print(f"Loaded {len(df)} rows from {INPUT_FILE}.")

    # Make sure the required columns are present
    required_cols = ['发货省', '发货市', '到货省', '到货市']
    missing = [c for c in required_cols if c not in df.columns]
    if missing:
        print(f"Error: missing required columns: {missing}")
        sys.exit(1)

    # Normalise each row to its cache key; blank (NaN) cells yield None.
    origin_keys = [_pair_key(p, c) for p, c in zip(df['发货省'], df['发货市'])]
    dest_keys = [_pair_key(p, c) for p, c in zip(df['到货省'], df['到货市'])]

    # Deduplicate on the stripped key so each place is geocoded only once
    unique_pairs = {key for key in origin_keys + dest_keys if key is not None}
    print(f"Geocoding {len(unique_pairs)} unique province-city pairs...")
    for prov, city in tqdm(unique_pairs):
        geocode_pair(prov, city)

    # Map coordinates onto the rows. Building explicit object Series keeps the
    # tuples intact and also works when the sheet has zero rows.
    df['发货坐标'] = pd.Series(
        [coordinate_cache.get(key) for key in origin_keys],
        index=df.index,
        dtype=object,
    )
    df['到货坐标'] = pd.Series(
        [coordinate_cache.get(key) for key in dest_keys],
        index=df.index,
        dtype=object,
    )

    # Compute the distances
    print("Calculating distances...")
    df['直线距离 (公里)'] = [
        compute_distance(origin, dest)
        for origin, dest in zip(df['发货坐标'], df['到货坐标'])
    ]
    print("Sample distances:")
    print(df[['发货省','发货市','到货省','到货市','直线距离 (公里)']].head())

    # Save the output
    try:
        df.to_excel(OUTPUT_FILE, index=False)
        print(f"完成:结果已保存到 {OUTPUT_FILE}")
    except Exception as e:
        print(f"Error saving Excel: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()

你可能感兴趣的:(PYTHON,python,spring,开发语言)