Kite的图像分割项目展示了OpenCV在传统图像分割任务中的典型应用:
图:项目实现的多种分割效果对比(来源:项目仓库)
项目涵盖的主要算法类型:
组件 | 最低要求 | 推荐配置 |
---|---|---|
Python | 3.6 | 3.8+ |
OpenCV | 4.2 | 4.5+ |
NumPy | 1.19 | 1.21+ |
内存 | 4GB | 8GB+ |
# Clone the project repository and enter the image-segmentation example
git clone https://github.com/kiteco/kite-python-blog-post-code.git
cd kite-python-blog-post-code/image-segmentation
# Create a virtual environment named "venv" and activate it
python -m venv venv
source venv/bin/activate # Linux/Mac
# venv\Scripts\activate # Windows
# Install the dependencies
pip install opencv-python numpy matplotlib
# Verify the installation by importing cv2 and printing its version
python -c "import cv2; print(f'OpenCV版本: {cv2.__version__}')"
image-segmentation/
├── images/ # 示例图像
│ ├── sample.jpg
│ └── medical.png
├── thresholding.py # 阈值分割实现
├── edge_based.py # 边缘检测分割
├── region_based.py # 区域分割算法
├── clustering.py # 色彩聚类分割
├── utils.py # 辅助函数
└── evaluate.py # 分割评估指标
# thresholding.py
import cv2
import numpy as np
def otsu_thresholding(img_path):
    """Binarize an image file using Otsu's automatically chosen threshold.

    Args:
        img_path: Path of the image to load; it is read as grayscale.

    Returns:
        The thresholded image returned by ``cv2.threshold`` (values 0 or 255).

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_path``.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # without this check the error surfaces later as an opaque cv2.error.
        raise FileNotFoundError(f"Cannot read image: {img_path}")
    # Smooth with a 5x5 Gaussian before thresholding.
    blur = cv2.GaussianBlur(img, (5, 5), 0)
    # With THRESH_OTSU set, the threshold is computed from the image and the
    # explicit threshold argument (0) is ignored.
    _, thresh = cv2.threshold(
        blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return thresh
数学原理:
Otsu方法最大化类间方差:
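$$\sigma_b^2(t) = \omega_1(t)\,\omega_2(t)\,[\mu_1(t)-\mu_2(t)]^2$$
其中:$\omega_1(t)$、$\omega_2(t)$ 分别是阈值 $t$ 划分出的两类像素所占的比例,$\mu_1(t)$、$\mu_2(t)$ 分别是这两类像素的灰度均值;使 $\sigma_b^2(t)$ 取得最大值的 $t$ 即为所选阈值。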
# edge_based.py
def canny_segmentation(img_path, low=50, high=150):
    """Detect Canny edges in an image file and close small gaps in them.

    Args:
        img_path: Path of the image to load; it is read as grayscale.
        low: Lower hysteresis threshold passed to ``cv2.Canny``.
        high: Upper hysteresis threshold passed to ``cv2.Canny``.

    Returns:
        The edge map after morphological closing.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_path``.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Cannot read image: {img_path}")
    blur = cv2.GaussianBlur(img, (5, 5), 0)
    edges = cv2.Canny(blur, low, high)
    # Two closing passes with a 3x3 elliptical kernel bridge small breaks
    # in the detected edges.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    closed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel, iterations=2)
    return closed
参数优化建议:
# region_based.py
def watershed_segmentation(img_path, fg_ratio=0.7):
    """Run marker-based watershed on an image file and draw the boundaries.

    Args:
        img_path: Path of the image to load (read with OpenCV's default
            colour mode).
        fg_ratio: Fraction of the maximum distance-transform value used as
            the "sure foreground" threshold. Defaults to 0.7.

    Returns:
        The loaded image with every watershed boundary pixel (marker -1)
        overwritten with ``[255, 0, 0]``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_path``.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Cannot read image: {img_path}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Inverted Otsu threshold: pixels at or below the threshold become 255.
    _, thresh = cv2.threshold(
        gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )

    # Sure background: the thresholded mask grown by three dilations.
    kernel = np.ones((3, 3), np.uint8)
    sure_bg = cv2.dilate(thresh, kernel, iterations=3)

    # Sure foreground: pixels whose distance to the nearest zero pixel
    # exceeds fg_ratio of the maximum distance.
    dist = cv2.distanceTransform(thresh, cv2.DIST_L2, 5)
    _, sure_fg = cv2.threshold(
        dist, fg_ratio * dist.max(), 255, cv2.THRESH_BINARY
    )

    # Unknown region: inside the sure background mask but not sure foreground.
    sure_fg = np.uint8(sure_fg)
    unknown = cv2.subtract(sure_bg, sure_fg)

    # Label the foreground components, shift labels up by one so the
    # background label becomes 1, then mark the unknown region with 0.
    _, markers = cv2.connectedComponents(sure_fg)
    markers += 1
    markers[unknown == 255] = 0

    markers = cv2.watershed(img, markers)
    img[markers == -1] = [255, 0, 0]  # paint boundary pixels
    return img
关键步骤说明:
import cv2
from thresholding import otsu_thresholding
import os

# Input image and destination of the segmented result
input_path = "images/sample.jpg"
output_path = "results/threshold_result.jpg"
# Apply Otsu threshold segmentation
result = otsu_thresholding(input_path)
# Save the result. cv2.imwrite does not create missing directories and only
# reports failure through its return value, so create the folder first and
# check the outcome explicitly.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
if not cv2.imwrite(output_path, result):
    raise OSError(f"Failed to write {output_path}")
import matplotlib.pyplot as plt
from thresholding import otsu_thresholding
from edge_based import canny_segmentation
from region_based import watershed_segmentation
methods = {
    "Otsu": otsu_thresholding,
    "Canny": canny_segmentation,
    "Watershed": watershed_segmentation,
}
# Each segmentation function takes a file path and loads the image itself,
# so pass the path rather than an already-decoded array.
img_path = "images/medical.png"
plt.figure(figsize=(15, 10))
for i, (name, func) in enumerate(methods.items(), 1):
    plt.subplot(2, 2, i)
    result = func(img_path)
    if result.ndim == 3:
        # The watershed function returns the colour image as loaded by
        # OpenCV (BGR channel order); matplotlib expects RGB.
        result = cv2.cvtColor(result, cv2.COLOR_BGR2RGB)
    # cmap only affects single-channel results.
    plt.imshow(result, cmap='gray')
    plt.title(name)
plt.tight_layout()
plt.savefig("comparison.png")
def medical_image_processing(img_path, min_area=100):
    """Segment a grayscale image with CLAHE plus adaptive thresholding.

    Args:
        img_path: Path of the image to load with ``cv2.imread`` (read as
            grayscale).
        min_area: External contours with an area below this value are
            erased from the mask. Defaults to 100.

    Returns:
        The adaptive-threshold mask with small regions filled with 0.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_path``.
    """
    # NOTE(review): the original comment mentioned DICOM input, but the image
    # is loaded with cv2.imread; confirm DICOM files are converted upstream.
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Cannot read image: {img_path}")

    # Contrast enhancement (CLAHE, clip limit 2.0 on an 8x8 tile grid).
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(img)

    # Gaussian-weighted adaptive threshold: block size 11, constant 2.
    thresh = cv2.adaptiveThreshold(
        enhanced, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY, 11, 2
    )

    # Remove small regions by filling their contours with 0.
    contours, _ = cv2.findContours(
        thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    for cnt in contours:
        if cv2.contourArea(cnt) < min_area:
            cv2.drawContours(thresh, [cnt], 0, 0, -1)
    return thresh
现象:单个对象被分割为多个区域
解决方法:
# NOTE(review): the lines below look like independent remedies for
# over-segmentation; `img`, `result` and `dist` are not defined in this
# snippet and presumably come from the segmentation code being tuned.
# Smooth the input with a bilateral filter (d=9, sigmaColor=75, sigmaSpace=75)
blur = cv2.bilateralFilter(img, 9, 75, 75)
# Merge fragments with a morphological closing using a 5x5 elliptical kernel
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5,5))
merged = cv2.morphologyEx(result, cv2.MORPH_CLOSE, kernel)
# Change the distance-transform threshold used for the watershed foreground
# seeds to 0.5 * max. The watershed example earlier in this file uses
# 0.7 * max, so this lowers the threshold rather than raising it.
_, sure_fg = cv2.threshold(dist, 0.5*dist.max(), 255, 0)
现象:对象边界不连续
优化方案:
# NOTE(review): `img`, `low_thresh` and `high_thresh` are not defined in this
# snippet; they presumably come from the caller's own pipeline.
# Run Canny with a larger Sobel aperture (5 instead of the default).
edges = cv2.Canny(img, low_thresh, high_thresh, apertureSize=5)
# Bridge small gaps in the edge map with a 3x3 morphological closing.
kernel = np.ones((3, 3), np.uint8)
closed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)
# Structured edge detection. NOTE(review): cv2.ximgproc is an extra module
# (presumably needs opencv-contrib-python rather than opencv-python), and
# "model.yml" must point to a trained model file - confirm both.
edge_detector = cv2.ximgproc.createStructuredEdgeDetection("model.yml")
# The original line was missing its closing parenthesis.
edges = edge_detector.detectEdges(np.float32(img) / 255.0)
现象:阴影区域分割失败
处理策略:
# NOTE(review): `img` is not defined in this snippet; it presumably is the
# image (or video frame) being segmented.
# Build a MOG2 background subtractor and apply it to get a foreground mask
bg_model = cv2.createBackgroundSubtractorMOG2()
fg_mask = bg_model.apply(img)
# Mean-based adaptive threshold with block size 51 and constant 10
thresh = cv2.adaptiveThreshold(
img, 255,
cv2.ADAPTIVE_THRESH_MEAN_C,
cv2.THRESH_BINARY, 51, 10
)
def homomorphic_filter(img, cutoff=30):
    """Apply a Gaussian high-pass filter to the log of a 2-D image.

    The image is mapped with ``log1p``, multiplied in the centred frequency
    domain by ``1 - exp(-d^2 / (2 * cutoff^2))`` (``d`` is the distance from
    the spectrum centre), transformed back and mapped with ``expm1``.

    Args:
        img: Single-channel 2-D image (any array-like convertible to float32).
        cutoff: Width of the Gaussian used in the mask. Defaults to 30.

    Returns:
        A float array with the same shape as ``img`` containing the filtered
        image.

    Raises:
        ValueError: If ``img`` is not 2-D or ``cutoff`` is not positive.
    """
    img_log = np.log1p(np.float32(img))
    if img_log.ndim != 2:
        raise ValueError("homomorphic_filter expects a single-channel 2-D image")
    if cutoff <= 0:
        raise ValueError("cutoff must be positive")

    rows, cols = img_log.shape
    crow, ccol = rows // 2, cols // 2

    # Squared distance of every frequency bin from the spectrum centre, built
    # by broadcasting a column vector against a row vector instead of looping
    # over every pixel in Python.
    dy = np.arange(rows, dtype=np.float64)[:, None] - crow
    dx = np.arange(cols, dtype=np.float64)[None, :] - ccol
    dist_sq = dy ** 2 + dx ** 2
    mask = (1.0 - np.exp(-dist_sq / (2.0 * cutoff ** 2))).astype(np.float32)

    # fftshift moves the zero-frequency bin to (crow, ccol) so it lines up
    # with the mask centre; ifftshift undoes that before the inverse FFT.
    spectrum = np.fft.fftshift(np.fft.fft2(img_log))
    filtered = np.fft.ifft2(np.fft.ifftshift(spectrum * mask))

    # expm1 is the exact inverse of the log1p applied above.
    return np.expm1(np.real(filtered))
def multi_scale_segmentation(img_path, scales=(0.5, 1.0, 2.0)):
    """Otsu-threshold an image at several scales and fuse the masks.

    Args:
        img_path: Path of the image to load; it is read as grayscale.
        scales: Resize factors at which the image is thresholded. Defaults to
            ``(0.5, 1.0, 2.0)``.

    Returns:
        A uint8 mask at the original resolution: 255 where the mean of the
        per-scale masks exceeds 127, otherwise 0.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_path``.
        ValueError: If ``scales`` is empty.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Cannot read image: {img_path}")
    scales = tuple(scales)
    if not scales:
        raise ValueError("scales must contain at least one factor")

    height, width = img.shape[:2]
    results = []
    for scale in scales:
        resized = cv2.resize(
            img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA
        )
        # Segment at this scale.
        thresh = cv2.threshold(
            resized, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )[1]
        # Back to the original size; nearest-neighbour keeps the mask binary.
        thresh = cv2.resize(
            thresh, (width, height), interpolation=cv2.INTER_NEAREST
        )
        results.append(thresh)

    # Fuse the per-scale masks by averaging and thresholding at 127.
    final = np.mean(np.stack(results), axis=0)
    return (final > 127).astype(np.uint8) * 255
def roi_based_segmentation(img_path, min_area=1000):
    """Find coarse regions of interest, then threshold each one adaptively.

    Args:
        img_path: Path of the image to load.
        min_area: Only coarse contours with an area above this value are
            refined. Defaults to 1000.

    Returns:
        A mask shaped like the grayscale image: zero everywhere except inside
        the bounding boxes of the selected contours, which hold the adaptive
        threshold of that region.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_path``.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Cannot read image: {img_path}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Coarse pass: heavy 25x25 blur followed by Otsu thresholding.
    blur = cv2.GaussianBlur(gray, (25, 25), 0)
    _, rough_thresh = cv2.threshold(
        blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    contours, _ = cv2.findContours(
        rough_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    mask = np.zeros_like(gray)
    for cnt in contours:
        if cv2.contourArea(cnt) <= min_area:
            continue
        x, y, w, h = cv2.boundingRect(cnt)
        roi = gray[y:y + h, x:x + w]
        # Fine pass inside the bounding box only.
        roi_thresh = cv2.adaptiveThreshold(
            roi, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 11, 2
        )
        # A later box overwrites an earlier one where they overlap.
        mask[y:y + h, x:x + w] = roi_thresh
    return mask
from multiprocessing import Pool
import os
def process_image(img_file):
    """Otsu-threshold one file from ``images/`` and save it under ``results/``.

    Args:
        img_file: File name (not a full path) of an image inside ``images``.

    Raises:
        OSError: If the result cannot be written.
    """
    img_path = os.path.join("images", img_file)
    result = otsu_thresholding(img_path)
    # cv2.imwrite does not create missing directories and only reports
    # failure through its return value. exist_ok keeps this safe when several
    # pool workers reach this line at the same time.
    os.makedirs("results", exist_ok=True)
    if not cv2.imwrite(f"results/{img_file}", result):
        raise OSError(f"Failed to write results/{img_file}")
if __name__ == "__main__":
    # Collect every entry of images/ whose name ends with ".jpg" and process
    # them with a pool of four worker processes.
    img_files = list(
        filter(lambda name: name.endswith(".jpg"), os.listdir("images"))
    )
    with Pool(4) as pool:
        pool.map(process_image, img_files)
Otsu阈值法:
Canny边缘检测:
分水岭算法:
图割方法:
超像素技术:
能量最小化框架:
Kite的这个OpenCV图像分割项目虽然聚焦于传统算法,但其清晰的实现和模块化设计为理解图像分割基础提供了绝佳的学习材料。通过本文的技术解析和实战指南,读者可以掌握经典分割技术的核心思想,并具备将其应用于实际项目的能力。在深度学习大行其道的今天,这些传统方法仍在小样本、实时性要求高的场景中发挥着不可替代的作用。