计算机图像处理教程 - 从基础到实践

图像处理基础知识

掌握图像表示、色彩模型和基本概念

图像表示

图像在计算机中以数字矩阵的形式存储，每个像素点包含颜色信息。常见的图像格式包括：

二值图像：每个像素只有黑白两种状态
灰度图像：每个像素用0-255的数值表示亮度
彩色图像：通常用RGB三通道表示颜色
多光谱图像：包含多个波段的信息

色彩模型

不同的色彩模型用于不同场景的图像处理：

RGB模型：红绿蓝三原色，适用于显示器
HSV模型：色调、饱和度、明度，便于颜色调整
CMYK模型：青、品红、黄、黑，适用于印刷
Lab模型：感知均匀的颜色空间

图像质量指标

评估图像质量的重要参数：

分辨率：图像的像素密度
信噪比(SNR)：信号与噪声的比例
峰值信噪比(PSNR)：衡量图像失真程度
结构相似性(SSIM)：感知质量评估

图像文件格式

不同格式适用于不同场景：

JPEG：有损压缩，适合照片
PNG：无损压缩，支持透明度
BMP：未压缩，文件较大
TIFF：高质量，适合专业应用

图像处理常见算法

了解经典图像处理算法及其应用场景

几何变换

改变图像的空间关系：

平移：改变图像位置
旋转：绕某点旋转图像
缩放：改变图像尺寸
仿射变换：保持平行线性质

滤波算法

用于图像增强和去噪：

均值滤波：平滑图像，去除噪声
高斯滤波：按高斯权重加权平均的平滑处理，适合抑制高斯噪声（会使边缘变模糊）
中值滤波：去除椒盐噪声
双边滤波：保边去噪

边缘检测

识别图像中的边界和轮廓：

Sobel算子：检测梯度变化
Canny算子：多阶段边缘检测
Prewitt算子：与Sobel类似的一阶梯度算子，但模板采用均匀权重
Laplacian算子：二阶导数检测

形态学操作

处理二值图像的几何结构：

腐蚀：缩小前景对象
膨胀：扩大前景对象
开运算：先腐蚀后膨胀
闭运算：先膨胀后腐蚀

频域处理

在频率域进行图像处理：

傅里叶变换：空间域到频域转换
小波变换：多尺度分析
频域滤波：低通、高通、带通滤波
图像压缩：JPEG等算法基础

特征提取

从图像中提取有意义的信息：

HOG特征：方向梯度直方图
SIFT特征：尺度不变特征变换
ORB特征：Oriented FAST and Rotated BRIEF，计算快速的二进制特征
纹理分析：GLCM等方法

Python图像处理示例

使用Python实现经典图像处理算法

Python图像处理基础

环境准备

# Install the required libraries
pip install opencv-python pillow numpy matplotlib scikit-image

图像读取与显示

# 使用OpenCV读取和显示图像
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

# Read the image. cv2.imread does not raise on failure: it returns None when
# the file is missing or cannot be decoded, so the result is checked here.
img = cv2.imread('image.jpg')
if img is None:
    raise OSError("cannot read image: 'image.jpg'")
# OpenCV stores channels in BGR order; convert to RGB before handing the
# array to matplotlib
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Show the image
plt.figure(figsize=(10, 8))
plt.imshow(img_rgb)
plt.axis('off')
plt.title('Original Image')
plt.show()

# Read the same file with PIL; the context manager closes the file handle
# once the metadata has been printed
with Image.open('image.jpg') as pil_img:
    print(f"图像尺寸: {pil_img.size}")
    print(f"图像模式: {pil_img.mode}")

几何变换示例

# 图像缩放
import cv2
import numpy as np

def resize_image(img_path, new_width, new_height):
    """
    Load an image from disk and resize it to exactly new_width x new_height.

    Bilinear interpolation (cv2.INTER_LINEAR) is used. The aspect ratio is
    only preserved if the caller passes proportional dimensions.

    :param img_path: path of the image file to read
    :param new_width: target width in pixels
    :param new_height: target height in pixels
    :return: the resized image returned by cv2.resize
    :raises OSError: if the image cannot be read
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising
        raise OSError(f"cannot read image: {img_path!r}")
    # cv2.resize expects the target size as (width, height)
    return cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_LINEAR)

def rotate_image(img, angle):
    """
    Rotate an image about its center while keeping the original canvas size.

    :param img: input image array; img.shape[:2] is read as (height, width)
    :param angle: rotation angle handed to cv2.getRotationMatrix2D
                  (degrees, positive is counter-clockwise in OpenCV's convention)
    :return: rotated image with the same width and height as the input
    """
    rows, cols = img.shape[:2]
    pivot = (cols // 2, rows // 2)

    # Scale factor 1.0: rotate only, do not zoom
    affine = cv2.getRotationMatrix2D(pivot, angle, 1.0)

    # The output size is given as (width, height)
    return cv2.warpAffine(img, affine, (cols, rows))

def translate_image(img, tx, ty):
    """
    Shift an image by (tx, ty) pixels on a canvas of the original size.

    :param img: input image array; img.shape[:2] is read as (height, width)
    :param tx: horizontal offset written into the affine matrix
    :param ty: vertical offset written into the affine matrix
    :return: translated image with the same width and height as the input
    """
    rows, cols = img.shape[:2]

    # 2x3 affine matrix: the identity plus a translation column
    shift = np.array([[1, 0, tx],
                      [0, 1, ty]], dtype=np.float32)

    return cv2.warpAffine(img, shift, (cols, rows))

滤波算法示例

# 各种滤波算法实现
import cv2
import numpy as np

def apply_filters(img_path):
    """
    Load an image and run four standard smoothing filters over it.

    :param img_path: path of the image file to read
    :return: dict with the keys 'original', 'mean', 'gaussian', 'median' and
             'bilateral', each mapping to an image
    :raises OSError: if the image cannot be read
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising
        raise OSError(f"cannot read image: {img_path!r}")

    # Mean (box) filter over a 5x5 window
    mean_filtered = cv2.blur(img, (5, 5))

    # Gaussian filter with a 5x5 kernel; sigma 0 lets OpenCV derive it from
    # the kernel size
    gaussian_filtered = cv2.GaussianBlur(img, (5, 5), 0)

    # Median filter with aperture size 5
    median_filtered = cv2.medianBlur(img, 5)

    # Bilateral filter: neighbourhood diameter 9, sigmaColor 75, sigmaSpace 75
    bilateral_filtered = cv2.bilateralFilter(img, 9, 75, 75)

    return {
        'original': img,
        'mean': mean_filtered,
        'gaussian': gaussian_filtered,
        'median': median_filtered,
        'bilateral': bilateral_filtered
    }

def custom_convolution(img, kernel):
    """
    Filter an image with a caller-supplied kernel.

    :param img: input image
    :param kernel: kernel passed to cv2.filter2D
    :return: filtered image
    """
    # ddepth=-1 keeps the output depth equal to the input depth
    filtered = cv2.filter2D(img, -1, kernel)
    return filtered

# Sharpening kernel: 9 at the center and -1 for the eight neighbours, so the
# weights sum to 1
sharpen_kernel = np.full((3, 3), -1)
sharpen_kernel[1, 1] = 9

# NOTE(review): `img` is not defined in this snippet; presumably it is the
# image loaded in the earlier read-and-display example — confirm
sharpened_img = custom_convolution(img, sharpen_kernel)

边缘检测示例

# 边缘检测算法实现
import cv2
import numpy as np

def edge_detection(img_path):
    """
    Load an image as grayscale and compute several edge maps from it.

    :param img_path: path of the image file to read
    :return: dict with the keys 'original', 'sobel_x', 'sobel_y',
             'sobel_combined', 'canny' and 'laplacian'
    :raises OSError: if the image cannot be read
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising
        raise OSError(f"cannot read image: {img_path!r}")

    # Sobel derivatives along x and y; the CV_64F output keeps negative
    # gradients that an 8-bit result would clip
    sobel_x = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=3)
    sobel_y = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=3)
    # Gradient magnitude from the two directional derivatives
    sobel_combined = np.sqrt(sobel_x**2 + sobel_y**2)

    # Canny edge detector with hysteresis thresholds 50 and 150
    canny_edges = cv2.Canny(img, threshold1=50, threshold2=150)

    # Laplacian (second-derivative) response
    laplacian = cv2.Laplacian(img, cv2.CV_64F)

    return {
        'original': img,
        'sobel_x': sobel_x,
        'sobel_y': sobel_y,
        'sobel_combined': sobel_combined,
        'canny': canny_edges,
        'laplacian': laplacian
    }

Golang图像处理示例

使用Golang实现图像处理算法

Golang图像处理基础

环境准备

# Install the required dependencies
go mod init image-processing
go get golang.org/x/image/...

图像读取与基本操作

package main

import (
    "image"
    "image/color"
    "image/jpeg"
    "image/png"
    "math"
    "os"

    _ "image/gif"
    // NOTE(review): the standard library has no image/tiff package; the TIFF
    // decoder lives in golang.org/x/image/tiff — confirm and update this path.
    _ "image/tiff"

    "golang.org/x/image/draw"
)

// loadImage opens filename and decodes it with image.Decode, which picks the
// decoder from the formats registered by the imported image packages.
// It returns the open error or the decode error unchanged.
func loadImage(filename string) (image.Image, error) {
    f, openErr := os.Open(filename)
    if openErr != nil {
        return nil, openErr
    }
    defer f.Close()

    // The format name reported by Decode is not needed here.
    decoded, _, decodeErr := image.Decode(f)
    return decoded, decodeErr
}

// saveJPEG encodes img as a JPEG with quality 90 and writes it to filename,
// creating the file or truncating an existing one.
//
// If encoding succeeds, the error from Close is returned: a failed close on a
// freshly written file can mean the data never reached the disk.
func saveJPEG(img image.Image, filename string) (err error) {
    file, err := os.Create(filename)
    if err != nil {
        return err
    }
    defer func() {
        // Keep the encode error if there is one; otherwise surface Close's.
        if closeErr := file.Close(); err == nil {
            err = closeErr
        }
    }()

    return jpeg.Encode(file, img, &jpeg.Options{Quality: 90})
}

// savePNG encodes img as a PNG and writes it to filename, creating the file
// or truncating an existing one.
//
// If encoding succeeds, the error from Close is returned: a failed close on a
// freshly written file can mean the data never reached the disk.
func savePNG(img image.Image, filename string) (err error) {
    file, err := os.Create(filename)
    if err != nil {
        return err
    }
    defer func() {
        // Keep the encode error if there is one; otherwise surface Close's.
        if closeErr := file.Close(); err == nil {
            err = closeErr
        }
    }()

    return png.Encode(file, img)
}

// getImageSize reports the width and height of img in pixels, derived from
// its bounds (so a non-zero origin does not affect the result).
func getImageSize(img image.Image) (int, int) {
    size := img.Bounds().Size()
    return size.X, size.Y
}

几何变换示例

// resizeImage scales img to newWidth x newHeight and returns the result as a
// new RGBA image whose bounds start at (0, 0).
//
// Resampling uses the Catmull-Rom kernel from golang.org/x/image/draw (a
// cubic interpolator, not bilinear). The aspect ratio is only preserved if
// the caller passes proportional dimensions. Negative dimensions are treated
// as 0 and produce an empty image.
func resizeImage(img image.Image, newWidth, newHeight int) image.Image {
    // image.Rect silently swaps inverted coordinates, so a negative size
    // would otherwise produce an image with a negative origin.
    if newWidth < 0 {
        newWidth = 0
    }
    if newHeight < 0 {
        newHeight = 0
    }
    dst := image.NewRGBA(image.Rect(0, 0, newWidth, newHeight))

    // Scale the whole source onto the whole destination.
    draw.CatmullRom.Scale(dst, dst.Bounds(), img, img.Bounds(), draw.Over, nil)

    return dst
}

// cropImage returns a copy of the part of img that lies inside the rectangle
// with top-left corner (x, y) and the given width and height.
//
// The rectangle is clipped to img's bounds. The result keeps the source
// coordinates: its bounds start at the clipped rectangle's Min, not at (0, 0).
func cropImage(img image.Image, x, y, width, height int) image.Image {
    // Clip the requested region so nothing outside the source is read.
    region := image.Rect(x, y, x+width, y+height).Intersect(img.Bounds())

    cropped := image.NewRGBA(region)
    draw.Draw(cropped, region, img, region.Min, draw.Src)
    return cropped
}

// rotateImage90 rotates img by a multiple of 90 degrees.
//
// times is the number of quarter turns; with the y axis pointing down, each
// turn moves the top-left pixel to the bottom-left corner (counter-clockwise
// on screen). Negative values and values above 3 are reduced modulo 4.
//
// When the reduced count is 0 the input image itself is returned. Otherwise
// the result is a new RGBA image whose bounds start at (0, 0), regardless of
// the origin of img's bounds.
func rotateImage90(img image.Image, times int) image.Image {
    // Normalize to 0..3 quarter turns.
    times %= 4
    if times < 0 {
        times += 4
    }
    if times == 0 {
        return img
    }

    bounds := img.Bounds()
    width, height := bounds.Dx(), bounds.Dy()

    // Odd quarter turns swap width and height.
    dstW, dstH := width, height
    if times != 2 {
        dstW, dstH = height, width
    }
    dst := image.NewRGBA(image.Rect(0, 0, dstW, dstH))

    // sx, sy are offsets from bounds.Min so that images whose bounds do not
    // start at (0, 0) map into the destination correctly.
    for sy := 0; sy < height; sy++ {
        for sx := 0; sx < width; sx++ {
            var dx, dy int
            switch times {
            case 1: // 90 degrees
                dx, dy = sy, width-1-sx
            case 2: // 180 degrees
                dx, dy = width-1-sx, height-1-sy
            default: // 270 degrees
                dx, dy = height-1-sy, sx
            }
            dst.Set(dx, dy, img.At(bounds.Min.X+sx, bounds.Min.Y+sy))
        }
    }

    return dst
}

滤波算法示例

// convolve applies kernel to the red, green and blue channels of img and
// returns the result as a new RGBA image with the same bounds as img.
//
// The kernel is applied as written (no flipping) and is centered on each
// pixel: row i, column j of the kernel weights the pixel at offset
// (j - len(row)/2, i - len(kernel)/2). Rows may differ in length, so
// non-square kernels are supported.
//
// Pixels outside the image are taken by mirroring about the edge pixel
// without repeating it, identically on all four sides. Each channel sum is
// rounded to the nearest integer and limited to 0..255. The alpha channel of
// the input is ignored and the output is fully opaque.
func convolve(img image.Image, kernel [][]float64) image.Image {
    bounds := img.Bounds()
    dst := image.NewRGBA(bounds)

    rowCenter := len(kernel) / 2

    for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
        for x := bounds.Min.X; x < bounds.Max.X; x++ {
            var rSum, gSum, bSum float64

            for ky, row := range kernel {
                srcY := reflectIndex(y+ky-rowCenter, bounds.Min.Y, bounds.Max.Y)
                colCenter := len(row) / 2

                for kx, weight := range row {
                    srcX := reflectIndex(x+kx-colCenter, bounds.Min.X, bounds.Max.X)

                    // RGBA returns 16-bit channels; work in 8-bit values.
                    r, g, b, _ := img.At(srcX, srcY).RGBA()
                    rSum += float64(r>>8) * weight
                    gSum += float64(g>>8) * weight
                    bSum += float64(b>>8) * weight
                }
            }

            dst.SetRGBA(x, y, color.RGBA{
                R: clampToByte(rSum),
                G: clampToByte(gSum),
                B: clampToByte(bSum),
                A: 255,
            })
        }
    }

    return dst
}

// reflectIndex maps i into the half-open range [lo, hi) by mirroring about
// the first and last valid index without repeating them. The result is always
// in range, even when i lies several widths outside. The range must be
// non-empty.
func reflectIndex(i, lo, hi int) int {
    n := hi - lo
    if n <= 1 {
        return lo
    }
    period := 2 * (n - 1)
    k := (i - lo) % period
    if k < 0 {
        k += period
    }
    if k >= n {
        k = period - k
    }
    return lo + k
}

// clampToByte rounds v to the nearest integer and limits it to 0..255.
// NaN maps to 0.
func clampToByte(v float64) uint8 {
    v = math.Round(v)
    switch {
    case math.IsNaN(v) || v <= 0:
        return 0
    case v >= 255:
        return 255
    }
    return uint8(v)
}

// clamp returns value limited to the closed interval [min, max]: min when
// value is below it, max when value is above it, and value otherwise.
func clamp(value float64, min, max float64) float64 {
    switch {
    case value < min:
        return min
    case value > max:
        return max
    default:
        return value
    }
}

// Predefined 3x3 filter kernels.
var (
    // meanKernel averages a pixel with its eight neighbours; the nine equal
    // weights sum to 1.
    meanKernel = [][]float64{
        {1.0 / 9, 1.0 / 9, 1.0 / 9},
        {1.0 / 9, 1.0 / 9, 1.0 / 9},
        {1.0 / 9, 1.0 / 9, 1.0 / 9},
    }

    // sharpenKernel weights the center by 9 and each neighbour by -1; the
    // weights sum to 1.
    sharpenKernel = [][]float64{
        {-1, -1, -1},
        {-1, 9, -1},
        {-1, -1, -1},
    }

    // edgeKernel weights the center by 8 and each neighbour by -1; the
    // weights sum to 0, so uniform regions map to 0.
    edgeKernel = [][]float64{
        {-1, -1, -1},
        {-1, 8, -1},
        {-1, -1, -1},
    }
)

色彩空间转换示例

// rgbToGrayscale converts img to an 8-bit grayscale image with the same
// bounds.
//
// Each pixel is converted with color.GrayModel, which applies the luma
// weights 0.299, 0.587 and 0.114 in fixed point and rounds to the nearest
// level, so no brightness is lost to truncation.
func rgbToGrayscale(img image.Image) image.Image {
    bounds := img.Bounds()
    grayImg := image.NewGray(bounds)

    for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
        for x := bounds.Min.X; x < bounds.Max.X; x++ {
            // GrayModel.Convert always yields a color.Gray.
            grayImg.SetGray(x, y, color.GrayModel.Convert(img.At(x, y)).(color.Gray))
        }
    }

    return grayImg
}

// rgbToHSV converts 8-bit RGB components to HSV.
//
// The returned hue is in degrees in [0, 360); saturation and value are in
// [0, 1]. Grays (all three channels equal) report hue 0, and black also
// reports saturation 0.
func rgbToHSV(r, g, b uint8) (float64, float64, float64) {
    red := float64(r) / 255.0
    green := float64(g) / 255.0
    blue := float64(b) / 255.0

    hi := math.Max(math.Max(red, green), blue)
    lo := math.Min(math.Min(red, green), blue)
    chroma := hi - lo

    // Saturation is chroma relative to the brightest channel.
    saturation := 0.0
    if hi != 0 {
        saturation = chroma / hi
    }

    // Hue depends on which channel is the largest; ties resolve in the order
    // red, green, blue.
    hue := 0.0
    switch {
    case chroma == 0:
        // Achromatic: hue stays 0.
    case hi == red:
        hue = 60 * math.Mod((green-blue)/chroma, 6)
    case hi == green:
        hue = 60 * ((blue-red)/chroma + 2)
    default:
        hue = 60 * ((red-green)/chroma + 4)
    }

    // The red sector can yield a negative angle; wrap it into [0, 360).
    if hue < 0 {
        hue += 360
    }

    // Value is the brightest channel.
    return hue, saturation, hi
}

// hsvToRGB converts an HSV color to 8-bit RGB components.
//
// h is the hue in degrees; any finite value is accepted and wrapped into
// [0, 360), so 360 is red and -120 is blue. s and v are expected in [0, 1].
// Each channel is rounded to the nearest integer and limited to 0..255, so
// exact 8-bit values come back unchanged and out-of-range s or v cannot
// overflow a channel.
func hsvToRGB(h, s, v float64) (uint8, uint8, uint8) {
    // Wrap the hue; math.Mod keeps the sign of h, so shift negatives up.
    h = math.Mod(h, 360)
    if h < 0 {
        h += 360
    }

    c := v * s // chroma
    x := c * (1 - math.Abs(math.Mod(h/60, 2)-1))
    m := v - c // offset added to every channel

    var rf, gf, bf float64
    switch {
    case h < 60:
        rf, gf, bf = c, x, 0
    case h < 120:
        rf, gf, bf = x, c, 0
    case h < 180:
        rf, gf, bf = 0, c, x
    case h < 240:
        rf, gf, bf = 0, x, c
    case h < 300:
        rf, gf, bf = x, 0, c
    default:
        rf, gf, bf = c, 0, x
    }

    // toByte scales a channel from [0, 1] to 0..255 with rounding.
    toByte := func(f float64) uint8 {
        scaled := math.Round(f * 255)
        switch {
        case math.IsNaN(scaled) || scaled <= 0:
            return 0
        case scaled >= 255:
            return 255
        }
        return uint8(scaled)
    }

    return toByte(rf + m), toByte(gf + m), toByte(bf + m)
}