Pytorch卷积实现边缘检测

发表于： 2023-06-23 分类于： programming

字数：2889 阅读：≈ 6分钟浏览：

本文调用 pytorch 的卷积函数，实现两种边缘检测：

边缘检测（突出中间值）
sobel(索贝尔) 边缘检测

对比这两种实现，sobel(索贝尔) 边缘检测的检测效果看起来更清楚。

阅读前提：

了解深度学习卷积的概念
了解pytorch 的卷积函数、tensor数据类型

边缘检测（突出中间值）

卷积核：

[[-1, -1, -1],
 [-1, 8, -1],
 [-1, -1, -1]]

检测效果

第一个例子 输入：cat.jpg

卷积参数： kernel_size=3, padding=1, stride=1，输出的图像与原图大小一致。卷积效果：

第二个例子 原图

卷积参数： kernel_size=3, padding=1, stride=1，输出的图像与原图大小一致。

卷积结果（图片放大了）：

卷积函数

卷积核：

[[-1, -1, -1],
 [-1, 8, -1],
 [-1, -1, -1]]

def nn_conv2d_2(im):
    """Apply the centre-emphasising 3x3 edge kernel through an ``nn.Conv2d`` layer.

    Args:
        im (tensor): Input image tensor with a single channel (the layer is
            built with one input and one output channel).

    Returns:
        tensor: The raw convolution response. With kernel_size=3, padding=1
        and stride=1 it has the same spatial size as ``im``. No clamping or
        image conversion is done here.
    """
    # Bias-free single-channel layer; its random initial weight is replaced below.
    layer = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3,
                      stride=1, padding=1, bias=False)
    # Centre weight 8, all eight neighbours -1, laid out as (out, in, kH, kW).
    weights = np.array(
        [[-1, -1, -1],
         [-1, 8, -1],
         [-1, -1, -1]],
        dtype='float32',
    ).reshape((1, 1, 3, 3))
    layer.weight.data = torch.from_numpy(weights)
    return layer(im)

完整程序

import torch
import numpy as np
from torch import nn
from PIL import Image
import torch.nn.functional as F
import os


def functional_conv2d(im):
    """Apply the centre-emphasising 3x3 edge kernel with ``F.conv2d``.

    Args:
        im (tensor): Input image tensor with a single channel (the weight
            has shape (1, 1, 3, 3)).

    Returns:
        tensor: The raw convolution response. With a 3x3 kernel, padding=1
        and the default stride of 1 it has the same spatial size as ``im``.
    """
    # Centre weight 8, all eight neighbours -1, laid out as (out, in, kH, kW).
    weight = torch.tensor(
        [[-1, -1, -1],
         [-1, 8, -1],
         [-1, -1, -1]],
        dtype=torch.float32,
    ).reshape(1, 1, 3, 3)
    return F.conv2d(im, weight, padding=1)


def nn_conv2d_2(im):
    """Apply the centre-emphasising 3x3 edge kernel through an ``nn.Conv2d`` layer.

    Args:
        im (tensor): Input image tensor with a single channel (the layer is
            built with one input and one output channel).

    Returns:
        tensor: The raw convolution response. With kernel_size=3, padding=1
        and stride=1 it has the same spatial size as ``im``. No clamping or
        image conversion is done here.
    """
    # Bias-free single-channel layer; its random initial weight is replaced below.
    layer = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3,
                      stride=1, padding=1, bias=False)
    # Centre weight 8, all eight neighbours -1, laid out as (out, in, kH, kW).
    weights = np.array(
        [[-1, -1, -1],
         [-1, 8, -1],
         [-1, -1, -1]],
        dtype='float32',
    ).reshape((1, 1, 3, 3))
    layer.weight.data = torch.from_numpy(weights)
    return layer(im)

  
def main():
    """Run the centre-emphasising edge detector on ``./cat.jpg``.

    Changes the working directory to the folder containing this script,
    loads ``cat.jpg`` as a grayscale image, convolves it with
    ``functional_conv2d``, clamps the response to [0, 255] and writes the
    result to ``edge_result.jpg``. Intermediate shapes and values are printed.
    """
    # Resolve relative paths against the script's own directory.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(script_dir)
    print(os.getcwd())

    # Load as grayscale ('L') and convert to a float32 matrix.
    gray = np.array(Image.open('./cat.jpg').convert('L'), dtype='float32')
    # Reshape to (1, 1, rows, cols) before handing the tensor to the convolution.
    rows, cols = gray.shape[0], gray.shape[1]
    im = torch.from_numpy(gray.reshape((1, 1, rows, cols)))
    print("im.shape: ", im.shape)

    # Edge detection; nn_conv2d_2(im) is the nn.Conv2d-based alternative.
    # The response is then limited to the 0-255 range.
    response = functional_conv2d(im).clamp(min=0, max=255)

    # Drop the size-1 dimensions and go back to a numpy array.
    edge_detect = response.squeeze().detach().numpy()
    print("edge_dect shape:", edge_detect.shape)
    print("edge_dect: ", edge_detect)

    # Build a PIL image from the float array, then convert it to 8-bit grayscale.
    im_image = Image.fromarray(edge_detect).convert('L')

    # Round-trip through numpy only to print what will be saved.
    saved_pixels = np.array(im_image, dtype='uint8')
    print("im_L_numpy.shape: ", saved_pixels.shape)
    print("im_L_numpy: ", saved_pixels)

    # Write the result next to the script.
    im_image.save('edge_result.jpg', quality=95)

# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()

检测竖直方向的轮廓

卷积核：检测轮廓的效果不如上面的突出中间值的轮廓检测，因为该卷积核只对水平方向上的像素值变化有响应，也就是只能检测出竖直方向的轮廓，而突出中间值的轮廓检测不会有这个问题。

[[1, 0, -1],
 [2, 0, -2],
 [1, 0, -1]]

检测效果

原图：

卷积参数： kernel_size=3, padding=0, stride=1，输出的图像比原图小2个像素（宽和高各减少2）。

卷积效果：与突出中间值的结果差不多，但是由于该卷积核只检测竖直方向的轮廓（即水平方向上相邻像素值的差值），可以看到有些水平的轮廓没有显示出来。比如图片中洗衣机的圆环的轮廓出现断裂。

检测竖直方向的卷积函数

def functional_conv2d_vertical(im):
    """Detect vertical contours with ``F.conv2d``.

    Args:
        im (tensor): Input image tensor with a single channel (the weight
            has shape (1, 1, 3, 3)).

    Returns:
        tensor: Absolute value of the convolution response. With padding=0
        each spatial dimension is 2 smaller than in ``im``.
    """
    # Columns weighted +, 0, -: responds to left/right intensity differences.
    weight = torch.tensor(
        [[1, 0, -1],
         [2, 0, -2],
         [1, 0, -1]],
        dtype=torch.float32,
    ).reshape(1, 1, 3, 3)
    response = F.conv2d(im, weight, padding=0)
    # The kernel is signed, so the response is negative when the bright side
    # is on the right; the magnitude is what marks a contour.
    return response.abs()

上面这段函数有个关键点：

# Make every value of the output tensor non-negative
edge_detect = torch.abs(edge_detect)

这是因为卷积核是有方向的，计算时，卷积核第一列是正数，第3列是负数，如果卷积时，第一列对应的原图数值很小，而第3列对应原图的数值很大，那么卷积核与原图元素的卷积结果是负数，但这种情况是一个轮廓点，所以需要将负数变为正数。不然后续转换为灰度图时，负数会变为0。这样就丢失了轮廓。虽然这种方式可以解决负数的问题，不过水平的轮廓还是无法解决的，如果需要考虑水平轮廓可以采用sobel 算子。

sobel(索贝尔) 边缘检测

sobel 的实现思路就是使用两个算子，一个检测水平方向的算子，另一个是检测垂直方向的算子，然后将两个方向的边缘进行叠加：将水平方向与竖直方向的边缘检测结果进行平方、相加，再开方

完整程序在最后面。

检测效果

原图：效果还是不错的：

sobel(索贝尔) 卷积函数

检测竖直方向轮廓的卷积函数：

def functional_conv2d_vertical(im):
    """Detect vertical contours with ``F.conv2d``.

    Args:
        im (tensor): Input image tensor with a single channel (the weight
            has shape (1, 1, 3, 3)).

    Returns:
        tensor: Absolute value of the convolution response. With padding=0
        each spatial dimension is 2 smaller than in ``im``.
    """
    # Columns weighted +, 0, -: responds to left/right intensity differences.
    weight = torch.tensor(
        [[1, 0, -1],
         [2, 0, -2],
         [1, 0, -1]],
        dtype=torch.float32,
    ).reshape(1, 1, 3, 3)
    response = F.conv2d(im, weight, padding=0)
    # The kernel is signed, so the response is negative when the bright side
    # is on the right; the magnitude is what marks a contour.
    return response.abs()

卷积核：

[[1, 0, -1],
 [2, 0, -2],
 [1, 0, -1]]

检测轮廓的效果不如上面的突出中间值的轮廓检测，因为该卷积核只对水平方向上的像素值变化有响应，也就是只能检测出竖直方向的轮廓，而突出中间值的轮廓检测不会有这个问题。

检测水平方向轮廓的卷积函数：

def functional_conv2d_horizontal(im):
    """Detect horizontal contours with ``F.conv2d``.

    Args:
        im (tensor): Input image tensor with a single channel (the weight
            has shape (1, 1, 3, 3)).

    Returns:
        tensor: Absolute value of the convolution response. With padding=0
        each spatial dimension is 2 smaller than in ``im``.
    """
    # Rows weighted +, 0, -: responds to top/bottom intensity differences.
    weight = torch.tensor(
        [[1, 2, 1],
         [0, 0, 0],
         [-1, -2, -1]],
        dtype=torch.float32,
    ).reshape(1, 1, 3, 3)
    response = F.conv2d(im, weight, padding=0)
    # The kernel is signed, so the response is negative when the bright side
    # is at the bottom; the magnitude is what marks a contour.
    return response.abs()

检测水平方向卷积核：

[[1, 2, 1],
 [0, 0, 0],
 [-1,-2,-1]]

完整程序

import torch
import numpy as np
from torch import nn
from PIL import Image
import torch.nn.functional as F
import os


def functional_conv2d_vertical(im):
    """Detect vertical contours with ``F.conv2d``.

    Args:
        im (tensor): Input image tensor with a single channel (the weight
            has shape (1, 1, 3, 3)).

    Returns:
        tensor: Absolute value of the convolution response. With padding=0
        each spatial dimension is 2 smaller than in ``im``.
    """
    # Columns weighted +, 0, -: responds to left/right intensity differences.
    weight = torch.tensor(
        [[1, 0, -1],
         [2, 0, -2],
         [1, 0, -1]],
        dtype=torch.float32,
    ).reshape(1, 1, 3, 3)
    response = F.conv2d(im, weight, padding=0)
    # The kernel is signed, so the response is negative when the bright side
    # is on the right; the magnitude is what marks a contour.
    return response.abs()

def functional_conv2d_horizontal(im):
    """Detect horizontal contours with ``F.conv2d``.

    Args:
        im (tensor): Input image tensor with a single channel (the weight
            has shape (1, 1, 3, 3)).

    Returns:
        tensor: Absolute value of the convolution response. With padding=0
        each spatial dimension is 2 smaller than in ``im``.
    """
    # Rows weighted +, 0, -: responds to top/bottom intensity differences.
    weight = torch.tensor(
        [[1, 2, 1],
         [0, 0, 0],
         [-1, -2, -1]],
        dtype=torch.float32,
    ).reshape(1, 1, 3, 3)
    response = F.conv2d(im, weight, padding=0)
    # The kernel is signed, so the response is negative when the bright side
    # is at the bottom; the magnitude is what marks a contour.
    return response.abs()
  
def main():
    """Run Sobel edge detection on ``./cat.jpg``.

    Changes the working directory to the folder containing this script,
    loads ``cat.jpg`` as a grayscale image, computes the horizontal and
    vertical responses with ``F.conv2d``-based helpers, combines them as
    sqrt(h**2 + v**2), clamps to [0, 255] and writes ``edge_result.jpg``.
    Intermediate shapes and values are printed.
    """
    # Resolve relative paths against the script's own directory.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(script_dir)
    print(os.getcwd())

    # Load as grayscale ('L') and convert to a float32 matrix.
    gray = np.array(Image.open('./cat.jpg').convert('L'), dtype='float32')
    # Reshape to (1, 1, rows, cols) before handing the tensor to the convolution.
    rows, cols = gray.shape[0], gray.shape[1]
    im = torch.from_numpy(gray.reshape((1, 1, rows, cols)))
    print("im.shape: ", im.shape)

    # Per-direction responses, each limited to the 0-255 range.
    grad_h = functional_conv2d_horizontal(im).clamp(min=0, max=255)
    grad_v = functional_conv2d_vertical(im).clamp(min=0, max=255)

    # Combine both directions: square, add, take the square root.
    magnitude = (grad_h.pow(2) + grad_v.pow(2)).sqrt()
    # The combined value can exceed 255 again, so clamp once more.
    magnitude = magnitude.clamp(min=0, max=255)

    # Drop the size-1 dimensions and go back to a numpy array.
    edge_detect = magnitude.squeeze().detach().numpy()
    print("edge_dect shape:", edge_detect.shape)
    print("edge_dect: ", edge_detect)

    # Build a PIL image from the float array, then convert it to 8-bit grayscale.
    im_image = Image.fromarray(edge_detect).convert('L')

    # Round-trip through numpy only to print what will be saved.
    saved_pixels = np.array(im_image, dtype='uint8')
    print("im_L_numpy.shape: ", saved_pixels.shape)
    print("im_L_numpy: ", saved_pixels)

    # Write the result next to the script.
    im_image.save('edge_result.jpg', quality=95)

# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()

数值裁剪到0-255

注意这个函数： edge_detect_h = torch.clamp(edge_detect_h, min=0, max=255)

它的目的： torch.clamp 是 PyTorch 中的一个函数，它的作用是将输入张量中的每个元素限制在指定范围内，并返回一个新的张量。具体来说，torch.clamp(input, min, max) 函数将输入张量 input 中的每个元素限制在 [min, max] 的范围内，如果元素小于 min，则将其设置为 min；如果元素大于 max，则将其设置为 max；否则保持不变。

例如，如果有一个张量 x = torch.tensor([1, 2, 3, 4, 5])，我们可以使用 torch.clamp(x, 2, 4) 将其限制在 [2, 4] 的范围内，得到一个新的张量 torch.tensor([2, 2, 3, 4, 4])。

简单来说：它可以将小于0的数值变为0，大于255的数值变为255。之前考虑过下面的操作：

# Linear rescale so that the largest value maps to 255 (the alternative to torch.clamp).
edge_detect = edge_detect * 255.0 / edge_detect.max()

但是这种将数值线性拉伸到区间 [0, 255] 的做法，容易受到少数很大的值的干扰，导致最后的图片整体亮度偏低。之前还考虑过使用直方图均衡化来调节对比度，但是这样会凸显一些之前不明显的轮廓，这不符合我的原意。最后使用 torch.clamp 效果就挺好了。

参考

第五章_卷积神经网络 https://neucrack.com/p/377