使用深度学习进行图像类任务时，通常网络的输入大小是固定的，最近在进行涉及到文字检测的工作中，由于预处理resize缩小了原图，导致字体变模糊，从而检测失败，后来想到使用overlap来对图像进行缩放裁剪，即先将原图缩放到一定尺寸，再裁剪得到网络的输入。

好了，来说正题：使用yolov3，网络的输入是352x352x3，而输入图像大小为几百上千不等，因此需对原图进行resize。起初直接进行缩放 + 填充，检测的mAP很低，后来分析发现有些352x352的输入图像中的文字已经很模糊，因此直接缩放的方案不可行，改进后方案如下：

原图最大尺寸大于1000，则resize到800x800，再裁剪为9个352x352，overlap为128个像素
原图最大尺寸大于500且不超过1000，则resize到608x608，再裁剪为4个352x352，overlap为96个像素
原图最大尺寸不超过500，则resize到352x352。

python实现代码如下，使用了PIL、opencv库，将整个目录下的图像全部做缩放裁剪处理，代码包含如下功能：

遍历某一目录的文件
opencv进行图像载入及保存
opencv进行缩放裁剪

PIL进行图像显示

import numpy as np
from PIL import Image
import cv2
import os

输入bgr通道，并显示图像

def img_show(img):
    """Display a BGR image using PIL's image viewer.

    Args:
        img: Image array in BGR channel order; it is converted with
            ``cv2.COLOR_BGR2RGB`` before being handed to PIL.
    """
    # Swap BGR -> RGB; the result is a NumPy array that PIL can wrap.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    print(type(img_rgb), img_rgb.shape)
    pil_img = Image.fromarray(img_rgb)
    pil_img.show()

完成图像的等比缩放及黑色填充

def img_resize(cvImageSrc, net_size, width, height):
    """Fit an image into a ``net_size`` x ``net_size`` square.

    Three cases are handled:

    * The image is already ``net_size`` x ``net_size``: it is returned as is.
    * The horizontal and vertical scale factors differ by less than 0.001:
      the image is resized directly to the square.
    * Otherwise the image is scaled with its aspect ratio preserved so that
      its longer side becomes ``net_size``, and the result is pasted, centred,
      onto a black canvas.

    Args:
        cvImageSrc: Source image array. The padded path writes into a
            3-channel canvas, so a 3-channel image is assumed there.
        net_size: Side length in pixels of the square output.
        width: Width of ``cvImageSrc`` in pixels.
        height: Height of ``cvImageSrc`` in pixels.

    Returns:
        ``cvImageSrc`` itself when no resizing is needed, the directly
        resized image when the scale factors match, or a new
        ``(net_size, net_size, 3)`` ``uint8`` array holding the padded image.
    """
    if width == net_size and height == net_size:
        return cvImageSrc

    scale_w = net_size / width
    scale_h = net_size / height

    # When both axes scale (almost) equally the image is (almost) square,
    # so no padding is needed.
    if abs(scale_w - scale_h) < 0.001:
        return cv2.resize(cvImageSrc, (net_size, net_size))

    # The smaller scale factor belongs to the longer side; that side is
    # mapped to net_size and the other side follows the aspect ratio.
    if scale_w < scale_h:
        new_w = net_size
        new_h = max(1, (height * net_size) / width)
    else:
        new_h = net_size
        new_w = max(1, (width * net_size) / height)

    # Round to the nearest integer, then down to an even number; never let
    # the size reach 0, which cv2.resize would reject.
    net_w = int(new_w + 0.5)
    net_h = int(new_h + 0.5)
    net_w = max(1, net_w - net_w % 2)
    net_h = max(1, net_h - net_h % 2)

    det_matROI = cv2.resize(cvImageSrc, (net_w, net_h))

    # Centre the resized image; computing the offsets from the integer
    # sizes guarantees the paste region lies inside the canvas.
    base_w = (net_size - net_w) // 2
    base_h = (net_size - net_h) // 2

    det_mat = np.zeros((net_size, net_size, 3), dtype="uint8")
    det_mat[base_h:base_h + net_h, base_w:base_w + net_w, :] = det_matROI
    return det_mat

# Batch driver: resize every image found in `rootdir` and write 352x352
# network inputs (one image or a grid of overlapping tiles) to `outdir`.
baseRoot = "/Users/lemonhe/Documents/CNN/dataset/01-data/"
rootdir = baseRoot + "dataset_test"
outdir = baseRoot + "test1"

# Longest-side thresholds (pixels) that select the tiling scheme.
threshold1 = 1000
threshold2 = 500

# Side length of every image written out (the network input size).
tile_size = 352


def _save_tiles(det_mat, offsets, size, out_dir, index):
    """Crop a square grid of size x size tiles and write each as a JPEG.

    `offsets` holds the top/left pixel positions of the tiles along one axis;
    the same positions are used for rows and columns. Files are named
    ``img<index>_<row><col>.jpg`` with 1-based row/column numbers.
    """
    for row, top in enumerate(offsets, start=1):
        for col, left in enumerate(offsets, start=1):
            tile = det_mat[top:top + size, left:left + size, :]
            out_path = os.path.join(
                out_dir, "img{}_{}{}.jpg".format(index, row, col))
            # imwrite reports failure through its return value only.
            if not cv2.imwrite(out_path,
                               np.ascontiguousarray(tile, dtype=np.uint8)):
                print("failed to write", out_path)


# Make sure the output directory exists before any image is written.
if not os.path.isdir(outdir):
    os.makedirs(outdir)

# Sorted so that the numbering of the output files is reproducible.
entries = sorted(os.listdir(rootdir))
print(len(entries))
count = 0

for name in entries:
    path = os.path.join(rootdir, name)
    print(path)
    if os.path.isfile(path):
        img = cv2.imread(path)
        if img is None:
            print("this is nonetype")
        else:
            height, width, channel = img.shape
            print(height, width, channel)
            max_dim = max(height, width)
            if max_dim > threshold1:
                # 800 = 3 * 352 - 2 * 128: a 3x3 grid with 128 px overlap,
                # i.e. a stride of 224 px between neighbouring tiles.
                det_mat = img_resize(img, 800, width, height)
                _save_tiles(det_mat, (0, 224, 448), tile_size, outdir, count)
            elif max_dim > threshold2:
                # 608 = 2 * 352 - 96: a 2x2 grid with 96 px overlap,
                # i.e. a stride of 256 px between neighbouring tiles.
                det_mat = img_resize(img, 608, width, height)
                _save_tiles(det_mat, (0, 256), tile_size, outdir, count)
            else:
                # Small image: a single network-sized output, no tiling.
                det_mat = img_resize(img, tile_size, width, height)
                path_352 = os.path.join(outdir, "img{}.jpg".format(count))
                if not cv2.imwrite(path_352, np.uint8(det_mat)):
                    print("failed to write", path_352)
    # The counter advances for every directory entry, image or not.
    count = count + 1
    print(count)