avatar
童琦杰
Jun 19, 2022 · Technology

基于LIBSVM的验证码识别

获取样本数据

获取批量验证码图片

处理样本数据

使用OpenCV将验证码图片二值化。针对不同的平台,需要引入对应平台的runtime包。

xml
<Project Sdk="Microsoft.NET.Sdk">
  <!-- OpenCvSharp4 plus the Windows native runtime package; on another platform
       reference that platform's runtime package instead. Both entries pin the same version. -->
  <ItemGroup>
    <PackageReference Include="OpenCvSharp4" Version="4.4.0.20200915" />
    <PackageReference Include="OpenCvSharp4.runtime.win" Version="4.4.0.20200915" />
  </ItemGroup>
</Project>
csharp
/// <summary>
/// Converts a captcha image into a two-level (black/white) image and saves it under
/// <paramref name="folder"/> using the source file's name.
/// </summary>
/// <param name="file">Path of the image to read.</param>
/// <param name="folder">
/// Destination folder. <c>EnsureFolder</c> is a helper defined elsewhere;
/// presumably it creates the folder when missing (confirm against its definition).
/// </param>
/// <returns>The path the processed image was written to: <c>{folder}/{file.Name()}</c>.</returns>
public string Binarize(string file, string folder)
{
    folder.EnsureFolder();

    // Mat instances and the structuring element are IDisposable wrappers in OpenCvSharp;
    // dispose them deterministically instead of leaving cleanup to the finalizer.
    using (var src = Cv2.ImRead(file))
    using (var dst = new Mat())
    using (var gray = new Mat())
    using (var binary = new Mat())
    using (var dilate = new Mat())
    using (var kernel = Cv2.GetStructuringElement(MorphShapes.Rect, new OpenCvSharp.Size(1, 1)))
    {
        // Mean-shift filtering runs before the grayscale conversion.
        Cv2.PyrMeanShiftFiltering(src, dst, 10, 10);
        Cv2.CvtColor(dst, gray, ColorConversionCodes.BGR2GRAY);

        // Inverted binary threshold with the Otsu flag set; the BitwiseNot below
        // flips the inverted result back.
        Cv2.Threshold(gray, binary, 0, 255, ThresholdTypes.BinaryInv | ThresholdTypes.Otsu);

        // Same 1x1 rectangular kernel and single iteration as the original pipeline.
        Cv2.Dilate(binary, dilate, kernel, iterations: 1);
        Cv2.BitwiseNot(dilate, dilate);

        // Build the output path once and reuse it for both the write and the return value.
        var output = $"{folder}/{file.Name()}";
        Cv2.ImWrite(output, dilate);
        return output;
    }
}

裁剪样本数据

将验证码图片中的每个字符裁剪成一张单独的图片。由于各字符之间没有相互重叠的像素列,所以可以按列扫描,简单处理如下。

图片处理,这里用到了SixLabors.ImageSharp库。

xml
<Project Sdk="Microsoft.NET.Sdk">
  <!-- SixLabors.ImageSharp is the image library used by the character-slicing code. -->
  <ItemGroup>
    <PackageReference Include="SixLabors.ImageSharp" Version="2.1.2" />
  </ItemGroup>
</Project>
csharp
/// <summary>
/// Splits an image into one image per character by scanning columns left to right.
/// A character is a maximal run of consecutive columns that each contain at least one
/// dark pixel (as decided by <c>HasValueInColumn</c>).
/// </summary>
/// <param name="file">Path of the image to slice.</param>
/// <returns>
/// The extracted character images in left-to-right order; an empty array when no
/// column contains a dark pixel.
/// </returns>
public Image<Rgba32>[] Slice(string file)
{
    // Accumulate in a List instead of re-creating the array for every character;
    // fully qualified so no extra using directive is required.
    var slices = new System.Collections.Generic.List<Image<Rgba32>>();

    using (var inputStream = StreamUtils.GetFileReadStream(file))
    using (var image = Image.Load<Rgba32>(inputStream, out _))
    {
        // Index of the first column of the run currently being scanned, or -1 when
        // the scan is between characters.
        var start = -1;

        for (var x = 0; x < image.Width; x++)
        {
            var hasInk = HasValueInColumn(image, x);

            if (hasInk && start < 0)
            {
                // First inked column after a gap: a new character begins here.
                start = x;
            }
            else if (!hasInk && start >= 0)
            {
                // First blank column after a run: the character spans start..x-1.
                slices.Add(ExtractImage(image, start, x - 1));
                start = -1;
            }
        }

        if (start >= 0)
        {
            // The last character touches the right edge of the image.
            slices.Add(ExtractImage(image, start, image.Width - 1));
        }
    }

    return slices.ToArray();
}

/// <summary>
/// Reports whether column <paramref name="x"/> contains at least one dark pixel,
/// i.e. a pixel whose R, G and B components are all below 128.
/// </summary>
/// <param name="image">Image to inspect.</param>
/// <param name="x">Column index to scan from top to bottom.</param>
/// <returns><c>true</c> as soon as a dark pixel is found; otherwise <c>false</c>.</returns>
private bool HasValueInColumn(Image<Rgba32> image, int x)
{
    var row = 0;
    while (row < image.Height)
    {
        Rgba32 pixel = image[x, row];
        var isDark = pixel.R < 128 && pixel.G < 128 && pixel.B < 128;
        if (isDark)
        {
            return true;
        }

        row++;
    }

    return false;
}

/// <summary>
/// Reports whether row <paramref name="y"/> contains at least one dark pixel
/// (R, G and B all below 128) between columns <paramref name="startX"/> and
/// <paramref name="endX"/>, both inclusive.
/// </summary>
/// <param name="image">Image to inspect.</param>
/// <param name="startX">First column of the range (inclusive).</param>
/// <param name="endX">Last column of the range (inclusive).</param>
/// <param name="y">Row index to scan.</param>
/// <returns><c>true</c> as soon as a dark pixel is found; otherwise <c>false</c>.</returns>
private bool HasValueInRow(Image<Rgba32> image, int startX, int endX, int y)
{
    for (var column = startX; column <= endX; column++)
    {
        Rgba32 pixel = image[column, y];

        // Any component at or above 128 means the pixel is not dark; keep scanning.
        if (pixel.R >= 128 || pixel.G >= 128 || pixel.B >= 128)
        {
            continue;
        }

        return true;
    }

    return false;
}

// Background color (hex "ffffff", white) passed to the Image constructor when
// ExtractImage allocates a new character image.
private readonly Rgba32 _BackgroundColor = Rgba32.ParseHex("ffffff");

/// <summary>
/// Copies the tight bounding box of the dark pixels found between columns
/// <paramref name="startX"/> and <paramref name="endX"/> (both inclusive) into a new image.
/// The vertical bounds are the first and last rows for which <c>HasValueInRow</c>
/// reports a dark pixel inside that column range.
/// </summary>
/// <param name="image">Source image.</param>
/// <param name="startX">First column of the region (inclusive).</param>
/// <param name="endX">Last column of the region (inclusive).</param>
/// <returns>A new image of size (endX - startX + 1) x (endY - startY + 1).</returns>
/// <exception cref="System.ArgumentOutOfRangeException">
/// The column range contains no dark pixel, so there is nothing to extract.
/// </exception>
private Image<Rgba32> ExtractImage(Image<Rgba32> image, int startX, int endX)
{
    // Top-down scan for the first row containing a dark pixel.
    var startY = -1;
    for (var y = 0; y < image.Height; y++)
    {
        if (HasValueInRow(image, startX, endX, y))
        {
            startY = y;
            break;
        }
    }

    if (startY < 0)
    {
        // Without this guard the copy loop below would index row -1 and fail only
        // after the target image had already been allocated.
        throw new System.ArgumentOutOfRangeException(
            nameof(startX),
            $"Columns {startX}..{endX} contain no dark pixel to extract.");
    }

    // Bottom-up scan for the last row containing a dark pixel. Row startY is already
    // known to qualify, so the scan can stop just above it.
    var endY = startY;
    for (var y = image.Height - 1; y > startY; y--)
    {
        if (HasValueInRow(image, startX, endX, y))
        {
            endY = y;
            break;
        }
    }

    var width = endX - startX + 1;
    var height = endY - startY + 1;
    var targetImage = new Image<Rgba32>(width, height, _BackgroundColor);

    // Copy the bounding box pixel by pixel, shifting it to the target's origin.
    for (var x = startX; x <= endX; x++)
    {
        for (var y = startY; y <= endY; y++)
        {
            targetImage[x - startX, y - startY] = image[x, y];
        }
    }

    return targetImage;
}
© 2015-2022 tongqijie.com 版权所有 沪ICP备17000682号