Deep learning for BARCODE Deblurring Part 1: Create training datasets

4 min read · Jan 30, 2022

In this part, we will go through the steps to create, augment, and save a dataset with python-barcode and albumentations. First, we create the folders and subfolders for storing the datasets.

import os


def create_hierarchy(self):
    """Create the ./datasets/<split>_0/<barcode type>/ output folders.

    Reads ``self.folder`` (split names such as train, val, test) and
    ``self.type_barcode`` (barcode type names such as ean or isbn) and makes
    one directory per (split, type) pair.
    """
    for f in self.folder:
        for t in self.type_barcode:
            # exist_ok makes repeated runs a no-op without a separate
            # os.path.exists() check.
            os.makedirs(f'./datasets/{f}_0/{t}/', exist_ok=True)

In this work, I used python-barcode to create the barcodes for training the model. With this library, you can create eleven different kinds of barcodes. I only work with EAN in this example, so I used randint to generate the codes. You should add conditions for other barcode types if needed. For example, an ISBN needs to start with 978, 97910, or 97911. ImageWriter is used if you want to save barcodes as images rather than SVG.

from barcode.writer import ImageWriter
import barcode
import random


def generate_code(self, types):
    """Build a barcode object of the given type from a random 13-digit number.

    Args:
        types: barcode type name passed to ``barcode.get_barcode_class``.

    Returns:
        A ``(code, random_code)`` tuple: the barcode object, configured to
        render through ``ImageWriter``, and the integer it was built from.
    """
    # Type-specific constraints on the digits (e.g. a required prefix) can be
    # added here, keyed on ``types``.
    # NOTE(review): some symbologies may truncate the input or append their own
    # check digit, in which case the encoded value would differ from
    # ``random_code`` -- confirm against the python-barcode documentation.
    barcode_class = barcode.get_barcode_class(types)
    random_code = random.randint(10 ** 12, 10 ** 13 - 1)
    code = barcode_class(str(random_code), writer=ImageWriter())
    return code, random_code

To generate augmented images, I used the albumentations library. I defined some common types of augmentation that are often encountered in real-world scenarios while scanning images, such as Motion Blur, Distortion, and BrightnessContrast. However, to simplify my work, I only use Motion Blur with 4 different blur_limit settings.

import albumentations as A


def data_aug(self, img):
    """Return an augmented (motion-blurred) version of ``img``.

    Args:
        img: image passed to the albumentations pipeline as ``image``.

    Returns:
        The ``'image'`` entry of the pipeline result.
    """
    # Four MotionBlur stages are chained, each with its own blur_limit range
    # and its own probability argument p.
    train_transform = A.Compose(
        [
            A.MotionBlur(blur_limit=(3, 3), p=0.3),
            A.MotionBlur(blur_limit=(3, 4), p=0.7),
            A.MotionBlur(blur_limit=(3, 5), p=0.5),
            A.MotionBlur(blur_limit=(5, 7), p=0.4)
        ]
    )
    return train_transform(image=img)['image']

Let’s create our datasets:

For training, I created 10k images, and each image has 5 augmented versions, so I obtained 50k augmented images in total.
Set ‘write_text’ to False if you don’t want to write the number under the barcode in the image.

def create_database(self):
    """Generate and save the clean barcodes and their augmented versions.

    For every split in ``self.folder`` and every type in ``self.type_barcode``
    this produces 10000 barcodes for 'train' (2000 for any other split) under
    ./datasets/<split>_0/<type>/. Each barcode is saved as <code> via the
    barcode object's ``save`` and read back as <code>.png, and five augmented
    images are written as <code>_0.png ... <code>_4.png.
    """
    for f in self.folder:
        num_sample = 10000 if f == 'train' else 2000
        for t in self.type_barcode:
            for _ in range(num_sample):
                code_img, code_text = self.generate_code(t)
                code_img.save(f'./datasets/{f}_0/{t}/{code_text}',
                              options={"write_text": False})  # no text under barcode
                # The clean image is identical for all five augmentations, so
                # read it from disk once instead of once per augmentation.
                img = cv2.imread(f'./datasets/{f}_0/{t}/{code_text}.png')
                for aug in range(5):
                    new_code = self.data_aug(img=img)
                    cv2.imwrite(f'./datasets/{f}_0/{t}/{code_text}_{aug}.png', new_code)

Full code:

import csv
import random

import PIL.Image
import albumentations
import albumentations as A
import barcode
import cv2
from barcode.writer import ImageWriter
import os


class DatabaseCreate:
    """Generate a barcode image dataset with motion-blurred variants.

    Images are written under ./datasets/<split>_0/<barcode type>/.
    """

    def __init__(self, folder, type_barcode):
        """Store the split names and barcode type names to generate."""
        super().__init__()
        # Split names; 'train' gets a larger sample count in create_database.
        self.folder = folder
        # Barcode type names handed to barcode.get_barcode_class.
        self.type_barcode = type_barcode

    def create_hierarchy(self):
        """Create one output directory per (split, barcode type) pair."""
        print('start hier')
        for f in self.folder:
            for t in self.type_barcode:
                # exist_ok makes repeated runs a no-op without a separate
                # os.path.exists() check.
                os.makedirs(f'./datasets/{f}_0/{t}/', exist_ok=True)

    def generate_code(self, types):
        """Build a barcode object of type ``types`` from a random 13-digit number.

        Returns:
            A ``(code, random_code)`` tuple: the barcode object, configured to
            render through ``ImageWriter``, and the integer it was built from.
        """
        # Type-specific constraints on the digits can be added here.
        # NOTE(review): some symbologies may truncate the input or append
        # their own check digit, so the encoded value could differ from
        # ``random_code`` -- confirm against the python-barcode documentation.
        barcode_class = barcode.get_barcode_class(types)
        random_code = random.randint(10 ** 12, 10 ** 13 - 1)
        code = barcode_class(str(random_code), writer=ImageWriter())
        return code, random_code

    def data_aug(self, img):
        """Return an augmented (motion-blurred) version of ``img``."""
        # Four MotionBlur stages are chained, each with its own blur_limit
        # range and its own probability argument p.
        train_transform = A.Compose(
            [
                A.MotionBlur(blur_limit=(3, 3), p=0.3),
                A.MotionBlur(blur_limit=(3, 4), p=0.7),
                A.MotionBlur(blur_limit=(3, 5), p=0.5),
                A.MotionBlur(blur_limit=(5, 7), p=0.4),
            ]
        )
        return train_transform(image=img)['image']

    def create_database(self):
        """Generate and save the clean barcodes and their augmented versions.

        Produces 10000 barcodes per type for 'train' and 2000 for any other
        split; each clean <code>.png gets five augmented images
        <code>_0.png ... <code>_4.png in the same directory.
        """
        print('start extract')
        for f in self.folder:
            num_sample = 10000 if f == 'train' else 2000
            for t in self.type_barcode:
                for _ in range(num_sample):
                    code_img, code_text = self.generate_code(t)
                    code_img.save(f'./datasets/{f}_0/{t}/{code_text}',
                                  options={"write_text": False})  # no text under barcode
                    # The clean image is identical for all five
                    # augmentations, so read it from disk only once.
                    img = cv2.imread(f'./datasets/{f}_0/{t}/{code_text}.png')
                    for aug in range(5):
                        new_code = self.data_aug(img=img)
                        cv2.imwrite(f'./datasets/{f}_0/{t}/{code_text}_{aug}.png', new_code)

I always prefer to store the train, validation, and test file lists in .csv files for loading, so let's write a few lines of code for that:

def create_hierarchy(types, ground_truth=False):
    """Write a shuffled CSV index of one split's images and their clean targets.

    Walks ./datasets/<types>_0 and collects the .png files: the clean barcodes
    (no '_' in the file name) when ``ground_truth`` is true, otherwise the
    augmented ones ('_' in the file name). Each CSV row is
    ``[image path, path of the matching clean image]`` and the file is written
    to ./hierarchy/<types>_Restored_0.csv.

    Args:
        types: split name, e.g. 'train'.
        ground_truth: select the clean images instead of the augmented ones.
    """
    want_clean = bool(ground_truth)
    rows = []
    # os.path.join keeps the walk working on every OS; a hard-coded '\\'
    # separator only resolves on Windows.
    root = os.path.join('.', 'datasets', f'{types}_0')
    for dirpath, _dirnames, filenames in os.walk(root):
        for file in filenames:
            if not file.endswith('.png'):
                continue
            is_clean = '_' not in file
            if is_clean != want_clean:
                continue
            # Both '<code>_<k>.png' and '<code>.png' map to '<code>.png', so a
            # clean image is paired with itself rather than '<code>.png.png'.
            clean_name = os.path.splitext(file)[0].split('_')[0] + '.png'
            rows.append([os.path.join(dirpath, file),
                         os.path.join(dirpath, clean_name)])
    random.shuffle(rows)

    # Make sure the output folder exists before opening the CSV for writing.
    os.makedirs('./hierarchy', exist_ok=True)
    with open(f'./hierarchy/{types}_Restored_0.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        if rows:
            # Show one sample path; skipped when nothing matched so an empty
            # split produces an empty CSV instead of an IndexError.
            print(rows[0][0])
        writer.writerows(rows)

Now, you can choose which kinds of barcodes you want to generate, and DONE!

def main():
    """Build the dataset folders and images, then index every split in a CSV."""
    splits = ['train', 'val', 'test']
    # Only 'code39' is generated here. Other type names that could be listed:
    # 'code128', 'ean', 'ean13', 'ean8', and the ISBN-like family
    # 'gs1', 'gtin', 'isbn', 'isbn10', 'isbn13', 'issn'.
    barcode_types = ['code39']

    builder = DatabaseCreate(splits, barcode_types)
    builder.create_hierarchy()
    builder.create_database()

    # One CSV per split, listing the augmented images.
    for split in splits:
        create_hierarchy(split, ground_truth=False)

If you are looking for a real-world barcode database, I recommend you check out this dataset: Muenster BarcodeDB

Part 2: I’m working on building an encoder-decoder model to restore motion-blurred barcodes so that a barcode decoder can read them. However, so far I haven’t gotten a decent result from this model. I will post Part 2 if it goes well! Let’s hope :D

Happy learning!

Deep learning for BARCODE Deblurring Part 1: Create training datasets

Written by San