Deep learning for BARCODE Deblurring Part 1: Create training datasets

San
4 min readJan 30, 2022
Barcode

In this part, we will go through the steps to create, augment, and save a dataset with python-barcode and albumentations. First, we create the folders and subfolders for storing the datasets.

import os


def create_hierarchy(self):
    """Create the ``./datasets/<folder>_0/<barcode type>/`` output folders.

    ``self.folder`` holds the split names (train, val, test) and
    ``self.type_barcode`` the barcode types (e.g. ean or isbn). One folder
    is created for every combination of the two.
    """
    for f in self.folder:
        for t in self.type_barcode:
            # exist_ok=True keeps the call idempotent and removes the gap
            # between checking for the folder and creating it.
            os.makedirs(f'./datasets/{f}_0/{t}/', exist_ok=True)

In this work, I used python-barcode to create the barcodes for training the model. With this library, you can create eleven different kinds of barcodes. I only work with EAN in this example, so I used randint to generate the barcode numbers. You should add conditions for other types of barcode if needed. For example, ISBN needs to start with 978, 97910, or 97911. ImageWriter is used if you want to save barcodes as images rather than SVG.

from barcode.writer import ImageWriter
import barcode
import random
def generate_code(self, types):
    """Build a barcode of the requested type from a random 13-digit number.

    Returns a ``(barcode object, number)`` tuple. Any type-specific rule for
    the payload (such as a mandatory prefix) would have to be added here.
    """
    barcode_cls = barcode.get_barcode_class(types)
    lowest, highest = 10 ** 12, 10 ** 13 - 1
    number = random.randint(lowest, highest)
    return barcode_cls(str(number), writer=ImageWriter()), number

To generate augmented images, I used the albumentations library. I defined some common types of augmentation that are often encountered in real-world scenarios when scanning images, such as Motion Blur, Distortion, and BrightnessContrast. However, to simplify my work, I only use Motion Blur with 4 different blur_limit settings.

import albumentations as A


def data_aug(self, img):
    """Run ``img`` through the motion-blur pipeline and return the result.

    The pipeline chains four ``MotionBlur`` transforms with different
    ``blur_limit`` ranges. Each one fires with its own probability ``p``,
    so a single call may apply several blurs, one, or none.
    """
    # Augmentation which tries to cover common real-world cases
    train_transform = A.Compose(
        [
            A.MotionBlur(blur_limit=(3, 3), p=0.3),
            A.MotionBlur(blur_limit=(3, 4), p=0.7),
            A.MotionBlur(blur_limit=(3, 5), p=0.5),
            A.MotionBlur(blur_limit=(5, 7), p=0.4),
        ]
    )
    out = train_transform(image=img)['image']
    return out

Let’s create our datasets:

  • For training, I created 10k images, and each image has 5 augmented versions. So, I ended up with 50k augmented images in total.
  • Set ‘write_text’ to False if you don’t want to write the number under the barcode in the image.
def create_database(self):
    """Generate clean barcode images and five augmented variants of each.

    For every entry of ``self.folder`` and ``self.type_barcode`` this writes
    10000 samples when the folder is ``'train'`` and 2000 otherwise. The
    clean image is saved under the random code number, and its augmented
    variants as ``<code>_<k>.png`` with ``k`` in 0..4.

    Raises:
        OSError: if a freshly saved barcode image cannot be read back.
    """
    for f in self.folder:
        num_sample = 10000 if f == 'train' else 2000
        for t in self.type_barcode:
            for _ in range(num_sample):
                code_img, code_text = self.generate_code(t)
                base_path = f'./datasets/{f}_0/{t}/{code_text}'
                # save() appends the writer's file extension and returns the
                # resulting file name, so the path does not have to be guessed.
                saved_path = code_img.save(
                    base_path,
                    options={"write_text": False})  # not save text under barcode
                # Decode the clean image once instead of once per augmentation.
                clean = cv2.imread(saved_path)
                if clean is None:
                    raise OSError(f'could not read generated barcode image: {saved_path}')
                for aug in range(5):
                    # Every augmentation starts from an untouched copy of the clean image.
                    new_code = self.data_aug(img=clean.copy())
                    cv2.imwrite(f'{base_path}_{aug}.png', new_code)

    return None

Full code:

import csv
import random

import PIL.Image
import albumentations
import albumentations as A
import barcode
import cv2
from barcode.writer import ImageWriter
import os


class DatabaseCreate:
    """Generate clean barcode images and motion-blurred variants on disk.

    Images are written below ``./datasets/<folder>_0/<barcode type>/``,
    relative to the current working directory.
    """

    def __init__(self, folder, type_barcode):
        """Store the split names (``folder``) and the barcode types to generate."""
        super().__init__()
        self.folder = folder
        self.type_barcode = type_barcode

    def create_hierarchy(self):
        """Create one output folder per (folder, barcode type) combination."""
        print('start hier')
        for f in self.folder:
            for t in self.type_barcode:
                # exist_ok=True keeps the call idempotent and removes the gap
                # between checking for the folder and creating it.
                os.makedirs(f'./datasets/{f}_0/{t}/', exist_ok=True)

    def generate_code(self, types):
        """Build a barcode of type ``types`` from a random 13-digit number.

        Returns a ``(barcode object, number)`` tuple. Any type-specific rule
        for the payload would have to be added here.
        """
        barcode_cls = barcode.get_barcode_class(types)
        random_code = random.randint(10 ** 12, 10 ** 13 - 1)
        code = barcode_cls(str(random_code), writer=ImageWriter())
        return code, random_code

    def data_aug(self, img):
        """Run ``img`` through the motion-blur pipeline and return the result.

        Each ``MotionBlur`` fires with its own probability ``p``, so a single
        call may apply several blurs, one, or none.
        """
        # Augmentation which tries to cover common real-world cases
        train_transform = A.Compose(
            [
                A.MotionBlur(blur_limit=(3, 3), p=0.3),
                A.MotionBlur(blur_limit=(3, 4), p=0.7),
                A.MotionBlur(blur_limit=(3, 5), p=0.5),
                A.MotionBlur(blur_limit=(5, 7), p=0.4),
            ]
        )
        out = train_transform(image=img)['image']
        return out

    def create_database(self):
        """Generate clean barcode images and five augmented variants of each.

        Writes 10000 samples per barcode type when the folder is ``'train'``
        and 2000 otherwise. The clean image is saved under the random code
        number, and its augmented variants as ``<code>_<k>.png`` with ``k``
        in 0..4.

        Raises:
            OSError: if a freshly saved barcode image cannot be read back.
        """
        print('start extract')
        for f in self.folder:
            num_sample = 10000 if f == 'train' else 2000
            for t in self.type_barcode:
                for _ in range(num_sample):
                    code_img, code_text = self.generate_code(t)
                    base_path = f'./datasets/{f}_0/{t}/{code_text}'
                    # save() appends the writer's file extension and returns
                    # the resulting file name, so the path is not guessed.
                    saved_path = code_img.save(
                        base_path,
                        options={"write_text": False})  # not save text under barcode
                    # Decode the clean image once instead of once per augmentation.
                    clean = cv2.imread(saved_path)
                    if clean is None:
                        raise OSError(
                            f'could not read generated barcode image: {saved_path}')
                    for aug in range(5):
                        # Every augmentation starts from an untouched copy.
                        new_code = self.data_aug(img=clean.copy())
                        cv2.imwrite(f'{base_path}_{aug}.png', new_code)

        return None

I always prefer saving the train, validation, and test datasets in a .csv file for loading. Let's write a few lines of code for it:

def create_hierarchy(types, ground_truth=False):
    """Write a CSV that pairs each image with its clean original.

    Walks ``./datasets/<types>_0`` and collects ``.png`` files: those with
    an underscore in the name (augmented images) by default, or those
    without one (clean images) when ``ground_truth`` is true. The shuffled
    list is written to ``./hierarchy/<types>_Restored_0.csv``, one row per
    image: ``[image path, path of the clean image in the same folder]``.

    Args:
        types: Split name, e.g. ``'train'``, ``'val'`` or ``'test'``.
        ground_truth: Select the clean images instead of the augmented ones.
    """
    # os.path keeps the Windows output identical to the hard-coded '\\'
    # version while also working on other platforms.
    root = os.path.join('.', 'datasets', f'{types}_0')
    full_link = []
    for dirpath, _dirnames, filenames in os.walk(root):
        for file in filenames:
            if not file.endswith('.png'):
                continue
            is_augmented = '_' in file
            if is_augmented != ground_truth:
                full_link.append(os.path.join(dirpath, file))
    random.shuffle(full_link)

    # The output folder is not created anywhere else.
    os.makedirs('./hierarchy', exist_ok=True)
    with open(f'./hierarchy/{types}_Restored_0.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        if full_link:  # nothing to preview when no image matched
            print(full_link[0])
        for link in full_link:
            folder, name = os.path.split(link)
            # Drop the extension first so a clean '123.png' maps to itself
            # rather than to '123.png.png'.
            value = os.path.splitext(name)[0].split('_')[0]
            writer.writerow([link, os.path.join(folder, value + '.png')])

Now, you can choose which kinds of barcodes you want to generate, and DONE!

def main():
    """Create the folders, generate the images, then write one CSV per split."""
    splits = ['train', 'val', 'test']
    # Alternatives noted by the author: 'code128', 'ean', 'ean13', 'ean8',
    # and the ISBN family: 'gs1', 'gtin', 'isbn', 'isbn10', 'isbn13', 'issn'.
    barcode_types = ['code39']
    creator = DatabaseCreate(splits, barcode_types)
    creator.create_hierarchy()
    creator.create_database()
    for split in splits:
        create_hierarchy(split, ground_truth=False)

If you are looking for a real-world barcodes database, I recommend you check this dataset out: Muenster BarcodeDB

Part 2: I’m working on building an encoder-decoder model to restore motion-blurred barcodes so that a barcode decoder can read them. However, up to now, I haven’t got a decent result for this model. I will post part 2 if it goes well! Let’s hope :D

Happy learning!

--

--