-
Notifications
You must be signed in to change notification settings - Fork 74
Expand file tree
/
Copy pathocr.py
More file actions
96 lines (75 loc) · 3.91 KB
/
ocr.py
File metadata and controls
96 lines (75 loc) · 3.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import pytesseract
import unicodedata
import re
import numpy as np
# from PIL import Image, ImageFont, ImageDraw, ImageEnhance
# Define class variables
bounding_box_order = ["left", "top", "right", "bottom"]
# This method will take the model bounding box predictions and return the extracted text inside each box
def one_shot_ocr_service(image, output):
    """Run OCR inside each predicted bounding box.

    Args:
        image: PIL image to crop regions from.
        output: dict containing a 'bounding-boxes' list; each entry holds a
            'coordinates' dict with 'left'/'top'/'right'/'bottom' keys.

    Returns:
        A list of dicts, one per box that produced non-empty text, each with
        'text' (cleaned OCR text), 'box' ([left, top, right, bottom]) and
        'score' (mean tesseract confidence scaled to 0-1).
    """
    response = []
    detections = output['bounding-boxes']
    for detection in detections:
        # crop image for every detection
        coordinates = detection["coordinates"]
        cropped = image.crop((float(coordinates["left"]), float(
            coordinates["top"]), float(coordinates["right"]), float(coordinates["bottom"])))
        # convert image to grayscale for better accuracy
        processed_img = cropped.convert('L')
        # extract text with positive confidence from cropped image
        df = pytesseract.image_to_data(processed_img, output_type='data.frame')
        valid_df = df[df["conf"] > 0]
        extracted_text = " ".join(valid_df["text"].values)
        # normalize to ASCII and strip common OCR artifacts (ellipses, smart
        # quotes, the spurious "alt/1m" token)
        extracted_text = str(
            unicodedata.normalize('NFKD', extracted_text).encode('ascii', 'ignore').decode()).strip().replace("\n",
            " ").replace("...", ".").replace("..", ".").replace('”', ' ').replace('“', ' ').replace("'", ' ').replace('\"',
            '').replace("alt/1m", "").strip()
        # raw string so the \[ \] \- escapes are taken literally by re, not
        # mangled by the Python string parser (was a non-raw string: bug)
        extracted_text = re.sub(
            r'[^A-Za-z0-9.!?,;%:=()\[\]$€&/\- ]+', '', extracted_text)
        extracted_text = " ".join(extracted_text.split())
        # wrap each non-empty prediction inside a dictionary
        # (was `len(extracted_text) is not 0` — identity comparison on an int
        # literal is implementation-dependent and a SyntaxWarning)
        if extracted_text:
            prediction = {'text': extracted_text}
            prediction["box"] = [coordinates[el] for el in bounding_box_order]
            prediction["score"] = valid_df["conf"].mean() / 100.0
            response.append(prediction)
    return response
# This method will take an image and return the extracted text from the image
def ocr_service(image):
    """Run OCR over the whole image.

    Args:
        image: PIL image.

    Returns:
        A single-element list containing a dict with 'text' (cleaned OCR
        text), 'box' ([left, top, right, bottom] spanning the first and last
        detected words) and 'score' (mean confidence scaled to 0-1).
        Returns an empty list when no text with positive confidence is found.
    """
    # convert image to grayscale for better accuracy
    processed_img = image.convert('L')
    # Get data including boxes, confidences, line and page numbers
    df = pytesseract.image_to_data(processed_img, output_type='data.frame')
    valid_df = df[df["conf"] > 0]
    # guard: with no confident detections the index lookups below would raise
    # IndexError; return an empty result instead (mirrors one_shot_ocr_service)
    if valid_df.empty:
        return []
    # process text: ASCII-normalize and strip common OCR artifacts
    extracted_text = " ".join(valid_df["text"].values)
    extracted_text = str(
        unicodedata.normalize('NFKD', extracted_text).encode('ascii', 'ignore').decode()).strip().replace("\n",
        " ").replace("...", ".").replace("..", ".").replace('”', ' ').replace('“', ' ').replace("'", ' ').replace('\"', '').replace(
        "alt/1m", "").strip()
    # raw string so the \[ \] \- escapes are taken literally by re, not
    # mangled by the Python string parser (was a non-raw string: bug)
    extracted_text = re.sub(
        r'[^A-Za-z0-9.!?,;%:=()\[\]$€&/\- ]+', '', extracted_text)
    extracted_text = " ".join(extracted_text.split())
    # calculate the bounding box data based on pytesseract results:
    # top-left from the first detected word, bottom-right from the last
    coordinates = {}
    index = valid_df.index.values
    coordinates["left"] = valid_df.loc[index[0], "left"]
    coordinates["top"] = valid_df.loc[index[0], "top"]
    coordinates["bottom"] = valid_df.loc[index[-1],
                                         "top"] + valid_df.loc[index[-1], "height"]
    coordinates["right"] = valid_df.loc[index[-1],
                                        "left"] + valid_df.loc[index[-1], "width"]
    # .item() converts numpy scalars to plain Python numbers
    bounding_box = [coordinates[el].item() for el in bounding_box_order]
    # wrap the prediction inside a dictionary
    response = {
        'text': extracted_text,
        'box': bounding_box,
        'score': valid_df["conf"].mean() / 100.0,
    }
    return [response]