Skip to content

Reference

OCR class¤

Performs OCR on a given image, saves an image with boxes around the words, and converts the extracted text to an MP3 file.

Add Tesseract OCR's installation location in PATH for functions using it to work.

Args:

preprocess:
    Set True if the image is a real-life photo of some large meaningful text
    (e.g., a page of a book). Usually set to False when OCRing using
    `ocr_meaningful_text` to preprocess the image.
    Set False if the image is a scanned photo (an e-book). It will not be
    pre-processed before OCRing.
    Use the `Preprocessor` class manually to have more control!
    Use the `Preprocessor` class manually to have more control!
path:
    Path of the image to be used.

Examples:

>>> import sys
>>> sys.displayhook = lambda x: None
>>> import ocred
>>> ocr = ocred.OCR(
...     False, # preprocess -> to preprocess the image
...     "./images/Page.png"
... )
>>> ocr.ocr_meaningful_text(save_output=True)
Source code in ocred/ocr.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
class OCR:
    """
    Performs OCR on a given image, saves an image with boxes around the words, and
    converts the extracted text to an MP3 file.

    Add Tesseract OCR's installation location in PATH for functions using it to work.

    Args:

        preprocess:
            Set True if the image is a real life photo of some large meaningful (page of
            a book). Usually set to False when OCRing using `ocr_meaningful_text` to
            preprocess the image.
            Set False if the image is a scanned photo (an e-book). It will not be
            pre-processed before OCRing.
            Use the `Preprocessor` class manually to have more control!
        path:
            Path of the image to be used.

    Examples:
        >>> import sys
        >>> sys.displayhook = lambda x: None
        >>> import ocred
        >>> ocr = ocred.OCR(
        ...     False, # preprocess -> to preprocess the image
        ...     "./images/Page.png"
        ... )
        >>> ocr.ocr_meaningful_text(save_output=True)
    """

    def __init__(self, preprocess: bool, path: str) -> None:
        self.path = path
        self.preprocess = preprocess

        if self.preprocess:
            preprocessed = Preprocessor(self.path)

            # scan the image and copy the scanned image
            preprocessed.scan()
            orig = preprocessed.img.copy()

            # remove noise
            preprocessed.remove_noise()

            # thicken the ink to draw Hough lines better
            preprocessed.thicken_font()

            # calculate the median angle of all the Hough lines
            _, median_angle = preprocessed.rotate()

            # rotate the original scanned image
            rotated = ndimage.rotate(orig, median_angle)

            # remove noise again
            preprocessed = Preprocessor(rotated)
            preprocessed.remove_noise()

            cv2.imwrite("preprocessed.png", preprocessed.img)
            self.path = "preprocessed.png"

    def ocr_meaningful_text(
        self,
        *,
        tesseract_config: str | None = "-l eng --oem 1",
        preserve_orientation: bool | None = False,
        save_output: bool | None = False,
    ) -> str:
        """
        Performs OCR on long meaningful text documents and saves the image with boxes
        around the words. For example - books, PDFs etc.

        Args:
            tesseract_config:
                Configuration passed down to the Tesseract OCR Engine.
            preserve_orientation:
                Preserves the orientation of OCRed text.
            save_output:
                Saves the text to `output.txt` file.

        Returns:
            text:
                The extracted text.
        """
        # reading the image
        img = cv2.imread(self.path)

        # extracting the text
        self.text = pytesseract.image_to_string(img, config=tesseract_config)
        if not preserve_orientation:
            self.text = self.text.replace("-\n", "").replace("\n", " ")

        # adding boxes around the words
        boxes = pytesseract.image_to_data(img)
        for z, box in enumerate(boxes.splitlines()):
            if z != 0:
                box = box.split()

                # if the data has a word
                if len(box) == 12:
                    x, y = int(box[6]), int(box[7])
                    h, w = int(box[8]), int(box[9])

                    cv2.rectangle(img, (x, y), (x + h, y + w), (0, 0, 255), 1)

        cv2.imwrite("OCR.png", img)

        if save_output:
            self.save_output()

        return self.text

    def ocr_sparse_text(
        self,
        *,
        languages: list[str] | None = ["en", "hi"],
        decoder: str | None = "greedy",
        save_output: bool | None = False,
    ) -> tuple[str, typing.Any]:
        """
        Performs OCR on sparse text and saves the image with boxes around the words.
        This method can be used to OCR documents in which the characters don't form
        any proper/meaningful sentences, or if there are very less meaningful sentences,
        for example - bills, sign-boards etc.

        Args:
            languages:
                A list of languages that the signboard possible has.
                Note: Provide only the languages that are present in the image, adding
                additional languages misguides the model.
            decoder:
                If the document has a larger number of meaningful sentences then use
                "beamsearch". For most of the cases "greedy" works very well.
            save_output:
                Saves the text to `output.txt` file.

        Returns:
            text:
                The extracted text.
            detailed_text:
                Text with extra information (returned by easyocr.Reader.readtext()).
        """
        self.text = ""

        # reading the image using open-cv and easyocr
        img = cv2.imread(self.path)
        reader = easyocr.Reader(
            languages
        )  # slow for the first time (also depends upon CPU/GPU)
        self.detailed_text: typing.Any = reader.readtext(
            self.path, decoder=decoder, batch_size=5
        )

        for text in self.detailed_text:
            # extracting the coordinates to highlight the text
            coords_lower = text[0][:2]
            coords_upper = text[0][2:4]

            coords_lower.sort(key=lambda x: x[0])
            pt1 = [int(x) for x in coords_upper[-1]]

            coords_lower.sort(key=lambda x: x[0])
            pt2 = [int(x) for x in coords_lower[-1]]

            # highlighting the text
            cv2.rectangle(img, pt1, pt2, (0, 0, 255), 1)

            self.text = self.text + " " + text[-2]

        cv2.imwrite("OCR.png", img)

        if save_output:
            self.save_output()

        return self.text, self.detailed_text

    def process_extracted_text_from_invoice(self) -> dict[str, typing.Any]:
        """
        This method processes the extracted text from invoices, and returns some useful
        information.

        Returns:
            extracted_info:
                The extracted information.
        """
        if not hasattr(self, "detailed_text"):
            raise ValueError("no invoice OCRed; OCR an invoice first")

        import nltk

        nltk.download("punkt")
        nltk.download("wordnet")
        nltk.download("stopwords")

        self.extracted_info = {}
        self.text_list = self.text.split(" ")

        # find date
        date_re = re.compile(
            r"^([1-9]|0[1-9]|1[0-9]|2[0-9]|3[0-1])(\.|-|\/)([1-9]|0[1-9]|1[0-2])(\.|-|\/)([0-9][0-9]|19[0-9][0-9]|20[0-9][0-9])$",
        )
        date = list(filter(date_re.match, self.text_list))

        # find phone number
        phone_number_re = re.compile(
            r"((\+*)((0[ -]*)*|((91 )*))((\d{12})+|(\d{10})+))|\d{5}([- ]*)\d{6}",
        )
        phone_number = list(filter(phone_number_re.match, self.text_list))

        # find place
        place = self.detailed_text[0][-2]

        # remove puntuations and redundant words
        tokenizer = nltk.RegexpTokenizer(r"\w+")
        removed_punctuation = tokenizer.tokenize(self.text)

        stop_words = set(nltk.corpus.stopwords.words("english"))
        post_processed_word_list = [
            w for w in removed_punctuation if w not in stop_words
        ]

        # find order number
        order_number: str | int = ""
        for i in range(len(post_processed_word_list)):
            if post_processed_word_list[i].lower() == "order":
                try:
                    order_number = int(post_processed_word_list[i + 1])
                except Exception:
                    order_number = post_processed_word_list[i + 2]
                break

        # find total price
        price: list[typing.Any] | str = ""

        # try finding a number with Rs, INR, ₹ or रे in front of it or Rs, INR at the end
        # of it
        try:
            price = re.findall(
                r"(?:Rs\.?|INR|₹\.?|रे\.?)\s*(\d+(?:[.,]\d+)*)|(\d+(?:[.,]\d+)*)\s*(?:Rs\.?|INR)",
                self.text,
            )
            price = list(map(float, price))
            price = max(price)
        # try finding numbers with "grand total" or "total" written in front of them
        except ValueError:
            lowered_list = [x.lower() for x in post_processed_word_list]
            if "grand" in lowered_list:
                indices = [i for i, x in enumerate(lowered_list) if x == "grand"]
                i = indices[-1]
                price = post_processed_word_list[i + 2]
            elif "total" in lowered_list:
                indices = [i for i, x in enumerate(lowered_list) if x == "total"]
                i = indices[-1]
                price = post_processed_word_list[i + 1]

        self.extracted_info.update(
            {
                "price": price,
                "date": date,
                "place": place,
                "order_number": order_number,
                "phone_number": phone_number,
                "post_processed_word_list": post_processed_word_list,
            }
        )

        return self.extracted_info

    def save_output(self) -> None:
        """Saves the extracted text in the `output.txt` file."""
        if not hasattr(self, "text"):
            raise ValueError("no text OCRed; OCR a document first")
        f = open("output.txt", "w", encoding="utf-8")
        f.write(self.text)
        f.close()

    def text_to_speech(self, *, lang: str | None = "en") -> None:
        """
        DANGER: Deprecated since version v0.2.0.
        Instead, use gTTS manually.

        Converts the extracted text to speech and save it as an MP3 file.

        Args:
            lang:
                Language of the processed text.
        """
        raise DeprecationWarning(
            "text_to_speech is deprecated and was removed in v0.2.0; use gTTS manually",
        )

ocr_meaningful_text(*, tesseract_config='-l eng --oem 1', preserve_orientation=False, save_output=False) ¤

Performs OCR on long meaningful text documents and saves the image with boxes around the words. For example - books, PDFs etc.

Parameters:

Name Type Description Default
tesseract_config str | None

Configuration passed down to the Tesseract OCR Engine.

'-l eng --oem 1'
preserve_orientation bool | None

Preserves the orientation of OCRed text.

False
save_output bool | None

Saves the text to output.txt file.

False

Returns:

Name Type Description
text str

The extracted text.

Source code in ocred/ocr.py
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
def ocr_meaningful_text(
    self,
    *,
    tesseract_config: str | None = "-l eng --oem 1",
    preserve_orientation: bool | None = False,
    save_output: bool | None = False,
) -> str:
    """
    Performs OCR on long meaningful text documents and saves the image with boxes
    around the words. For example - books, PDFs etc.

    Args:
        tesseract_config:
            Configuration passed down to the Tesseract OCR Engine.
        preserve_orientation:
            Preserves the orientation of OCRed text.
        save_output:
            Saves the text to `output.txt` file.

    Returns:
        text:
            The extracted text.
    """
    # reading the image
    img = cv2.imread(self.path)

    # extracting the text
    self.text = pytesseract.image_to_string(img, config=tesseract_config)
    if not preserve_orientation:
        # join words hyphenated across line breaks, then flatten newlines
        self.text = self.text.replace("-\n", "").replace("\n", " ")

    # adding boxes around the words; image_to_data returns a TSV string whose
    # first line is a header row
    boxes = pytesseract.image_to_data(img)
    for z, box in enumerate(boxes.splitlines()):
        if z != 0:
            box = box.split()

            # a complete row has 12 columns; the 12th (the word itself) is
            # absent for non-word rows
            if len(box) == 12:
                # columns 6-9 are left, top, width, height
                x, y = int(box[6]), int(box[7])
                w, h = int(box[8]), int(box[9])

                cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 1)

    cv2.imwrite("OCR.png", img)

    if save_output:
        self.save_output()

    return self.text

ocr_sparse_text(*, languages=['en', 'hi'], decoder='greedy', save_output=False) ¤

Performs OCR on sparse text and saves the image with boxes around the words. This method can be used to OCR documents in which the characters don't form any proper/meaningful sentences, or if there are very few meaningful sentences, for example - bills, sign-boards etc.

Parameters:

Name Type Description Default
languages list[str] | None

A list of languages that the signboard possibly has. Note: Provide only the languages that are present in the image, adding additional languages misguides the model.

['en', 'hi']
decoder str | None

If the document has a larger number of meaningful sentences then use "beamsearch". For most of the cases "greedy" works very well.

'greedy'
save_output bool | None

Saves the text to output.txt file.

False

Returns:

Name Type Description
text str

The extracted text.

detailed_text Any

Text with extra information (returned by easyocr.Reader.readtext()).

Source code in ocred/ocr.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
def ocr_sparse_text(
    self,
    *,
    languages: list[str] | None = None,
    decoder: str | None = "greedy",
    save_output: bool | None = False,
) -> tuple[str, typing.Any]:
    """
    Performs OCR on sparse text and saves the image with boxes around the words.
    This method can be used to OCR documents in which the characters don't form
    any proper/meaningful sentences, or if there are very few meaningful sentences,
    for example - bills, sign-boards etc.

    Args:
        languages:
            A list of languages that the signboard possibly has; defaults to
            `["en", "hi"]`.
            Note: Provide only the languages that are present in the image, adding
            additional languages misguides the model.
        decoder:
            If the document has a larger number of meaningful sentences then use
            "beamsearch". For most of the cases "greedy" works very well.
        save_output:
            Saves the text to `output.txt` file.

    Returns:
        text:
            The extracted text.
        detailed_text:
            Text with extra information (returned by easyocr.Reader.readtext()).
    """
    # avoid a mutable default argument; None means "use the default pair"
    if languages is None:
        languages = ["en", "hi"]

    self.text = ""

    # reading the image using open-cv and easyocr
    img = cv2.imread(self.path)
    reader = easyocr.Reader(
        languages
    )  # slow for the first time (also depends upon CPU/GPU)
    self.detailed_text: typing.Any = reader.readtext(
        self.path, decoder=decoder, batch_size=5
    )

    for detection in self.detailed_text:
        # detection[0] holds the four corner points of the (possibly rotated)
        # quadrilateral around the text; draw the axis-aligned box covering them
        xs = [int(point[0]) for point in detection[0]]
        ys = [int(point[1]) for point in detection[0]]

        # highlighting the text
        cv2.rectangle(img, (min(xs), min(ys)), (max(xs), max(ys)), (0, 0, 255), 1)

        self.text = self.text + " " + detection[-2]

    cv2.imwrite("OCR.png", img)

    if save_output:
        self.save_output()

    return self.text, self.detailed_text

process_extracted_text_from_invoice() ¤

This method processes the extracted text from invoices, and returns some useful information.

Returns:

Name Type Description
extracted_info dict[str, Any]

The extracted information.

Source code in ocred/ocr.py
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
def process_extracted_text_from_invoice(self) -> dict[str, typing.Any]:
    """
    This method processes the extracted text from invoices, and returns some useful
    information.

    Returns:
        extracted_info:
            The extracted information.
    """
    if not hasattr(self, "detailed_text"):
        raise ValueError("no invoice OCRed; OCR an invoice first")

    import nltk

    nltk.download("punkt")
    nltk.download("wordnet")
    nltk.download("stopwords")

    self.extracted_info: dict[str, typing.Any] = {}
    self.text_list = self.text.split(" ")

    # find date (dd.mm.yy / dd-mm-yyyy style, with ".", "-" or "/" separators)
    date_re = re.compile(
        r"^([1-9]|0[1-9]|1[0-9]|2[0-9]|3[0-1])(\.|-|\/)([1-9]|0[1-9]|1[0-2])(\.|-|\/)([0-9][0-9]|19[0-9][0-9]|20[0-9][0-9])$",
    )
    date = list(filter(date_re.match, self.text_list))

    # find phone number
    phone_number_re = re.compile(
        r"((\+*)((0[ -]*)*|((91 )*))((\d{12})+|(\d{10})+))|\d{5}([- ]*)\d{6}",
    )
    phone_number = list(filter(phone_number_re.match, self.text_list))

    # find place; the first detection on the invoice is assumed to be the
    # header/place line
    place = self.detailed_text[0][-2]

    # remove punctuations and redundant words
    tokenizer = nltk.RegexpTokenizer(r"\w+")
    removed_punctuation = tokenizer.tokenize(self.text)

    stop_words = set(nltk.corpus.stopwords.words("english"))
    post_processed_word_list = [
        w for w in removed_punctuation if w not in stop_words
    ]

    # find order number: the token right after "order" (or the one after that
    # when the immediate token is not numeric)
    order_number: str | int = ""
    for i, word in enumerate(post_processed_word_list):
        if word.lower() == "order":
            try:
                order_number = int(post_processed_word_list[i + 1])
            except Exception:
                order_number = post_processed_word_list[i + 2]
            break

    # find total price
    price: typing.Any = ""

    # try finding a number with Rs, INR, ₹ or रे in front of it or Rs, INR at the end
    # of it
    try:
        # two alternated capture groups -> findall yields 2-tuples where
        # exactly one element is non-empty per match; pick the non-empty one
        matches = re.findall(
            r"(?:Rs\.?|INR|₹\.?|रे\.?)\s*(\d+(?:[.,]\d+)*)|(\d+(?:[.,]\d+)*)\s*(?:Rs\.?|INR)",
            self.text,
        )
        price = [float(prefixed or suffixed) for prefixed, suffixed in matches]
        price = max(price)
    # try finding numbers with "grand total" or "total" written in front of them
    except ValueError:
        lowered_list = [x.lower() for x in post_processed_word_list]
        if "grand" in lowered_list:
            indices = [i for i, x in enumerate(lowered_list) if x == "grand"]
            i = indices[-1]
            price = post_processed_word_list[i + 2]
        elif "total" in lowered_list:
            indices = [i for i, x in enumerate(lowered_list) if x == "total"]
            i = indices[-1]
            price = post_processed_word_list[i + 1]

    self.extracted_info.update(
        {
            "price": price,
            "date": date,
            "place": place,
            "order_number": order_number,
            "phone_number": phone_number,
            "post_processed_word_list": post_processed_word_list,
        }
    )

    return self.extracted_info

save_output() ¤

Saves the extracted text in the output.txt file.

Source code in ocred/ocr.py
281
282
283
284
285
286
287
def save_output(self) -> None:
    """Saves the extracted text in the `output.txt` file."""
    if not hasattr(self, "text"):
        raise ValueError("no text OCRed; OCR a document first")
    # context manager guarantees the handle is closed even if the write fails
    with open("output.txt", "w", encoding="utf-8") as f:
        f.write(self.text)

text_to_speech(*, lang='en') ¤

Danger

Deprecated since version v0.2.0. Instead, use gTTS manually.

Converts the extracted text to speech and save it as an MP3 file.

Parameters:

Name Type Description Default
lang str | None

Language of the processed text.

'en'
Source code in ocred/ocr.py
289
290
291
292
293
294
295
296
297
298
299
300
301
302
def text_to_speech(self, *, lang: str | None = "en") -> None:
    """
    DANGER: Deprecated since version v0.2.0.
    Instead, use gTTS manually.

    Converts the extracted text to speech and save it as an MP3 file.

    Args:
        lang:
            Language of the processed text.
    """
    raise DeprecationWarning(
        "text_to_speech is deprecated and was removed in v0.2.0; use gTTS manually",
    )

Preprocessor class¤

Preprocesses an image and makes it ready for OCR.

Parameters:

Name Type Description Default
image str | NDArray[int64] | NDArray[float64]

Path of the image or a numpy array.

required

Examples:

>>> import sys
>>> sys.displayhook = lambda x: None
>>> import cv2
>>> from scipy import ndimage
>>> from ocred import Preprocessor
>>> # scan the image and copy the scanned image
>>> preprocessed = Preprocessor("images/CosmosTwo.jpg")
>>> # scan the image and copy the scanned image
>>> preprocessed.scan()
>>> orig = preprocessed.img.copy()
>>> # remove noise
>>> preprocessed.remove_noise()
>>> # thicken the ink to draw Hough lines better
>>> preprocessed.thicken_font()
>>> # calculate the median angle of all the Hough lines
>>> _, median_angle = preprocessed.rotate()
>>> # rotate the original scanned image
>>> rotated = ndimage.rotate(orig, median_angle)
>>> # remove noise again
>>> preprocessed = Preprocessor(rotated)
>>> preprocessed.remove_noise()
>>> cv2.imwrite("preprocessed.png", preprocessed.img)
True
Source code in ocred/preprocessing.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
class Preprocessor:
    """
    Preprocesses an image and makes it ready for OCR.

    Args:
        image:
            Path of the image or a numpy array.

    Examples:
        >>> import sys
        >>> sys.displayhook = lambda x: None
        >>> import cv2
        >>> from scipy import ndimage
        >>> from ocred import Preprocessor
        >>> # scan the image and copy the scanned image
        >>> preprocessed = Preprocessor("images/CosmosTwo.jpg")
        >>> # scan the image and copy the scanned image
        >>> preprocessed.scan()
        >>> orig = preprocessed.img.copy()
        >>> # remove noise
        >>> preprocessed.remove_noise()
        >>> # thicken the ink to draw Hough lines better
        >>> preprocessed.thicken_font()
        >>> # calculate the median angle of all the Hough lines
        >>> _, median_angle = preprocessed.rotate()
        >>> # rotate the original scanned image
        >>> rotated = ndimage.rotate(orig, median_angle)
        >>> # remove noise again
        >>> preprocessed = Preprocessor(rotated)
        >>> preprocessed.remove_noise()
        >>> cv2.imwrite("preprocessed.png", preprocessed.img)
        True
    """

    def __init__(
        self,
        image: str | npt.NDArray[np.int64] | npt.NDArray[np.float64],
    ) -> None:
        if isinstance(image, str):
            self.img = cv2.imread(image)
        else:
            self.img = image

    def remove_noise(
        self,
        *,
        save: bool | None = False,
        inplace: bool | None | None = None,
        iterations: int | None = 1,
        overriden_image: npt.NDArray[np.int64] | npt.NDArray[np.float64] | None = None,
    ) -> npt.NDArray[np.int64] | npt.NDArray[np.float64]:
        """
        Removes noise from an image.

        Args:
            save:
                Saves the resultant image.
            iterations:
                Number of times the image is processed.
            inplace:
                DANGER: Deprecated since version v0.3.0.
                Was intended to edit the image inplace, but never actually worked.
            overriden_image:
                DANGER: Deprecated since version v0.3.0.
                Was used to pass a new image to the method but was redundant and buggy.

        Returns:
            noise_free_image:
                The noise free image.
        """
        if inplace is not None:
            raise DeprecationWarning(_dep_warn_inplace)
        if overriden_image is not None:
            raise DeprecationWarning(_dep_warn_overriden_image)

        kernel: npt.NDArray[np.int64] = np.ones((1, 1), np.uint8)
        self.img = cv2.dilate(self.img, kernel, iterations=iterations)
        kernel = np.ones((1, 1), np.uint8)
        self.img = cv2.erode(self.img, kernel, iterations=iterations)
        self.img = cv2.morphologyEx(self.img, cv2.MORPH_CLOSE, kernel)
        self.img = cv2.medianBlur(self.img, 3)

        if save:
            cv2.imwrite("noise_free.png", self.img)

        return self.img

    def thicken_font(
        self,
        *,
        save: bool | None = False,
        inplace: bool | None | None = None,
        iterations: int | None = 2,
        overriden_image: npt.NDArray[np.int64] | npt.NDArray[np.float64] | None = None,
    ) -> npt.NDArray[np.int64] | npt.NDArray[np.float64]:
        """
        Thickens the ink of an image.

        Args:
            save:
                Saves the resultant image.
            iterations:
                Number of times the image is processed.
            inplace:
                DANGER: Deprecated since version v0.3.0.
                Was intended to edit the image inplace, but never actually worked.
            overriden_image:
                DANGER: Deprecated since version v0.3.0.
                Was used to pass a new image to the method but was redundant and buggy.

        Returns:
            thickened_image:
                The thickened image.
        """
        if inplace is not None:
            raise DeprecationWarning(_dep_warn_inplace)
        if overriden_image is not None:
            raise DeprecationWarning(_dep_warn_overriden_image)

        self.img = cv2.bitwise_not(self.img)
        kernel: npt.NDArray[np.int64] = np.ones((2, 2), np.uint8)
        self.img = cv2.dilate(self.img, kernel, iterations=iterations)
        self.img = cv2.bitwise_not(self.img)

        if save:
            cv2.imwrite("thick_font.png", self.img)

        return self.img

    def scan(
        self,
        *,
        save: bool | None = False,
        inplace: bool | None | None = None,
        overriden_image: npt.NDArray[np.int64] | npt.NDArray[np.float64] | None = None,
    ) -> npt.NDArray[np.int64] | npt.NDArray[np.float64]:
        """
        Transforms an image/document view into B&W view (proper scanned colour scheme).

        Args:
            save:
                Saves the resultant image.
            inplace:
                DANGER: Deprecated since version v0.3.0.
                Was intended to edit the image inplace, but never actually worked.
            overriden_image:
                DANGER: Deprecated since version v0.3.0.
                Was used to pass a new image to the method but was redundant and buggy.

        Returns:
            scanned_image:
                The scanned image.
        """
        if inplace is not None:
            raise DeprecationWarning(_dep_warn_inplace)
        if overriden_image is not None:
            raise DeprecationWarning(_dep_warn_overriden_image)

        self.img = cv2.cvtColor(self.img, cv2.COLOR_BGR2GRAY)
        thr = threshold_local(self.img, 11, offset=10, method="gaussian")
        self.img = (self.img > thr).astype("uint8") * 255

        if save:
            cv2.imwrite("scanned.png", self.img)

        return self.img

    def rotate(
        self,
        *,
        save: bool | None = False,
        inplace: bool | None | None = None,
        overriden_image: npt.NDArray[np.int64] | npt.NDArray[np.float64] | None = None,
    ) -> tuple[npt.NDArray[np.int64] | npt.NDArray[np.float64], float]:
        """
        Rotates an image for a face-on view (view from the top).

        Args:
            save:
                Saves the resultant image.
            inplace:
                DANGER: Deprecated since version v0.3.0.
                Was intended to edit the image inplace, but never actually worked.
            overriden_image:
                DANGER: Deprecated since version v0.3.0.
                Was used to pass a new image to the method but was redundant and buggy.

        Returns:
            rotated_image:
                The rotated image.
            median_angle:
                The angly by which it is rotated.
        """
        if inplace is not None:
            raise DeprecationWarning(_dep_warn_inplace)
        if overriden_image is not None:
            raise DeprecationWarning(_dep_warn_overriden_image)

        img_edges = cv2.Canny(self.img, 100, 100, apertureSize=3)
        lines = cv2.HoughLinesP(
            img_edges,
            rho=1,
            theta=np.pi / 180.0,
            threshold=160,
            minLineLength=100,
            maxLineGap=10,
        )

        angles = []
        for [[x1, y1, x2, y2]] in lines:
            angle = math.degrees(math.atan2(y2 - y1, x2 - x1))
            angles.append(angle)

        median_angle = float(np.median(angles))
        self.img = ndimage.rotate(self.img, median_angle)

        if save:
            cv2.imwrite("rotated.png", self.img)

        return self.img, median_angle

remove_noise(*, save=False, inplace=None, iterations=1, overriden_image=None) ¤

Removes noise from an image.

Parameters:

Name Type Description Default
save bool | None

Saves the resultant image.

False
iterations int | None

Number of times the image is processed.

1
inplace bool | None | None

Danger

Deprecated since version v0.3.0. Was intended to edit the image inplace, but never actually worked.

None
overriden_image NDArray[int64] | NDArray[float64] | None

Danger

Deprecated since version v0.3.0. Was used to pass a new image to the method but was redundant and buggy.

None

Returns:

Name Type Description
noise_free_image NDArray[int64] | NDArray[float64]

The noise free image.

Source code in ocred/preprocessing.py
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def remove_noise(
    self,
    *,
    save: bool | None = False,
    inplace: bool | None = None,
    iterations: int | None = 1,
    overriden_image: npt.NDArray[np.int64] | npt.NDArray[np.float64] | None = None,
) -> npt.NDArray[np.int64] | npt.NDArray[np.float64]:
    """
    Removes noise from an image.

    The image is dilated then eroded with a 1x1 kernel, morphologically
    closed, and finally median-blurred (3x3) to suppress speckle noise.

    Args:
        save:
            Saves the resultant image as "noise_free.png".
        iterations:
            Number of times the image is processed.
        inplace:
            DANGER: Deprecated since version v0.3.0.
            Was intended to edit the image inplace, but never actually worked.
        overriden_image:
            DANGER: Deprecated since version v0.3.0.
            Was used to pass a new image to the method but was redundant and buggy.

    Raises:
        DeprecationWarning: If `inplace` or `overriden_image` is passed.

    Returns:
        noise_free_image:
            The noise free image.
    """
    if inplace is not None:
        raise DeprecationWarning(_dep_warn_inplace)
    if overriden_image is not None:
        raise DeprecationWarning(_dep_warn_overriden_image)

    # np.ones(..., np.uint8) yields a uint8 array, so annotate it as such
    # (it was previously mislabelled int64). The same kernel is reused for
    # dilation, erosion, and closing instead of being rebuilt identically.
    kernel: npt.NDArray[np.uint8] = np.ones((1, 1), np.uint8)
    self.img = cv2.dilate(self.img, kernel, iterations=iterations)
    self.img = cv2.erode(self.img, kernel, iterations=iterations)
    self.img = cv2.morphologyEx(self.img, cv2.MORPH_CLOSE, kernel)
    self.img = cv2.medianBlur(self.img, 3)

    if save:
        cv2.imwrite("noise_free.png", self.img)

    return self.img

rotate(*, save=False, inplace=None, overriden_image=None) ¤

Rotates an image for a face-on view (view from the top).

Parameters:

Name Type Description Default
save bool | None

Saves the resultant image.

False
inplace bool | None | None

Danger

Deprecated since version v0.3.0. Was intended to edit the image inplace, but never actually worked.

None
overriden_image NDArray[int64] | NDArray[float64] | None

Danger

Deprecated since version v0.3.0. Was used to pass a new image to the method but was redundant and buggy.

None

Returns:

Name Type Description
rotated_image NDArray[int64] | NDArray[float64]

The rotated image.

median_angle float

The angle by which it is rotated.

Source code in ocred/preprocessing.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
def rotate(
    self,
    *,
    save: bool | None = False,
    inplace: bool | None = None,
    overriden_image: npt.NDArray[np.int64] | npt.NDArray[np.float64] | None = None,
) -> tuple[npt.NDArray[np.int64] | npt.NDArray[np.float64], float]:
    """
    Rotates an image for a face-on view (view from the top).

    Detects straight line segments with Canny + probabilistic Hough, then
    rotates the image by the median angle of those segments.

    Args:
        save:
            Saves the resultant image as "rotated.png".
        inplace:
            DANGER: Deprecated since version v0.3.0.
            Was intended to edit the image inplace, but never actually worked.
        overriden_image:
            DANGER: Deprecated since version v0.3.0.
            Was used to pass a new image to the method but was redundant and buggy.

    Raises:
        DeprecationWarning: If `inplace` or `overriden_image` is passed.

    Returns:
        rotated_image:
            The rotated image.
        median_angle:
            The angle by which it is rotated.
    """
    if inplace is not None:
        raise DeprecationWarning(_dep_warn_inplace)
    if overriden_image is not None:
        raise DeprecationWarning(_dep_warn_overriden_image)

    img_edges = cv2.Canny(self.img, 100, 100, apertureSize=3)
    lines = cv2.HoughLinesP(
        img_edges,
        rho=1,
        theta=np.pi / 180.0,
        threshold=160,
        minLineLength=100,
        maxLineGap=10,
    )

    # HoughLinesP returns None when no segments pass the threshold; the
    # previous code crashed iterating None (and np.median of an empty list
    # is nan). Fall back to a 0-degree rotation in that case.
    if lines is None:
        median_angle = 0.0
    else:
        angles = [
            math.degrees(math.atan2(y2 - y1, x2 - x1))
            for [[x1, y1, x2, y2]] in lines
        ]
        median_angle = float(np.median(angles))

    self.img = ndimage.rotate(self.img, median_angle)

    if save:
        cv2.imwrite("rotated.png", self.img)

    return self.img, median_angle

scan(*, save=False, inplace=None, overriden_image=None) ¤

Transforms an image/document view into B&W view (proper scanned colour scheme).

Parameters:

Name Type Description Default
save bool | None

Saves the resultant image.

False
inplace bool | None | None

Danger

Deprecated since version v0.3.0. Was intended to edit the image inplace, but never actually worked.

None
overriden_image NDArray[int64] | NDArray[float64] | None

Danger

Deprecated since version v0.3.0. Was used to pass a new image to the method but was redundant and buggy.

None

Returns:

Name Type Description
scanned_image NDArray[int64] | NDArray[float64]

The scanned image.

Source code in ocred/preprocessing.py
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
def scan(
    self,
    *,
    save: bool | None = False,
    inplace: bool | None = None,
    overriden_image: npt.NDArray[np.int64] | npt.NDArray[np.float64] | None = None,
) -> npt.NDArray[np.int64] | npt.NDArray[np.float64]:
    """
    Transforms an image/document view into B&W view (proper scanned colour scheme).

    Converts the image to grayscale, then binarizes it against a gaussian
    adaptive local threshold so text comes out black on white.

    Args:
        save:
            Saves the resultant image as "scanned.png".
        inplace:
            DANGER: Deprecated since version v0.3.0.
            Was intended to edit the image inplace, but never actually worked.
        overriden_image:
            DANGER: Deprecated since version v0.3.0.
            Was used to pass a new image to the method but was redundant and buggy.

    Raises:
        DeprecationWarning: If `inplace` or `overriden_image` is passed.

    Returns:
        scanned_image:
            The scanned image.
    """
    if inplace is not None:
        raise DeprecationWarning(_dep_warn_inplace)
    if overriden_image is not None:
        raise DeprecationWarning(_dep_warn_overriden_image)

    self.img = cv2.cvtColor(self.img, cv2.COLOR_BGR2GRAY)
    # Pixels brighter than their local threshold become 255, the rest 0.
    thr = threshold_local(self.img, 11, offset=10, method="gaussian")
    self.img = (self.img > thr).astype("uint8") * 255

    if save:
        cv2.imwrite("scanned.png", self.img)

    return self.img

thicken_font(*, save=False, inplace=None, iterations=2, overriden_image=None) ¤

Thickens the ink of an image.

Parameters:

Name Type Description Default
save bool | None

Saves the resultant image.

False
iterations int | None

Number of times the image is processed.

2
inplace bool | None | None

Danger

Deprecated since version v0.3.0. Was intended to edit the image inplace, but never actually worked.

None
overriden_image NDArray[int64] | NDArray[float64] | None

Danger

Deprecated since version v0.3.0. Was used to pass a new image to the method but was redundant and buggy.

None

Returns:

Name Type Description
thickened_image NDArray[int64] | NDArray[float64]

The thickened image.

Source code in ocred/preprocessing.py
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
def thicken_font(
    self,
    *,
    save: bool | None = False,
    inplace: bool | None = None,
    iterations: int | None = 2,
    overriden_image: npt.NDArray[np.int64] | npt.NDArray[np.float64] | None = None,
) -> npt.NDArray[np.int64] | npt.NDArray[np.float64]:
    """
    Thickens the ink of an image.

    Inverts the image so ink becomes foreground, dilates it with a 2x2
    kernel to fatten the strokes, then inverts back.

    Args:
        save:
            Saves the resultant image as "thick_font.png".
        iterations:
            Number of times the image is processed.
        inplace:
            DANGER: Deprecated since version v0.3.0.
            Was intended to edit the image inplace, but never actually worked.
        overriden_image:
            DANGER: Deprecated since version v0.3.0.
            Was used to pass a new image to the method but was redundant and buggy.

    Raises:
        DeprecationWarning: If `inplace` or `overriden_image` is passed.

    Returns:
        thickened_image:
            The thickened image.
    """
    if inplace is not None:
        raise DeprecationWarning(_dep_warn_inplace)
    if overriden_image is not None:
        raise DeprecationWarning(_dep_warn_overriden_image)

    self.img = cv2.bitwise_not(self.img)
    # np.ones(..., np.uint8) yields a uint8 array, so annotate it as such
    # (it was previously mislabelled int64).
    kernel: npt.NDArray[np.uint8] = np.ones((2, 2), np.uint8)
    self.img = cv2.dilate(self.img, kernel, iterations=iterations)
    self.img = cv2.bitwise_not(self.img)

    if save:
        cv2.imwrite("thick_font.png", self.img)

    return self.img