156 lines
3.8 KiB
Python
Executable File
156 lines
3.8 KiB
Python
Executable File
from OCRBase.text_recognition import ocr_predict
|
|
import cv2
|
|
from shapely.geometry import Polygon
|
|
import urllib
|
|
import numpy as np
|
|
|
|
def check_percent_overlap_bbox(boxA, boxB):
    """Return the fraction of boxA's area that is covered by boxB.

    Both boxes are treated as axis-aligned rectangles described by two
    opposite corners, ``[x1, y1, x2, y2]``. The corners may be given in
    either order.

    Args:
        boxA: Box whose covered fraction is measured (the denominator).
        boxB: Box that boxA is intersected with.

    Returns:
        float: ``area(intersection of boxA and boxB) / area(boxA)``.
        ``0.0`` when the boxes do not overlap or when boxA has zero area
        (a point or a line), which would otherwise divide by zero.
    """
    # Normalise the corner order so swapped corners describe the same box.
    a_left, a_right = min(boxA[0], boxA[2]), max(boxA[0], boxA[2])
    a_top, a_bottom = min(boxA[1], boxA[3]), max(boxA[1], boxA[3])
    b_left, b_right = min(boxB[0], boxB[2]), max(boxB[0], boxB[2])
    b_top, b_bottom = min(boxB[1], boxB[3]), max(boxB[1], boxB[3])

    area_a = (a_right - a_left) * (a_bottom - a_top)
    if area_a == 0:
        # Degenerate boxA: there is no area to be covered.
        return 0.0

    # Width and height of the intersection rectangle; a non-positive value
    # on either axis means the boxes are disjoint or only touch.
    overlap_width = min(a_right, b_right) - max(a_left, b_left)
    overlap_height = min(a_bottom, b_bottom) - max(a_top, b_top)
    if overlap_width <= 0 or overlap_height <= 0:
        return 0.0

    return (overlap_width * overlap_height) / area_a
|
|
|
|
|
|
def check_box_in_box(boxA, boxB):
    """Tell whether boxA lies entirely within boxB.

    Both boxes are indexed as ``[x1, y1, x2, y2]``; edges that coincide
    count as inside.

    Args:
        boxA: Candidate inner box.
        boxB: Candidate outer box.

    Returns:
        bool: True if boxA's first corner is not before boxB's first corner
        and boxA's second corner is not past boxB's second corner.
    """
    # First corner of boxA must not start before boxB on either axis.
    starts_inside = boxA[0] >= boxB[0] and boxA[1] >= boxB[1]
    if not starts_inside:
        return False

    # Second corner of boxA must not extend past boxB on either axis.
    return bool(boxA[2] <= boxB[2] and boxA[3] <= boxB[3])
|
|
|
|
|
|
def word_to_line_image_origin(list_words, bbox):
    """Collect OCR texts and shift their boxes by the origin of ``bbox``.

    Used when OCR ran on a selected region: each entry's box is translated
    by ``bbox[0]`` horizontally and ``bbox[1]`` vertically.

    Args:
        list_words: Iterable of objects exposing ``text`` and
            ``boundingbox`` attributes.
        bbox: Selected region; only ``bbox[0]`` and ``bbox[1]`` are read.

    Returns:
        tuple: ``(texts, boundingboxes)`` - two parallel lists holding the
        text and the shifted box (as a list) of every entry whose text is
        not the empty string.
    """
    # (box index, bbox index) pairs: indices 0 and 2 shift by bbox[0],
    # indices 1 and 3 shift by bbox[1].
    shift_plan = ((0, 0), (1, 1), (2, 0), (3, 1))

    texts = []
    boundingboxes = []
    for word in list_words:
        if word.text == "":
            continue

        shifted_box = list(word.boundingbox)
        for box_idx, origin_idx in shift_plan:
            shifted_box[box_idx] = shifted_box[box_idx] + bbox[origin_idx]

        texts.append(word.text)
        boundingboxes.append(shifted_box)

    return texts, boundingboxes
|
|
|
|
|
|
def word_to_line(list_words):
    """Collect the text and bounding box of every non-empty OCR entry.

    Used when OCR ran on the full image, so boxes are returned unshifted.

    Args:
        list_words: Iterable of objects exposing ``text`` and
            ``boundingbox`` attributes.

    Returns:
        tuple: ``(texts, boundingboxes)`` - two parallel lists holding the
        text and the box (copied into a new list) of every entry whose
        text is not the empty string.
    """
    texts, boundingboxes = [], []
    for line in list_words:
        if line.text == "":
            continue
        texts.append(line.text)
        # Copy into a list so callers get a mutable, independent box.
        boundingboxes.append(list(line.boundingbox))
    return texts, boundingboxes
|
|
|
|
|
|
def predict(page_numb, image_url):
    """Download an image, run OCR on it and wrap the result in a dict.

    Args:
        page_numb: Page identifier, copied unchanged into the ``"page"``
            entry of the returned field.
        image_url (str): URL of the image to read; it is opened with
            ``urllib.request.urlopen``.

    Returns:
        dict: ``{"document_type": "ocr-base", "fields": [field]}`` where
        ``field`` holds ``"label"`` (``"Text"``), ``"value"`` (list of
        recognised texts), ``"box"`` (list of bounding boxes),
        ``"confidence"`` and ``"page"``.
    """
    # ``import urllib`` alone does not load the ``request`` submodule, so
    # import it explicitly instead of relying on another module having
    # done so as a side effect.
    import urllib.request

    # NOTE(review): image_url is opened as-is; if it can come from an
    # untrusted caller, restrict the allowed schemes/hosts before this call.
    with urllib.request.urlopen(image_url) as response:
        raw_bytes = response.read()

    encoded = np.asarray(bytearray(raw_bytes), dtype=np.uint8)
    # IMREAD_UNCHANGED (-1) decodes the image as stored in the file.
    # NOTE(review): cv2.imdecode returns None for undecodable data -
    # confirm that ocr_predict copes with that.
    image = cv2.imdecode(encoded, cv2.IMREAD_UNCHANGED)

    list_lines = ocr_predict(image)
    texts, boundingboxes = word_to_line(list_lines)

    # The "✪" marker in the OCR output is replaced by a plain space.
    cleaned_texts = [text.replace("✪", " ") for text in texts]

    field = {
        "label": "Text",
        "value": cleaned_texts,
        "box": boundingboxes,
        # Fixed value; it is not computed from the OCR result.
        "confidence": 0.98,
        "page": page_numb,
    }
    return {
        "document_type": "ocr-base",
        "fields": [field],
    }
|