Add feedback API, parallel processing

This commit is contained in:
dx-tan 2023-12-08 19:49:00 +07:00
parent 8223606e95
commit 52ba638eb7
6 changed files with 169 additions and 11 deletions

View File

@ -67,6 +67,8 @@ class CtelViewSet(viewsets.ViewSet):
total_page = 1 total_page = 1
new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page, new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page,
pages_left=total_page,
doc_type="all",
process_type=p_type, status=1, request_id=rq_id, process_type=p_type, status=1, request_id=rq_id,
provider_code=provider_code, provider_code=provider_code,
subscription=sub) subscription=sub)
@ -91,7 +93,7 @@ class CtelViewSet(viewsets.ViewSet):
print(f"[INFO]: Duration of Pre-processing: {j_time - s_time}s") print(f"[INFO]: Duration of Pre-processing: {j_time - s_time}s")
print(f"[INFO]: b_url: {b_url}") print(f"[INFO]: b_url: {b_url}")
if p_type in standard_ocr_list: if p_type in standard_ocr_list:
ProcessUtil.send_to_queue2(rq_id, sub.id, b_url, user.id, p_type) ProcessUtil.send_to_queue2(rq_id + "_sub_0", sub.id, b_url, user.id, p_type)
if p_type == ProcessType.TEMPLATE_MATCHING.value: if p_type == ProcessType.TEMPLATE_MATCHING.value:
ProcessUtil.send_template_queue(rq_id, b_url, validated_data['template'], user.id) ProcessUtil.send_template_queue(rq_id, b_url, validated_data['template'], user.id)
else: else:
@ -149,6 +151,7 @@ class CtelViewSet(viewsets.ViewSet):
list_urls = [] list_urls = []
p_type = validated_data['type'] p_type = validated_data['type']
new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page, new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page,
pages_left=total_page,
process_type=p_type, status=1, request_id=rq_id, process_type=p_type, status=1, request_id=rq_id,
provider_code=provider_code, provider_code=provider_code,
subscription=sub) subscription=sub)
@ -226,6 +229,7 @@ class CtelViewSet(viewsets.ViewSet):
list_urls = [] list_urls = []
p_type = validated_data['type'] p_type = validated_data['type']
new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page, new_request: SubscriptionRequest = SubscriptionRequest(pages=total_page,
pages_left=total_page,
process_type=p_type, status=1, request_id=rq_id, process_type=p_type, status=1, request_id=rq_id,
provider_code=provider_code, provider_code=provider_code,
subscription=sub) subscription=sub)
@ -286,6 +290,62 @@ class CtelViewSet(viewsets.ViewSet):
return Response(status=status.HTTP_200_OK, data=serializer.data[0]) return Response(status=status.HTTP_200_OK, data=serializer.data[0])
raise ServiceTimeoutException(excArgs=f"{rq_id}") raise ServiceTimeoutException(excArgs=f"{rq_id}")
@extend_schema(request={
    'multipart/form-data': {
        'type': 'object',
        'properties': {
            'request_id': {
                'type': 'string',
            },
            'retailername': {
                'type': 'string',
            },
            'sold_to_party': {
                'type': 'string',
            },
            'purchase_date': {
                'type': 'array',
                'items': {
                    'type': 'string',
                }
            },
            'imei_number': {
                'type': 'array',
                'items': {
                    'type': 'string',
                }
            },
        },
        'required': ['request_id', 'retailername', 'sold_to_party', 'purchase_date', 'imei_number']
    }
}, responses=None, tags=['ocr'])
@action(detail=False, url_path="images/feedback", methods=["POST"])
def feedback(self, request):
    """Attach user feedback to a previously submitted OCR request.

    Validates the multipart payload, stores it on the matching
    SubscriptionRequest row, writes a local JSON copy of the feedback and
    uploads that file to S3.

    Raises:
        InvalidException: if no SubscriptionRequest matches the request_id.
    """
    validated_data = ProcessUtil.sbt_validate_feedback(request)
    rq_id = validated_data['request_id']
    # .first() fetches at most one row instead of materialising the whole
    # queryset just to test emptiness and index [0].
    subscription_request = SubscriptionRequest.objects.filter(request_id=rq_id).first()
    if subscription_request is None:
        raise InvalidException(excArgs=f"{rq_id}")
    # Persist the raw feedback payload on the request row.
    subscription_request.feedback_result = validated_data
    subscription_request.save()
    file_name = f"feedback_{rq_id}.json"
    # Keep a local JSON copy, then mirror it to S3.
    file_path = FileUtils.save_json_file(file_name, subscription_request, validated_data)
    FileUtils.save_to_S3(file_name, subscription_request, file_path)
    return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id})
@extend_schema(request=None, responses=None, tags=['data']) @extend_schema(request=None, responses=None, tags=['data'])
@extend_schema(request=None, responses=None, tags=['templates'], methods=['GET']) @extend_schema(request=None, responses=None, tags=['templates'], methods=['GET'])
@ -400,6 +460,10 @@ class CtelViewSet(viewsets.ViewSet):
# print(f"[DEBUG]: result: {serializer.data[0]}") # print(f"[DEBUG]: result: {serializer.data[0]}")
if report_filter[0].status == 400: if report_filter[0].status == 400:
raise FileContentInvalidException() raise FileContentInvalidException()
if report_filter[0].status == 100: # continue, only return when result is fullfilled
empty_data = serializer.data[0]
empty_data["data"] = None
return Response(status=status.HTTP_200_OK, data=empty_data)
return Response(status=status.HTTP_200_OK, data=serializer.data[0]) return Response(status=status.HTTP_200_OK, data=serializer.data[0])

View File

@ -81,7 +81,7 @@ def process_image_file(file_name: str, file_path, request, user) -> list:
@app.task(name='do_pdf') @app.task(name='do_pdf')
def process_pdf(rq_id, sub_id, p_type, user_id, files): def process_pdf(rq_id, sub_id, p_type, user_id, files):
""" """
pdf_files: [{ files: [{
"file_name": "", "file_name": "",
"file_path": "", # local path to file "file_path": "", # local path to file
"file_type": "" "file_type": ""
@ -93,11 +93,16 @@ def process_pdf(rq_id, sub_id, p_type, user_id, files):
new_request = SubscriptionRequest.objects.filter(request_id=rq_id)[0] new_request = SubscriptionRequest.objects.filter(request_id=rq_id)[0]
user = UserProfile.objects.filter(id=user_id).first() user = UserProfile.objects.filter(id=user_id).first()
b_urls = [] b_urls = []
new_request.pages = len(files)
new_request.pages_left = len(files)
for i, file in enumerate(files): for i, file in enumerate(files):
extension = file["file_name"].split(".")[-1].lower() extension = file["file_name"].split(".")[-1].lower()
if extension == "pdf": if extension == "pdf":
_b_urls = process_pdf_file(file["file_name"], file["file_path"], new_request, user) _b_urls = process_pdf_file(file["file_name"], file["file_path"], new_request, user)
if _b_urls is None: if _b_urls is None:
new_request.status = 400
new_request.save()
raise FileContentInvalidException raise FileContentInvalidException
for j in range(len(_b_urls)): for j in range(len(_b_urls)):
_b_urls[j]["doc_type"] = file["file_type"] _b_urls[j]["doc_type"] = file["file_type"]
@ -113,10 +118,20 @@ def process_pdf(rq_id, sub_id, p_type, user_id, files):
start_process = time.time() start_process = time.time()
logger.info(f"BE proccessing time: {start_process - start}") logger.info(f"BE proccessing time: {start_process - start}")
if p_type in standard_ocr_list: # TODO: send to queue with different request_ids
ProcessUtil.send_to_queue2(rq_id, sub_id, b_urls, user_id, p_type) doc_type_string =""
if p_type == ProcessType.TEMPLATE_MATCHING.value: for i, b_url in enumerate(b_urls):
ProcessUtil.send_template_queue(rq_id, b_urls, '', user_id) fractorized_request_id = rq_id + f"_sub_{i}"
ProcessUtil.send_to_queue2(fractorized_request_id, sub_id, [b_url], user_id, p_type)
doc_type_string += "{},".format(b_url["doc_type"])
doc_type_string = doc_type_string[:-1]
new_request.doc_type = doc_type_string
new_request.save()
# if p_type in standard_ocr_list:
# ProcessUtil.send_to_queue2(rq_id, sub_id, b_urls, user_id, p_type)
# if p_type == ProcessType.TEMPLATE_MATCHING.value:
# ProcessUtil.send_template_queue(rq_id, b_urls, '', user_id)
@app.task(name='upload_file_to_s3') @app.task(name='upload_file_to_s3')
def upload_file_to_s3(local_file_path, s3_key): def upload_file_to_s3(local_file_path, s3_key):

View File

@ -4,8 +4,30 @@ import uuid
from fwd_api.celery_worker.worker import app from fwd_api.celery_worker.worker import app
from fwd_api.models import SubscriptionRequest from fwd_api.models import SubscriptionRequest
from django.utils.crypto import get_random_string from django.utils.crypto import get_random_string
from fwd_api.exception.exceptions import InvalidException
def aggregate_result(src_result, des_result, doc_type):
    """Merge a single-page OCR result into the accumulated request result.

    Args:
        src_result: result dict for one page (must carry "status" and the
            nested "content" structure produced by the OCR worker).
        des_result: the aggregate built so far, or a falsy value on the
            first page.
        doc_type: one of "imei", "invoice" or "all"; selects which fields
            are concatenated vs. overwritten.

    Returns:
        The merged result dict. A non-200 src_result, or an empty
        des_result, short-circuits and returns src_result unchanged.

    Raises:
        InvalidException: for an unrecognised doc_type.
    """
    # A failed page poisons the whole request: propagate it as-is.
    if src_result["status"] != 200:
        return src_result
    # First page: nothing to merge into yet.
    if not des_result:
        return src_result
    des_result["content"]["total_pages"] += 1
    des_result["content"]["ocr_num_pages"] += 1
    des_result["content"]["document"][0]["end_page"] += 1
    if doc_type == "imei":
        # IMEI lists accumulate across pages.
        des_result["content"]["document"][0]["content"][3]["value"] += src_result["content"]["document"][0]["content"][3]["value"]
    elif doc_type == "invoice":
        # Scalar invoice fields take the latest page; line items accumulate.
        des_result["content"]["document"][0]["content"][0]["value"] = src_result["content"]["document"][0]["content"][0]["value"]
        des_result["content"]["document"][0]["content"][1]["value"] = src_result["content"]["document"][0]["content"][1]["value"]
        des_result["content"]["document"][0]["content"][2]["value"] += src_result["content"]["document"][0]["content"][2]["value"]
    elif doc_type == "all":
        des_result.update(src_result)
    else:
        # Consistent with the project-wide exception convention (excArgs kwarg).
        raise InvalidException(excArgs=f"doc_type: {doc_type}")
    return des_result
def print_id(rq_id): def print_id(rq_id):
print(" [x] Received {rq}".format(rq=rq_id)) print(" [x] Received {rq}".format(rq=rq_id))
@ -108,12 +130,28 @@ def process_invoice_sbt_result(rq_id, result):
print_id(f"[DEBUG]: Received SBT request with id {rq_id}") print_id(f"[DEBUG]: Received SBT request with id {rq_id}")
print_id(f"[DEBUG]: result: {result}") print_id(f"[DEBUG]: result: {result}")
try: try:
page_index = int(rq_id.split("_sub_")[1])
rq_id = rq_id.split("_sub_")[0]
rq: SubscriptionRequest = \ rq: SubscriptionRequest = \
SubscriptionRequest.objects.filter(request_id=rq_id, process_type=ProcessType.SBT_INVOICE.value)[0] SubscriptionRequest.objects.filter(request_id=rq_id, process_type=ProcessType.SBT_INVOICE.value)[0]
status = to_status(result) # status = to_status(result)
status = result.get("status", 200)
rq.pages_left = rq.pages_left - 1
done = rq.pages_left <= 0
# aggregate results from multiple pages
rq.predict_result = aggregate_result(result, rq.predict_result, rq.doc_type.split(",")[page_index])
print_id(f"[DEBUG]: status: {status}")
if status == 200:
if not done:
rq.status = 100 # continue
else:
rq.status = 200 # stop waiting
else:
rq.status = 404 # stop waiting
rq.predict_result = result
rq.status = status
rq.save() rq.save()
update_user(rq) update_user(rq)
except IndexError as e: except IndexError as e:

View File

@ -8,11 +8,13 @@ from fwd_api.models.Subscription import Subscription
class SubscriptionRequest(models.Model): class SubscriptionRequest(models.Model):
id = models.AutoField(primary_key=True) id = models.AutoField(primary_key=True)
pages: int = models.IntegerField() pages: int = models.IntegerField()
pages_left: int = models.IntegerField(default=1)
doc_type: str = models.CharField(max_length=100) doc_type: str = models.CharField(max_length=100)
request_id = models.CharField(max_length=200) # Change to request_id request_id = models.CharField(max_length=200) # Change to request_id
process_type = models.CharField(max_length=200) # driver/id/invoice process_type = models.CharField(max_length=200) # driver/id/invoice
provider_code = models.CharField(max_length=200, default="Guest") # Request source FWD/CTel provider_code = models.CharField(max_length=200, default="Guest") # Request source FWD/CTel
predict_result = models.JSONField(null=True) predict_result = models.JSONField(null=True)
feedback_result = models.JSONField(null=True)
status = models.IntegerField() # 1: Processing(Pending) 2: PredictCompleted 3: ReturnCompleted status = models.IntegerField() # 1: Processing(Pending) 2: PredictCompleted 3: ReturnCompleted
subscription = models.ForeignKey(Subscription, on_delete=models.CASCADE) subscription = models.ForeignKey(Subscription, on_delete=models.CASCADE)
created_at = models.DateTimeField(default=timezone.now) created_at = models.DateTimeField(default=timezone.now)

View File

@ -1,7 +1,8 @@
import io import io
import os import os
import traceback import traceback
import base64 import base64
import json
from PIL import Image, ExifTags from PIL import Image, ExifTags
from django.core.files.uploadedfile import TemporaryUploadedFile from django.core.files.uploadedfile import TemporaryUploadedFile
@ -77,7 +78,6 @@ def save_byte_file(file_name: str, rq: SubscriptionRequest, file_bytes):
return file_path return file_path
def save_file(file_name: str, rq: SubscriptionRequest, file: TemporaryUploadedFile): def save_file(file_name: str, rq: SubscriptionRequest, file: TemporaryUploadedFile):
folder_path = get_folder_path(rq) folder_path = get_folder_path(rq)
is_exist = os.path.exists(folder_path) is_exist = os.path.exists(folder_path)
@ -93,6 +93,16 @@ def save_file(file_name: str, rq: SubscriptionRequest, file: TemporaryUploadedFi
return file_path return file_path
def save_json_file(file_name: str, rq: SubscriptionRequest, data: dict) -> str:
    """Serialize *data* as JSON into the request's storage folder.

    Args:
        file_name: name of the JSON file to create.
        rq: the SubscriptionRequest whose folder the file belongs to.
        data: JSON-serializable payload.

    Returns:
        The full local path of the written file.
    """
    folder_path = get_folder_path(rq)
    # exist_ok avoids the exists()/makedirs() race when requests are
    # processed in parallel.
    os.makedirs(folder_path, exist_ok=True)
    file_path = os.path.join(folder_path, file_name)
    with open(file_path, "w") as json_file:
        json.dump(data, json_file)
    return file_path
def delete_file_with_path(file_path: str) -> bool: def delete_file_with_path(file_path: str) -> bool:
try: try:

View File

@ -141,6 +141,35 @@ def sbt_validate_ocr_request_and_get(request, subscription):
return validated_data return validated_data
def sbt_validate_feedback(request):
    """Validate a feedback payload and return it as a plain dict.

    Scalar fields (request_id, retailername, sold_to_party) and list
    fields (purchase_date, imei_number) must all be non-empty; the first
    missing one triggers a RequiredFieldException naming that field.
    """
    data = request.data
    validated_data = {
        'request_id': data.get('request_id', None),
        'retailername': data.get('retailername', None),
        'sold_to_party': data.get('sold_to_party', None),
        'purchase_date': data.getlist('purchase_date', []),
        'imei_number': data.getlist('imei_number', []),
    }
    # Insertion order of the dict preserves the original check order.
    for field_name, field_value in validated_data.items():
        if not field_value:
            raise RequiredFieldException(excArgs=field_name)
    return validated_data
def count_pages_in_pdf(pdf_file): def count_pages_in_pdf(pdf_file):
count = 0 count = 0
fh, temp_filename = tempfile.mkstemp() # make a tmp file fh, temp_filename = tempfile.mkstemp() # make a tmp file