sbt-idp/cope2n-api/fwd_api/api/ctel_view.py

507 lines
22 KiB
Python
Raw Normal View History

2023-11-30 11:19:06 +00:00
import time
import uuid
from wsgiref.util import FileWrapper
2023-12-05 05:59:06 +00:00
import base64
2023-11-30 11:19:06 +00:00
from django.core.files.uploadedfile import TemporaryUploadedFile
from django.db import transaction
from django.http import HttpResponse, JsonResponse
from django.utils.crypto import get_random_string
from drf_spectacular.utils import extend_schema
from rest_framework import status, viewsets
from rest_framework.decorators import action
from rest_framework.response import Response
from typing import List
from fwd import settings
2023-12-05 05:59:06 +00:00
from ..celery_worker.client_connector import c_connector
2023-11-30 11:19:06 +00:00
from ..annotation.api import throw_on_failure
from ..constant.common import allowed_p_type, ProcessType, REQUEST_ID, FOLDER_TYPE, \
2023-12-05 05:59:06 +00:00
FolderFileType, TEMPLATE_ID, EntityStatus, standard_ocr_list, pdf_extensions, image_extensions, allowed_file_extensions
2023-11-30 11:19:06 +00:00
from ..exception.exceptions import RequiredFieldException, InvalidException, NotFoundException, \
2023-12-07 12:52:29 +00:00
PermissionDeniedException, LimitReachedException, LockedEntityException, FileContentInvalidException, ServiceTimeoutException
2023-11-30 11:19:06 +00:00
from ..models import SubscriptionRequest, UserProfile, SubscriptionRequestFile, OcrTemplate, Subscription
from ..response.ReportSerializer import ReportSerializer
from ..utils import FileUtils, ProcessUtil
# DRF ViewSet whose actions (declared below) accept OCR uploads, serve stored
# media files and return processing results.
class CtelViewSet(viewsets.ViewSet):
# Name of the URL keyword used for detail lookups on this viewset.
lookup_field = "username"
# Upload-size threshold read from settings; `process_app` compares the uploaded
# file's size against it to choose the quality passed to the resize helper.
size_to_compress = settings.SIZE_TO_COMPRESS
@extend_schema(request={
    'multipart/form-data': {
        'type': 'object',
        'properties': {
            'file': {
                'type': 'string',
                'format': 'binary'
            },
            'processType': {
                'type': 'string'
            },
        },
        # OpenAPI/JSON Schema expects `required` to be an array, not a set.
        'required': ['file', 'processType']
    }
}, responses=None, tags=['ocr'])
@action(detail=False, url_path="image/process", methods=["POST"])
def process(self, request):
    """Accept one uploaded file, persist it and queue it for processing.

    The request is validated through
    ``ProcessUtil.validate_ocr_request_and_get``; a ``SubscriptionRequest``
    row is created, the upload is saved locally and pushed to S3, and the
    work is dispatched according to the file extension:

    * PDF extensions  -> ``c_connector.do_pdf``
    * image extensions -> ``ProcessUtil.process_image_file`` followed by
      ``send_to_queue2`` (standard OCR types) and/or
      ``send_template_queue`` (template matching)

    Returns:
        JsonResponse: HTTP 200 with ``{"request_id": ...}`` once the work is
        queued, or HTTP 406 when the extension is neither a PDF nor an
        image extension.
    """
    s_time = time.time()
    user_info = ProcessUtil.get_user(request)
    user = user_info.user
    sub = user_info.current_sub
    validated_data = ProcessUtil.validate_ocr_request_and_get(request, sub)

    provider_code = 'SAP'
    rq_id = provider_code + uuid.uuid4().hex

    file_obj: TemporaryUploadedFile = validated_data['file']
    file_extension = file_obj.name.split(".")[-1].lower()
    p_type = validated_data['type']
    file_name = f"temp_{rq_id}.{file_extension}"

    total_page = 1
    new_request: SubscriptionRequest = SubscriptionRequest(
        pages=total_page,
        process_type=p_type,
        status=1,
        request_id=rq_id,
        provider_code=provider_code,
        subscription=sub,
    )
    new_request.save()

    # Rewind before handing the upload to the resize helper.
    file_obj.seek(0)
    file_path = FileUtils.resize_and_save_file(file_name, new_request, file_obj, 100)
    FileUtils.save_to_S3(file_name, new_request, file_path)

    # This used to be written `files: [...]`, which is only an annotation and
    # left `files` unbound when the PDF branch tried to use it.
    files = [{
        "file_name": file_name,
        "file_path": file_path,  # local path to file
        "file_type": "",
    }]

    if file_extension in pdf_extensions:
        c_connector.do_pdf((rq_id, sub.id, p_type, user.id, files))
    elif file_extension in image_extensions:
        b_url = ProcessUtil.process_image_file(file_name, file_obj, new_request, user)
        j_time = time.time()
        print(f"[INFO]: Duration of Pre-processing: {j_time - s_time}s")
        print(f"[INFO]: b_url: {b_url}")
        if p_type in standard_ocr_list:
            ProcessUtil.send_to_queue2(rq_id, sub.id, b_url, user.id, p_type)
        if p_type == ProcessType.TEMPLATE_MATCHING.value:
            ProcessUtil.send_template_queue(rq_id, b_url, validated_data['template'], user.id)
    else:
        return JsonResponse(
            status=status.HTTP_406_NOT_ACCEPTABLE,
            data={"request_id": rq_id, "message": f"File {file_extension} is not allowed"},
        )

    return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id})
@extend_schema(request={
    'multipart/form-data': {
        'type': 'object',
        'properties': {
            'imei_files': {
                'type': 'array',
                'items': {
                    'type': 'string',
                    'format': 'binary'
                }
            },
            'invoice_file': {
                'type': 'string',
                'format': 'binary'
            },
            'redemption_ID': {
                'type': 'string'
            },
        },
        # OpenAPI/JSON Schema expects `required` to be an array, not a set.
        'required': ['imei_files']
    }
}, responses=None, tags=['ocr'])
@action(detail=False, url_path="images/process", methods=["POST"])
def processes(self, request):
    """Accept IMEI and invoice uploads and queue them as one request.

    Every uploaded file is saved locally, pushed to S3 and described by a
    ``{"file_name", "file_path", "file_type"}`` dict; the collected list is
    handed to ``c_connector.do_pdf`` in a single call.

    Returns:
        JsonResponse: HTTP 200 with ``{"request_id": ...}`` once queued, or
        HTTP 406 as soon as a file with a disallowed extension is met.
    """
    user_info = ProcessUtil.get_user(request)
    user = user_info.user
    sub = user_info.current_sub
    validated_data = ProcessUtil.sbt_validate_ocr_request_and_get(request, sub)

    provider_code = 'SAP'
    rq_id = provider_code + uuid.uuid4().hex

    imei_file_objs: List[TemporaryUploadedFile] = validated_data['imei_file']
    invoice_file_objs: List[TemporaryUploadedFile] = validated_data['invoice_file']
    files = {
        "imei": imei_file_objs,
        "invoice": invoice_file_objs,
    }
    # NOTE(review): this counts document categories (always 2), not uploaded
    # files or pages -- confirm that is what `pages` should record.
    total_page = len(files)
    p_type = validated_data['type']

    new_request: SubscriptionRequest = SubscriptionRequest(
        pages=total_page,
        process_type=p_type,
        status=1,
        request_id=rq_id,
        provider_code=provider_code,
        subscription=sub,
    )
    new_request.save()

    compact_files = []
    for doc_type, doc_files in files.items():
        for i, doc_file in enumerate(doc_files):
            # Lower-case so "JPG" and "jpg" are treated alike, matching the
            # single-file `process` endpoint.
            _ext = doc_file.name.split(".")[-1].lower()
            if _ext not in allowed_file_extensions:
                return JsonResponse(
                    status=status.HTTP_406_NOT_ACCEPTABLE,
                    data={"request_id": rq_id, "message": f"File {_ext} is not allowed"},
                )
            _name = f"temp_{doc_type}_{rq_id}_{i}.{_ext}"
            doc_file.seek(0)
            file_path = FileUtils.resize_and_save_file(_name, new_request, doc_file, 100)
            FileUtils.save_to_S3(_name, new_request, file_path)
            compact_files.append({
                "file_name": _name,
                "file_path": file_path,
                "file_type": doc_type,
            })

    c_connector.do_pdf((rq_id, sub.id, p_type, user.id, compact_files))
    return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id})
2023-12-07 10:05:58 +00:00
@extend_schema(request={
    'multipart/form-data': {
        'type': 'object',
        'properties': {
            'imei_files': {
                'type': 'array',
                'items': {
                    'type': 'string',
                    'format': 'binary'
                }
            },
            'invoice_file': {
                'type': 'string',
                'format': 'binary'
            },
            'redemption_ID': {
                'type': 'string'
            },
        },
        # OpenAPI/JSON Schema expects `required` to be an array, not a set.
        'required': ['imei_files']
    }
}, responses=None, tags=['ocr'])
@action(detail=False, url_path="images/process_sync", methods=["POST"])
def processes_sync(self, request):
    """Queue IMEI and invoice uploads, then wait for the result.

    The upload half mirrors ``processes``: each file is saved, pushed to S3
    and passed to ``c_connector.do_pdf``. Afterwards the request row is
    re-read every 0.1 s for up to 120 s until a result is available.

    Returns:
        HttpResponse: ``text/xml`` body for ``FI_INVOICE`` requests once the
        XML is present in ``predict_result``.
        Response: HTTP 200 with the serialized report once its ``data``
        entry reports status 200.
        JsonResponse: HTTP 406 when a file has a disallowed extension.

    Raises:
        InvalidException: the request row is missing/duplicated or belongs
            to another subscription.
        FileContentInvalidException: the request row has status 400.
        ServiceTimeoutException: no result appeared within the timeout.
    """
    user_info = ProcessUtil.get_user(request)
    user = user_info.user
    sub = user_info.current_sub
    validated_data = ProcessUtil.sbt_validate_ocr_request_and_get(request, sub)

    provider_code = 'SAP'
    rq_id = provider_code + uuid.uuid4().hex

    imei_file_objs: List[TemporaryUploadedFile] = validated_data['imei_file']
    invoice_file_objs: List[TemporaryUploadedFile] = validated_data['invoice_file']
    files = {
        "imei": imei_file_objs,
        "invoice": invoice_file_objs,
    }
    # NOTE(review): this counts document categories (always 2), not uploaded
    # files or pages -- confirm that is what `pages` should record.
    total_page = len(files)
    p_type = validated_data['type']

    new_request: SubscriptionRequest = SubscriptionRequest(
        pages=total_page,
        process_type=p_type,
        status=1,
        request_id=rq_id,
        provider_code=provider_code,
        subscription=sub,
    )
    new_request.save()

    compact_files = []
    for doc_type, doc_files in files.items():
        for i, doc_file in enumerate(doc_files):
            # Lower-case so "JPG" and "jpg" are treated alike, matching the
            # single-file `process` endpoint.
            _ext = doc_file.name.split(".")[-1].lower()
            if _ext not in allowed_file_extensions:
                return JsonResponse(
                    status=status.HTTP_406_NOT_ACCEPTABLE,
                    data={"request_id": rq_id, "message": f"File {_ext} is not allowed"},
                )
            _name = f"temp_{doc_type}_{rq_id}_{i}.{_ext}"
            doc_file.seek(0)
            file_path = FileUtils.resize_and_save_file(_name, new_request, doc_file, 100)
            FileUtils.save_to_S3(_name, new_request, file_path)
            compact_files.append({
                "file_name": _name,
                "file_path": file_path,
                "file_type": doc_type,
            })

    c_connector.do_pdf((rq_id, sub.id, p_type, user.id, compact_files))

    time_out = 120
    # A monotonic clock keeps the timeout immune to wall-clock adjustments.
    deadline = time.monotonic() + time_out
    while time.monotonic() < deadline:
        time.sleep(0.1)
        # A fresh queryset per iteration forces a new database read.
        report_filter = SubscriptionRequest.objects.filter(request_id=rq_id)
        if len(report_filter) != 1:
            raise InvalidException(excArgs='requestId')
        report = report_filter[0]
        if user_info.current_sub.id != report.subscription.id:
            raise InvalidException(excArgs="user")
        # Fail fast for every process type once the request is marked invalid.
        if report.status == 400:
            raise FileContentInvalidException()

        if int(report.process_type) == ProcessType.FI_INVOICE.value:
            data = report.predict_result
            xml_as_string = ""
            if data and 'content' in data and 'combine_results' in data['content'] \
                    and 'xml' in data['content']['combine_results']:
                xml_as_string = data['content']['combine_results']['xml']
                xml_as_string = xml_as_string.replace("\n", "").replace("\\", "")
            if not xml_as_string:
                # Result not produced yet: keep waiting instead of answering
                # with an empty XML document on the very first poll.
                continue
            return HttpResponse(xml_as_string, content_type="text/xml")

        serializer: ReportSerializer = ReportSerializer(data=report_filter, many=True)
        serializer.is_valid()
        if len(serializer.data) == 0:
            continue
        if serializer.data[0].get("data", None) is None:
            continue
        if serializer.data[0]["data"].get("status", 200) != 200:
            continue
        return Response(status=status.HTTP_200_OK, data=serializer.data[0])

    raise ServiceTimeoutException(excArgs=f"{rq_id}")
2023-12-07 10:05:58 +00:00
2023-11-30 11:19:06 +00:00
@extend_schema(request=None, responses=None, tags=['data'])
@extend_schema(request=None, responses=None, tags=['templates'], methods=['GET'])
@action(detail=False, url_path=r"media/(?P<folder_type>\w+)/(?P<uq_id>\w+)", methods=["GET"])
def get_file_v2(self, request, uq_id=None, folder_type=None):
    """Stream a stored template or request file back to its owner.

    ``folder_type`` selects the lookup: ``'templates'`` resolves ``uq_id``
    as an ``OcrTemplate`` id, ``'requests'`` resolves it as a
    ``SubscriptionRequest.request_id`` and additionally needs the
    ``file_name`` query parameter. The response content type is
    ``application/pdf`` for PDF extensions and ``image/png`` otherwise.

    Raises:
        RequiredFieldException: ``folder_type``, ``uq_id`` or (for requests)
            ``file_name`` is missing.
        NotFoundException: no unique matching record/file exists.
        PermissionDeniedException: the owning user is not the caller or is
            not active.
        InvalidException: ``folder_type`` is neither supported value.
    """
    user_data = request.user_data
    content_type = "image/png"
    file_name: str = request.query_params.get('file_name', None)

    if folder_type is None:
        raise RequiredFieldException(excArgs=FOLDER_TYPE)
    if uq_id is None:
        raise RequiredFieldException(excArgs=REQUEST_ID)

    if folder_type == 'templates':
        matches = OcrTemplate.objects.filter(id=uq_id)
        if len(matches) != 1:
            raise NotFoundException(excArgs='file')
        template: OcrTemplate = matches[0]
        owner = template.subscription.user
        if template.file_name.split(".")[-1] in pdf_extensions:
            content_type = 'application/pdf'
        if not (owner.id == user_data['internal_id'] and owner.status == EntityStatus.ACTIVE.value):
            raise PermissionDeniedException()
        print(template.file_path)
        return HttpResponse(
            FileWrapper(FileUtils.get_file(template.file_path)),
            status=status.HTTP_200_OK,
            headers={'Content-Disposition': f'filename={template.file_name}'},
            content_type=content_type,
        )

    if folder_type != 'requests':
        raise InvalidException(excArgs='type')

    if file_name is None:
        raise RequiredFieldException(excArgs='file_name')
    try:
        found = SubscriptionRequest.objects.filter(request_id=uq_id)
        if len(found) != 1:
            raise NotFoundException(excArgs='file')
        sub_request = found[0]
        owner = sub_request.subscription.user
        if file_name.split(".")[-1] in pdf_extensions:
            content_type = 'application/pdf'
        if not (owner.id == user_data['internal_id'] and owner.status == EntityStatus.ACTIVE.value):
            raise PermissionDeniedException()
        # Indexing an empty result raises IndexError, handled just below.
        file_data = SubscriptionRequestFile.objects.filter(request=sub_request, file_name=file_name)[0]
    except IndexError:
        raise NotFoundException(excArgs='file')
    return HttpResponse(
        FileWrapper(FileUtils.get_file(file_data.file_path)),
        status=status.HTTP_200_OK,
        headers={'Content-Disposition': f'filename={file_data.file_name}'},
        content_type=content_type,
    )
@extend_schema(request=None, responses=None, tags=['data'])
@action(detail=False, url_path=r"v2/media/request/(?P<media_id>\w+)", methods=["GET"])
def get_file_v3(self, request, media_id=None):
    """Stream the request file identified by its media ``code``.

    The file is served only when it belongs to the caller's current
    subscription. The response content type is ``application/pdf`` for PDF
    extensions and ``image/png`` otherwise.

    Raises:
        RequiredFieldException: ``media_id`` is missing.
        LockedEntityException: the code does not match exactly one file.
        PermissionDeniedException: the file belongs to another subscription.
    """
    user_info = ProcessUtil.get_user(request)
    sub = user_info.current_sub
    if media_id is None:
        raise RequiredFieldException(excArgs=REQUEST_ID)

    media_list = SubscriptionRequestFile.objects.filter(code=media_id)
    # NOTE(review): zero matches also surface as LockedEntityException; the
    # removed (unreachable) IndexError handler suggests NotFoundException may
    # have been intended -- confirm before changing the raised type.
    if len(media_list) != 1:
        raise LockedEntityException(excArgs='media')

    # The guard above guarantees exactly one row, so indexing cannot fail and
    # the former `except IndexError` wrapper was dead code.
    media_data: SubscriptionRequestFile = media_list[0]
    if media_data.request.subscription.id != sub.id:
        raise PermissionDeniedException()

    file_name = media_data.file_name
    content_type = 'application/pdf' if file_name.split(".")[-1] in pdf_extensions else "image/png"
    return HttpResponse(
        FileWrapper(FileUtils.get_file(media_data.file_path)),
        status=status.HTTP_200_OK,
        headers={'Content-Disposition': 'filename={fn}'.format(fn=file_name)},
        content_type=content_type,
    )
from rest_framework.renderers import JSONRenderer
from rest_framework_xml.renderers import XMLRenderer
@extend_schema(request=None, responses=None, tags=['data'])
@throw_on_failure(InvalidException(excArgs='data'))
@action(detail=False, url_path=r"result/(?P<request_id>\w+)", methods=["GET"], renderer_classes=[JSONRenderer, XMLRenderer])
def get_result(self, request, request_id=None):
    """Return the stored result of a previously submitted request.

    ``FI_INVOICE`` requests are answered with the XML string found under
    ``predict_result['content']['combine_results']['xml']`` (empty when
    absent); every other process type is answered with the first entry of
    the ``ReportSerializer`` output.

    Raises:
        RequiredFieldException: ``request_id`` is missing.
        InvalidException: the id does not match exactly one request, or the
            request belongs to another subscription.
        FileContentInvalidException: the request row has status 400.
    """
    user_info = ProcessUtil.get_user(request)
    if request_id is None:
        raise RequiredFieldException(excArgs='requestId')

    matches = SubscriptionRequest.objects.filter(request_id=request_id)
    if len(matches) != 1:
        raise InvalidException(excArgs='requestId')
    record = matches[0]
    if user_info.current_sub.id != record.subscription.id:
        raise InvalidException(excArgs="user")

    if int(record.process_type) == ProcessType.FI_INVOICE.value:
        payload = record.predict_result
        xml_text = ""
        if payload and 'content' in payload:
            content = payload['content']
            if 'combine_results' in content and 'xml' in content['combine_results']:
                # Strip newlines and backslashes from the stored XML.
                xml_text = content['combine_results']['xml'].replace("\n", "").replace("\\", "")
        return HttpResponse(xml_text, content_type="text/xml")

    serializer: ReportSerializer = ReportSerializer(data=matches, many=True)
    serializer.is_valid()
    if record.status == 400:
        raise FileContentInvalidException()
    return Response(status=status.HTTP_200_OK, data=serializer.data[0])
@throw_on_failure(InvalidException(excArgs='data'))
@action(detail=False, url_path=r"rsa/(?P<request_id>\w+)", methods=["GET"])
def get_result2(self, request, request_id=None):
    """Return the stored result of a request via the ``rsa/`` route.

    ``FI_INVOICE`` requests are answered with the XML string found under
    ``predict_result['content']['combine_results']['xml']`` (empty when
    absent); every other process type is answered with the first entry of
    the ``ReportSerializer`` output.

    Raises:
        RequiredFieldException: ``request_id`` is missing.
        InvalidException: the id does not match exactly one request, or the
            request belongs to another subscription.
    """
    user_info = ProcessUtil.get_user(request)
    if request_id is None:
        raise RequiredFieldException(excArgs='requestId')

    matches = SubscriptionRequest.objects.filter(request_id=request_id)
    if len(matches) != 1:
        raise InvalidException(excArgs='requestId')
    record = matches[0]
    if user_info.current_sub.id != record.subscription.id:
        raise InvalidException(excArgs="user")

    if int(record.process_type) == ProcessType.FI_INVOICE.value:
        payload = record.predict_result
        xml_text = ""
        if payload and 'content' in payload:
            content = payload['content']
            if 'combine_results' in content and 'xml' in content['combine_results']:
                # Strip newlines and backslashes from the stored XML.
                xml_text = content['combine_results']['xml'].replace("\n", "").replace("\\", "")
        return HttpResponse(xml_text, content_type="text/xml")

    serializer: ReportSerializer = ReportSerializer(data=matches, many=True)
    serializer.is_valid()
    return Response(status=status.HTTP_200_OK, data=serializer.data[0])
@action(detail=False, url_path="image/process/app", methods=["POST"])
def process_app(self, request):
    """Process one uploaded image on behalf of the built-in app account.

    The user profile and subscription tied to the fixed ``app_id`` are
    looked up and created on first use. The request is then validated
    (process type, token limit, file list, template id), recorded as a
    ``SubscriptionRequest`` and, for the supported process types, the first
    uploaded file is saved and sent to the queue.

    Returns:
        JsonResponse: HTTP 200 with ``{"request_id": ...}`` when queued, or
        HTTP 502 with ``{"message": "unknown_error"}`` for an allowed
        process type this endpoint does not handle.

    Raises:
        InvalidException: duplicate app user/subscription, a missing,
            non-numeric or disallowed ``processType``, or a missing
            ``templateId`` for template matching.
        LimitReachedException: the subscription's token limit would be
            reached.
    """
    app_id = "THIS_IS_OUR_APP_TEST_ACCOUNT_9123"

    users = UserProfile.objects.filter(sync_id=app_id)
    if len(users) > 1:
        raise InvalidException(excArgs='user')
    if len(users) == 0:
        user = UserProfile(sync_id=app_id, limit_total_pages=1000, status=EntityStatus.ACTIVE.value)
        user.save()
    else:
        user = users[0]

    subs = Subscription.objects.filter(user=user)
    if len(subs) > 1:
        raise InvalidException(excArgs='sub')
    if len(subs) == 0:
        sub = Subscription(user=user, limit_token=10000, current_token=0, status=EntityStatus.ACTIVE.value)
        sub.save()
    else:
        sub = subs[0]

    cur = sub.current_token
    lim = sub.limit_token
    list_file = request.data.getlist('file')
    s_time = time.time()

    # Convert once; a missing or non-numeric value is a client error, not an
    # unhandled KeyError/ValueError.
    try:
        p_type: int = int(request.data['processType'])
    except (KeyError, TypeError, ValueError):
        raise InvalidException(excArgs='processType')
    if p_type not in allowed_p_type:
        raise InvalidException(excArgs='processType')

    if cur + ProcessUtil.token_value(p_type) >= lim:
        raise LimitReachedException(excArgs=('Number of request', str(sub.limit_token), 'times'))
    FileUtils.validate_list_file(list_file)
    if ("templateId" not in request.data) and p_type == ProcessType.TEMPLATE_MATCHING.value:
        raise InvalidException(excArgs=TEMPLATE_ID)

    provider_code = 'Ctel'
    rq_id = provider_code + str(p_type) + get_random_string(5) + str(round(time.time() * 1000))
    file_obj: TemporaryUploadedFile = list_file[0]
    file_name = "temp_file_" + rq_id + get_random_string(2) + ".jpg"

    total_page = 1
    new_request: SubscriptionRequest = SubscriptionRequest(
        pages=total_page,
        process_type=p_type,
        status=1,
        request_id=rq_id,
        provider_code=provider_code,
        subscription=sub,
    )
    new_request.save()

    supported_types = (
        ProcessType.ID_CARD.value,
        ProcessType.INVOICE.value,
        ProcessType.OCR_WITH_BOX.value,
        ProcessType.DRIVER_LICENSE.value,
    )
    if p_type in supported_types:
        # Uploads above the configured size threshold are saved at a lower quality.
        quality = 90 if file_obj.size > self.size_to_compress else 100
        file_path = FileUtils.resize_and_save_file(file_name, new_request, file_obj, quality)
        new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(
            file_path=file_path,
            request=new_request,
            file_name=file_name,
        )
        new_request_file.save()

        b_url = FileUtils.build_url(FolderFileType.REQUESTS.value, new_request.request_id, user.id, file_name)
        j_time = time.time()
        print("Json {}".format(j_time - s_time))
        ProcessUtil.send_to_queue2(rq_id, sub.id, b_url, user.id, p_type)
        return JsonResponse(status=status.HTTP_200_OK, data={"request_id": rq_id})

    return JsonResponse(status=status.HTTP_502_BAD_GATEWAY, data={"message": "unknown_error"})