Feature: is_test_request; Fix: CSV file was not backed up

This commit is contained in:
dx-tan 2024-01-17 16:28:50 +07:00
parent 4500d7dc1f
commit 94be43e798
13 changed files with 158 additions and 42 deletions

View File

@ -36,8 +36,11 @@ BASE_URL = env.str("BASE_URL", "")
BASE_UI_URL = env.str("BASE_UI_URL", "")
AUTH_TOKEN_LIFE_TIME = env.int("AUTH_TOKEN_LIFE_TIME", 0)
IMAGE_TOKEN_LIFE_TIME = env.int("IMAGE_TOKEN_LIFE_TIME", 0)
FI_USER_NAME = env.str("FI_USER_NAME", "secret_username")
FI_PASSWORD = env.str("FI_PASSWORD", 'admin')# SECURITY WARNING: don't run with debug turned on in production!
ADMIN_USER_NAME = env.str("ADMIN_USER_NAME", "")
ADMIN_PASSWORD = env.str("ADMIN_PASSWORD", '')# SECURITY WARNING: don't run with debug turned on in production!
STANDARD_USER_NAME = env.str("STANDARD_USER_NAME", "")
STANDARD_PASSWORD = env.str("STANDARD_PASSWORD", '')# SECURITY WARNING: don't run with debug turned on in production!
# Application definition
S3_ENDPOINT = env.str("S3_ENDPOINT", "")
S3_ACCESS_KEY = env.str("S3_ACCESS_KEY", "")

View File

@ -48,15 +48,23 @@ class CtelUserViewSet(viewsets.ViewSet):
print(serializer.is_valid(raise_exception=True))
data = serializer.validated_data
if data['username'] != settings.FI_USER_NAME or data['password'] != settings.FI_PASSWORD:
token_limit = 999999
if data['username'] == settings.ADMIN_USER_NAME:
if data['password'] != settings.ADMIN_PASSWORD:
raise NotAuthenticatedException()
elif data['username'] == settings.STANDARD_USER_NAME:
if data['password'] != settings.STANDARD_PASSWORD:
raise NotAuthenticatedException()
token_limit = 1000
else:
raise NotAuthenticatedException()
users = UserProfile.objects.filter(sync_id=settings.FI_USER_NAME)
users = UserProfile.objects.filter(sync_id=data['username'])
if len(users) > 1:
raise InvalidException(excArgs=USER_MESSAGE)
if len(users) == 0:
user = UserProfile(sync_id=settings.FI_USER_NAME, status=EntityStatus.ACTIVE.value)
user = UserProfile(sync_id=data['username'], status=EntityStatus.ACTIVE.value)
user.save()
else:
user = users[0]
@ -69,7 +77,7 @@ class CtelUserViewSet(viewsets.ViewSet):
if len(plans) > 1:
raise TrialOneException(excArgs=PLAN_MESSAGE)
if len(plans) == 0:
plan = PricingPlan(code=p_code, duration=365, token_limitations=999999)
plan = PricingPlan(code=p_code, duration=365, token_limitations=token_limit)
plan.save()
else:
plan: PricingPlan = plans[0]
@ -84,9 +92,9 @@ class CtelUserViewSet(viewsets.ViewSet):
else:
sub = subs[0]
return Response(status=status.HTTP_200_OK, data={
'user_id': 'SBT',
'user_name': settings.FI_USER_NAME,
'token': sds_authenticator.generate_token(user_id=settings.FI_USER_NAME, internal_id=user.id, status=EntityStatus.ACTIVE.value, sub_id=sub.id)
'user_id': user.id,
'user_name': data['username'],
'token': sds_authenticator.generate_token(user_id=data['username'], internal_id=user.id, status=EntityStatus.ACTIVE.value, sub_id=sub.id)
})

View File

@ -70,7 +70,6 @@ class CtelViewSet(viewsets.ViewSet):
new_request: SubscriptionRequest = SubscriptionRequest(
pages=total_page,
pages_left=total_page,
doc_type="all",
process_type=p_type, status=1, request_id=rq_id,
provider_code=provider_code,
subscription=sub,
@ -91,7 +90,7 @@ class CtelViewSet(viewsets.ViewSet):
if file_extension in pdf_extensions:
c_connector.do_pdf((rq_id, sub.id, p_type, user.id, files))
elif file_extension in image_extensions:
b_url = ProcessUtil.process_image_file(file_name, file_obj, new_request, user)
b_url = ProcessUtil.process_image_file(file_name, file_obj, new_request, user, "all", 0)
j_time = time.time()
print(f"[INFO]: Duration of Pre-processing: {j_time - s_time}s")
print(f"[INFO]: b_url: {b_url}")
@ -171,9 +170,10 @@ class CtelViewSet(viewsets.ViewSet):
FileUtils.save_to_S3(_name, new_request, file_path)
count += 1
this_file = {
"file_name": _name,
"file_path": file_path,
"file_type": doc_type
"index_in_request": i,
"file_name": _name,
"file_path": file_path,
"file_type": doc_type
}
compact_files.append(this_file)
c_connector.do_pdf((rq_id, sub.id, p_type, user.id, compact_files))
@ -254,9 +254,11 @@ class CtelViewSet(viewsets.ViewSet):
def process_file(data):
idx, doc_type, doc_file, tmp_file_name = data
doc_file.seek(0)
index_in_request = int(tmp_file_name.split(".")[0].split("_")[-1])
file_path = FileUtils.resize_and_save_file(tmp_file_name, new_request, doc_file, 100)
FileUtils.save_to_S3(tmp_file_name, new_request, file_path)
return {
"index_in_request": index_in_request,
"idx": idx,
"file_name": tmp_file_name,
"file_path": file_path,
@ -275,7 +277,7 @@ class CtelViewSet(viewsets.ViewSet):
waiting_time = current_time - start_time
if waiting_time > time_limit:
break
time.sleep(0.2)
time.sleep(0.1)
report_filter = SubscriptionRequest.objects.filter(request_id=rq_id)
if report_filter.count() != 1:
raise InvalidException(excArgs='requestId')
@ -401,9 +403,10 @@ class CtelViewSet(viewsets.ViewSet):
file_name = f"{feedback_id}_{i}.csv"
# Save to local
file_path = FileUtils.save_feedback_file(file_name, new_request, file)
FileUtils.validate_feedback_file(file_path)
# Upload to S3
S3_path = FileUtils.save_feedback_to_S3(file_name, feedback_id, file_path)
# validate
FileUtils.validate_feedback_file(file_path)
# Process csv file in the background
ProcessUtil.process_feedback(feedback_id, file_path)
@ -449,6 +452,7 @@ class CtelViewSet(viewsets.ViewSet):
if user.id != user_data['internal_id'] or user.status != EntityStatus.ACTIVE.value:
raise PermissionDeniedException()
print(f"[DEBUG]: rq: {rq}, file_name: {file_name}")
file_data = SubscriptionRequestFile.objects.filter(request=rq, file_name=file_name)[0]
except IndexError:
raise NotFoundException(excArgs='file')

View File

@ -30,13 +30,16 @@ s3_client = S3Util.MinioS3Client(
bucket_name=settings.S3_BUCKET_NAME
)
def process_pdf_file(file_name: str, file_path: str, request, user) -> list:
def process_pdf_file(file_name: str, file_path: str, request, user, doc_type: str, index_in_request: int) -> list:
try:
# Origin file
code = f'FIL{uuid.uuid4().hex}'
new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path,
request=request,
file_name=file_name,
code=f'FIL{uuid.uuid4().hex}')
code=code,
doc_type=doc_type,
index_in_request=index_in_request)
new_request_file.save()
# Sub-file
return ProcessUtil.pdf_to_images_urls(FileUtils.get_file(file_path), request, user)
@ -120,7 +123,8 @@ def process_csv_feedback(csv_file_path, feedback_id):
def process_pdf(rq_id, sub_id, p_type, user_id, files):
"""
files: [{
"idx": int
"index_in_request": int,
"idx": int,
"file_name": "",
"file_path": "", # local path to file
"file_type": ""
@ -135,7 +139,7 @@ def process_pdf(rq_id, sub_id, p_type, user_id, files):
idx, file = data
extension = file["file_name"].split(".")[-1].lower()
if extension == "pdf":
_b_urls = process_pdf_file(file["file_name"], file["file_path"], new_request, user)
_b_urls = process_pdf_file(file["file_name"], file["file_path"], new_request, user, file["file_type"], file["index_in_request"])
if _b_urls is None:
new_request.status = 400
new_request.save()
@ -145,7 +149,7 @@ def process_pdf(rq_id, sub_id, p_type, user_id, files):
_b_urls[j]["page_number"] = idx
return idx, _b_urls[0]
elif extension in image_extensions:
this_url = ProcessUtil.process_image_local_file(file["file_name"], file["file_path"], new_request, user)[0]
this_url = ProcessUtil.process_image_local_file(file["file_name"], file["file_path"], new_request, user, file["file_type"], file["index_in_request"])[0]
this_url["page_number"] = idx
if file["file_type"]:
this_url["doc_type"] = file["file_type"]

View File

@ -36,6 +36,7 @@ app.conf.update({
Queue('invoice_sbt_rs'),
Queue('do_pdf'),
Queue('upload_file_to_s3'),
Queue('upload_feedback_to_s3'),
Queue('upload_obj_to_s3'),
Queue('remove_local_file'),
Queue('csv_feedback'),

View File

@ -0,0 +1,48 @@
# Generated by Django 4.1.3 on 2024-01-17 03:47
from django.db import migrations, models
import django.utils.timezone
class Migration(migrations.Migration):
dependencies = [
('fwd_api', '0165_feedbackrequest'),
]
operations = [
migrations.RemoveField(
model_name='subscriptionrequest',
name='is_bad_image_quality',
),
migrations.AddField(
model_name='subscriptionrequestfile',
name='accuracy',
field=models.JSONField(null=True),
),
migrations.AddField(
model_name='subscriptionrequestfile',
name='doc_type',
field=models.CharField(default='', max_length=100),
),
migrations.AddField(
model_name='subscriptionrequestfile',
name='index_in_request',
field=models.IntegerField(default=0),
),
migrations.AddField(
model_name='subscriptionrequestfile',
name='is_bad_image_quality',
field=models.BooleanField(default=False),
),
migrations.AddField(
model_name='subscriptionrequestfile',
name='origin_name',
field=models.CharField(default='', max_length=300),
),
migrations.AlterField(
model_name='subscriptionrequestfile',
name='created_at',
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
),
]

View File

@ -0,0 +1,24 @@
from django.db import models
from django.utils import timezone
from fwd_api.models.Subscription import Subscription
class Report(models.Model):
    """Aggregated accuracy/performance report generated over a set of
    subscription requests for one subscription.

    A report is produced as a local CSV/XLSX file (`local_file_name`) which
    may subsequently be uploaded to S3 (`S3_uploaded`); `start_at`/`end_at`
    bound the request time window the report covers.
    """
    id = models.AutoField(primary_key=True)
    # Public identifier of the report (exposed to API clients).
    report_id = models.CharField(max_length=200)
    # Name of the generated report file on local disk.
    local_file_name = models.CharField(max_length=200)
    # Original/display name of the report file.
    origin_name = models.CharField(max_length=200)
    # Per-report error details, if generation failed (free-form JSON).
    error_status = models.JSONField(null=True)
    created_at = models.DateTimeField(default=timezone.now, db_index=True)
    updated_at = models.DateTimeField(auto_now=True)
    subscription = models.ForeignKey(Subscription, on_delete=models.CASCADE)
    # True once the report file has been pushed to S3.
    S3_uploaded = models.BooleanField(default=False)
    # Time window of requests included in this report.
    start_at = models.DateTimeField(null=True)
    end_at = models.DateTimeField(null=True)
    include_for_test_sample = models.BooleanField(default=False)
    # FIX: CharField requires max_length — without it Django's system check
    # fails with fields.E120. Status is a short state string.
    status = models.CharField(max_length=100, null=True)
    number_request = models.IntegerField(default=0)
    number_images = models.IntegerField(default=0)
    number_bad_images = models.IntegerField(default=0)
    # NOTE(review): histogram-style dicts per the examples below, yet the
    # default is the scalar 0 — confirm whether a callable dict default was
    # intended before changing it.
    average_client_time = models.JSONField(default=0)  # {"0.1": 100, "0.2": 200, ...}
    average_OCR_time = models.JSONField(default=0)     # {"0.1": 98, "0.2": 202, ...}
    accuracy = models.JSONField(null=True)

View File

@ -3,7 +3,6 @@ from django.utils import timezone
from fwd_api.models.Subscription import Subscription
class SubscriptionRequest(models.Model):
id = models.AutoField(primary_key=True)
pages: int = models.IntegerField()
@ -34,4 +33,3 @@ class SubscriptionRequest(models.Model):
total_memory = models.FloatField(default=-1)
gpu_stats = models.CharField(max_length=100, null=True)
is_reviewed = models.BooleanField(default=False)
is_bad_image_quality = models.BooleanField(default=False)

View File

@ -12,9 +12,14 @@ class SubscriptionRequestFile(models.Model):
return f"FIL{uuid.uuid4().hex}"
code = models.CharField(max_length=300, default=gen_random_code)
origin_name = models.CharField(max_length=300, default="")
file_name = models.CharField(max_length=300, default=None)
file_path = EncryptedCharField(max_length=500, default=None)
file_category = models.CharField(max_length=200, default=FileCategory.Origin.value)
request = models.ForeignKey(SubscriptionRequest, related_name="files", on_delete=models.CASCADE)
created_at = models.DateTimeField(default=timezone.now)
created_at = models.DateTimeField(default=timezone.now, db_index=True)
updated_at = models.DateTimeField(auto_now=True)
accuracy = models.JSONField(null=True)
is_bad_image_quality = models.BooleanField(default=False)
doc_type = models.CharField(max_length=100, default="")
index_in_request = models.IntegerField(default=0)

View File

@ -11,7 +11,7 @@ from fwd import settings
from fwd_api.constant.common import allowed_file_extensions
from fwd_api.exception.exceptions import GeneralException, RequiredFieldException, InvalidException, \
ServiceUnavailableException, FileFormatInvalidException, LimitReachedException, InvalidDecompressedSizeException, RequiredColumnException
from fwd_api.models import SubscriptionRequest, OcrTemplate, FeedbackRequest
from fwd_api.models import SubscriptionRequest, OcrTemplate, FeedbackRequest, SubscriptionRequestFile
from fwd_api.utils import process as ProcessUtil
from fwd_api.utils.crypto import image_authenticator
from fwd_api.utils.image import resize
@ -145,7 +145,7 @@ def save_feedback_file(file_name: str, rq: FeedbackRequest, uploaded_file: dict)
user_id = str(rq.subscription.user.id)
feedback_id = str(rq.id)
folder_path = os.path.join(settings.MEDIA_ROOT, 'users', user_id, "feedbacks", feedback_id, 'requests', feedback_id)
folder_path = os.path.join(settings.MEDIA_ROOT, 'users', user_id, "feedbacks", feedback_id)
os.makedirs(folder_path, exist_ok = True)
file_path = os.path.join(folder_path, file_name)
@ -177,7 +177,7 @@ def save_template_file(file_name: str, rq: OcrTemplate, file: TemporaryUploadedF
print(e)
raise ServiceUnavailableException()
def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path):
def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, folder_path: str):
try:
file_path = os.path.join(folder_path, file_name)
extension = file_name.split(".")[-1]
@ -193,10 +193,19 @@ def save_file_with_path(file_name: str, file: TemporaryUploadedFile, quality, fo
return file_path
def resize_and_save_file(file_name: str, rq: SubscriptionRequest, file: TemporaryUploadedFile, quality):
def resize_and_save_file(file_name: str, rq: SubscriptionRequest, file: TemporaryUploadedFile, quality: int):
try:
folder_path = get_folder_path(rq)
pathlib.Path(folder_path).mkdir(exist_ok=True, parents=True)
# request_file: SubscriptionRequestFile = SubscriptionRequestFile(
# file_name = file_name,
# file_path = os.path.join(folder_path, file_name),
# doc_type = doc_type,
# origin_name = file.name,
# request = rq,
# index_in_request= index_in_request
# )
# request_file.save()
return save_file_with_path(file_name, file, quality, folder_path)
except InvalidDecompressedSizeException as e:
raise e
@ -219,8 +228,11 @@ def save_to_S3(file_name, rq, local_file_path):
def save_feedback_to_S3(file_name, id, local_file_path):
try:
assert len(local_file_path.split("/")) >= 2, "file_path must have at least feedback_folder and feedback_id"
s3_key = os.path.join(local_file_path.split("/")[-2], local_file_path.split("/")[-1], file_name)
# print(f"[DEBUG]: Uploading feedback to S3 with local path {local_file_path}, id: {id}, file_name: {file_name}")
assert len(local_file_path.split("/")) >= 3, "file_path must have at least feedback_folder and feedback_id"
# s3_key = os.path.join(local_file_path.split("/")[-3], local_file_path.split("/")[-2], file_name)
s3_key = os.path.join("feedback", local_file_path.split("/")[-2], file_name)
# print(f"[DEBUG]: Uploading feedback to S3 with s3_key {s3_key}")
c_connector.upload_feedback_to_s3((local_file_path, s3_key, id))
c_connector.remove_local_file((local_file_path, id))
return s3_key

View File

@ -363,7 +363,7 @@ def send_template_queue(rq_id, file_url, template: OcrTemplate, uid):
def process_feedback(feedback_id, local_file_path):
c_connector.csv_feedback((local_file_path, feedback_id))
def process_pdf_file(file_name: str, file_obj: TemporaryUploadedFile, request: SubscriptionRequest, user) -> list:
def process_pdf_file(file_name: str, file_obj: TemporaryUploadedFile, request: SubscriptionRequest, user, doc_type: str, index_in_request: int) -> list:
doc: fitz.Document = fitz.open(stream=file_obj.file.read())
if doc.page_count > settings.MAX_PAGES_OF_PDF_FILE:
raise LimitReachedException(excArgs=('Number of pages', str(settings.MAX_PAGES_OF_PDF_FILE), 'pages'))
@ -372,16 +372,18 @@ def process_pdf_file(file_name: str, file_obj: TemporaryUploadedFile, request: S
# Origin file
file_obj.seek(0)
file_path = FileUtils.resize_and_save_file(file_name, request, file_obj, 100)
code = f'FIL{uuid.uuid4().hex}'
new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path,
request=request,
file_name=file_name,
code=f'FIL{uuid.uuid4().hex}')
code=code,
doc_type=doc_type,
index_in_request=index_in_request)
new_request_file.save()
# Sub-file
return pdf_to_images_urls(doc, request, user)
def process_image_file(file_name: str, file_obj: TemporaryUploadedFile, request: SubscriptionRequest, user) -> list:
def process_image_file(file_name: str, file_obj: TemporaryUploadedFile, request: SubscriptionRequest, user, doc_type: str, index_in_request: int) -> list:
if file_obj.size > settings.SIZE_TO_COMPRESS:
quality = 95
else:
@ -390,7 +392,9 @@ def process_image_file(file_name: str, file_obj: TemporaryUploadedFile, request:
new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path,
request=request,
file_name=file_name,
code=f'FIL{uuid.uuid4().hex}')
code=f'FIL{uuid.uuid4().hex}',
doc_type=doc_type,
index_in_request=index_in_request)
new_request_file.save()
return [{
'file_url': FileUtils.build_url(FolderFileType.REQUESTS.value, request.request_id, user.id, file_name),
@ -398,11 +402,13 @@ def process_image_file(file_name: str, file_obj: TemporaryUploadedFile, request:
'request_file_id': new_request_file.code
}]
def process_image_local_file(file_name: str, file_path: str, request: SubscriptionRequest, user) -> list:
def process_image_local_file(file_name: str, file_path: str, request: SubscriptionRequest, user, doc_type: str, index_in_request: int) -> list:
new_request_file: SubscriptionRequestFile = SubscriptionRequestFile(file_path=file_path,
request=request,
file_name=file_name,
code=f'FIL{uuid.uuid4().hex}')
code=f'FIL{uuid.uuid4().hex}',
doc_type=doc_type,
index_in_request=index_in_request)
new_request_file.save()
return [{
'file_url': FileUtils.build_url(FolderFileType.REQUESTS.value, request.request_id, user.id, file_name),

View File

@ -63,9 +63,10 @@ services:
- AUTH_TOKEN_LIFE_TIME=${AUTH_TOKEN_LIFE_TIME}
- IMAGE_TOKEN_LIFE_TIME=${IMAGE_TOKEN_LIFE_TIME}
- INTERNAL_SDS_KEY=${INTERNAL_SDS_KEY}
- FI_USER_NAME=${FI_USER_NAME}
- FI_PASSWORD=${FI_PASSWORD}
- S3_ENDPOINT=${S3_ENDPOINT}
- ADMIN_USER_NAME=${ADMIN_USER_NAME}
- ADMIN_PASSWORD=${ADMIN_PASSWORD}
- STANDARD_USER_NAME=${STANDARD_USER_NAME}
- STANDARD_PASSWORD=${STANDARD_PASSWORD}
- S3_ACCESS_KEY=${S3_ACCESS_KEY}
- S3_SECRET_KEY=${S3_SECRET_KEY}
- S3_BUCKET_NAME=${S3_BUCKET_NAME}

View File

@ -44,8 +44,10 @@ services:
- AUTH_TOKEN_LIFE_TIME=${AUTH_TOKEN_LIFE_TIME}
- IMAGE_TOKEN_LIFE_TIME=${IMAGE_TOKEN_LIFE_TIME}
- INTERNAL_SDS_KEY=${INTERNAL_SDS_KEY}
- FI_USER_NAME=${FI_USER_NAME}
- FI_PASSWORD=${FI_PASSWORD}
- ADMIN_USER_NAME=${ADMIN_USER_NAME}
- ADMIN_PASSWORD=${ADMIN_PASSWORD}
- STANDARD_USER_NAME=${STANDARD_USER_NAME}
- STANDARD_PASSWORD=${STANDARD_PASSWORD}
- S3_ENDPOINT=${S3_ENDPOINT}
- S3_ACCESS_KEY=${S3_ACCESS_KEY}
- S3_SECRET_KEY=${S3_SECRET_KEY}