Update: API, logic

This commit is contained in:
dx-tan 2024-05-17 19:39:31 +07:00
parent e1d9f24cbf
commit e30ba0f40b
16 changed files with 196 additions and 72 deletions

View File

@ -12,7 +12,8 @@ docker compose --profile local up -d
## Run tests ## Run tests
```bash ```bash
pip3 install pytest requests pip3 install pytest requests deepdiff
IDP_HOST=https://sbt.idp.sdsrv.ai IDP_USERNAME=<username> IDP_PASSWORD=<password> python3 -m pytest --log-cli-level=DEBUG --capture=sys tests IDP_HOST=https://sbt.idp.sdsrv.ai IDP_USERNAME=<username> IDP_PASSWORD=<password> python3 -m pytest --log-cli-level=DEBUG --capture=sys tests
``` ```
IDP_HOST=http://ec2-13-212-90-87.ap-southeast-1.compute.amazonaws.com:9881 IDP_USERNAME=sbt IDP_PASSWORD=abc python3 -m pytest --log-cli-level=DEBUG --capture=sys tests

@ -1 +1 @@
Subproject commit 5eedd64e87c405918ad9fc37cb6163f8d04ce49d Subproject commit ec0d9a0a5a8c17c9a78165d1f78103951a2e138b

View File

@ -32,9 +32,9 @@ RUN pip install -r requirements.txt --no-cache-dir
COPY --chown=${UID}:${GID} . /app COPY --chown=${UID}:${GID} . /app
RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsv_dewarp && pip3 install -v -e . --no-cache-dir RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/sdsvocr/externals/sdsv_dewarp && pip3 install -v -e . --no-cache-dir
RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsvtd && pip3 install -v -e . --no-cache-dir RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/sdsvocr/externals/sdsvtd && pip3 install -v -e . --no-cache-dir
RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsvtr && pip3 install -v -e . --no-cache-dir RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/sdsvocr/externals/sdsvtr && pip3 install -v -e . --no-cache-dir
RUN cd /app/fwd_api/utils/sdsvkvu && pip3 install -v -e . --no-cache-dir RUN cd /app/fwd_api/utils/sdsvkvu && pip3 install -v -e . --no-cache-dir
# For integration with sdskvu # For integration with sdskvu

View File

@ -1,10 +0,0 @@
from fwd_api.models.UserProfile import UserProfile
from fwd_api.models.UserProfile import Subscription
from fwd_api.models.UserProfile import PricingPlan
def add_user(user_name, _id):
    """Build PricingPlan, UserProfile and Subscription objects sharing the id ``_id``.

    Args:
        user_name: Stored as ``full_name`` on the UserProfile.
        _id: Used as the ``id`` of all three objects.

    NOTE(review): in the visible code the three objects are only constructed;
    none of them is saved or returned, so the call has no lasting effect
    unless the model constructors have side effects -- confirm.
    """
    # The plan's token limit and duration are both set to 1000000000.
    pricing_plan = PricingPlan(id=_id, code=10, token_limitations=1000000000, duration=1000000000)
    user = UserProfile(id=_id, full_name=user_name)
    # The subscription starts with 0 tokens used and is linked to the plan above.
    subscription = Subscription(id=_id, current_token=0, limit_token=1000000000, pricing_plan=pricing_plan, )

View File

@ -38,16 +38,16 @@ class AccuracyViewSet(viewsets.ViewSet):
OpenApiParameter( OpenApiParameter(
name='start_date', name='start_date',
location=OpenApiParameter.QUERY, location=OpenApiParameter.QUERY,
description='Start date (YYYY-mm-DDTHH:MM:SSZ)', description='Start date (YYYY-mm-DD)',
type=OpenApiTypes.DATE, type=OpenApiTypes.DATE,
default='2023-01-02T00:00:00+0700', default='2023-01-02',
), ),
OpenApiParameter( OpenApiParameter(
name='end_date', name='end_date',
location=OpenApiParameter.QUERY, location=OpenApiParameter.QUERY,
description='End date (YYYY-mm-DDTHH:MM:SSZ)', description='End date (YYYY-mm-DD',
type=OpenApiTypes.DATE, type=OpenApiTypes.DATE,
default='2024-01-10T00:00:00+0700', default='2024-01-10',
), ),
OpenApiParameter( OpenApiParameter(
name='includes_test', name='includes_test',
@ -62,6 +62,13 @@ class AccuracyViewSet(viewsets.ViewSet):
type=OpenApiTypes.STR, type=OpenApiTypes.STR,
enum=['reviewed', 'not_reviewed', 'all'], enum=['reviewed', 'not_reviewed', 'all'],
), ),
OpenApiParameter(
name='is_required',
location=OpenApiParameter.QUERY,
description='Which records to be query',
type=OpenApiTypes.STR,
enum=['required', 'not_required'],
),
OpenApiParameter( OpenApiParameter(
name='subsidiary', name='subsidiary',
location=OpenApiParameter.QUERY, location=OpenApiParameter.QUERY,
@ -115,6 +122,7 @@ class AccuracyViewSet(viewsets.ViewSet):
request_id = request.GET.get('request_id', None) request_id = request.GET.get('request_id', None)
with_redemption_id = request.GET.get('with_redemption_id', True) with_redemption_id = request.GET.get('with_redemption_id', True)
is_reviewed = request.GET.get('is_reviewed', None) is_reviewed = request.GET.get('is_reviewed', None)
is_required = request.GET.get('is_required', None)
include_test = request.GET.get('includes_test', False) include_test = request.GET.get('includes_test', False)
subsidiary = request.GET.get("subsidiary", "all") subsidiary = request.GET.get("subsidiary", "all")
max_accuracy = float(request.GET.get("max_accuracy", 100)) max_accuracy = float(request.GET.get("max_accuracy", 100))
@ -169,6 +177,11 @@ class AccuracyViewSet(viewsets.ViewSet):
base_query &= Q(is_reviewed=False) base_query &= Q(is_reviewed=False)
elif is_reviewed == "all": elif is_reviewed == "all":
pass pass
if isinstance(is_required, str):
if is_required == "required":
base_query &= Q(is_required=True)
elif is_required == "not_required":
base_query &= Q(is_required=False)
if isinstance(subsidiary, str): if isinstance(subsidiary, str):
if subsidiary.lower() != "seao": if subsidiary.lower() != "seao":
if subsidiary not in list(settings.SUBS.keys()): if subsidiary not in list(settings.SUBS.keys()):
@ -721,9 +734,13 @@ class AccuracyViewSet(viewsets.ViewSet):
'multipart/form-data': { 'multipart/form-data': {
'type': 'object', 'type': 'object',
'properties': { 'properties': {
'reviewed_result': { 'request_file_results': {
'type': 'array',
'items': {
'type': 'string', 'type': 'string',
'default': '''{"request_id": "Sample request_id", "imei_number": ["sample_imei1", "sample_imei2"], "retailername": "Sample Retailer", "purchase_date": "01/01/1970", "sold_to_party": "Sample party", "invoice_no": "Sample Invoice no"}''', # 'format': 'binary',
'default': '''{"request_file_id": "Sample request_id", "imei_number": "sample_imei1", "retailername": "Sample Retailer", "purchase_date": "01/01/1970", "sold_to_party": "Sample party", "invoice_no": "Sample Invoice no"}''',
}
}, },
}, },
}, },
@ -851,43 +868,48 @@ class AccuracyViewSet(viewsets.ViewSet):
if subscription_request.count() == 0: if subscription_request.count() == 0:
raise NotFoundException(excArgs=request_id) raise NotFoundException(excArgs=request_id)
if "request_file_results" not in data or not isinstance(data.get("request_file_results", None), list):
raise InvalidException(excArgs='request_file_results')
# TODO: check if requestfiles all comes from the request_id
subscription_request = subscription_request.first() subscription_request = subscription_request.first()
reviewed_result = data["request_file_results"]
requestfiles = []
for file_result in reviewed_result:
request_file_id = file_result.get("request_file_id", None)
if request_file_id is None:
raise InvalidException("request_file_id")
subscription_request_files = SubscriptionRequestFile.objects.filter( subscription_request_files = SubscriptionRequestFile.objects.filter(
request=subscription_request.id) request=request_file_id)
if subscription_request_files.count() == 0:
raise InvalidException(excArgs=f'request_file_id: {request_file_id}')
if "reviewed_result" not in data: this_file = subscription_request_files.first()
raise InvalidException(excArgs=f'reviewed_result') requestfiles.append(this_file)
sample_result = {
reviewed_result = data["reviewed_result"] "request_id": request_id,
if not subscription_request.predict_result:
raise InvalidException(excArgs=f'request_id')
validate_review(reviewed_result, len(subscription_request.predict_result.get(
"content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[3].get("value", [])))
reviewed_result['request_id'] = request_id
for subscription_request_file in subscription_request_files:
if subscription_request_file.doc_type == 'invoice':
subscription_request_file.reviewed_result = copy.deepcopy(
reviewed_result)
subscription_request_file.reviewed_result['imei_number'] = [
]
elif subscription_request_file.doc_type == 'imei':
subscription_request_file.reviewed_result = {
"retailername": None, "retailername": None,
"sold_to_party": None, "sold_to_party": None,
"invoice_no": None, "invoice_no": None,
"purchase_date": [], "purchase_date": None,
"imei_number": []} "imei_number": []
if len(reviewed_result["imei_number"])-1 >= subscription_request_file.index_in_request: }
subscription_request_file.reviewed_result["imei_number"] = [reviewed_result[ for k, v in file_result.items():
"imei_number"][subscription_request_file.index_in_request]] if k in sample_result.keys():
subscription_request_file.save() if not isinstance(sample_result[k], list):
sample_result[k] = v
else:
sample_result[k].append(v)
subscription_request.reviewed_result = reviewed_result if this_file.reviewed_result is not None:
subscription_request.reviewed_result['request_id'] = request_id this_file.reviewed_result.update(sample_result)
subscription_request.is_reviewed = True else:
subscription_request.save() this_file.reviewed_result = sample_result
for rf in requestfiles:
rf.is_reviewed = True
rf.save()
return JsonResponse({'message': 'success.'}, status=200) return JsonResponse({'message': 'success.'}, status=200)
else: else:

View File

@ -141,6 +141,7 @@ def create_accuracy_report(report_id, **kwargs):
request.raw_accuracy = mean_list(rq_accuracy["reviewed"]) request.raw_accuracy = mean_list(rq_accuracy["reviewed"])
elif len(rq_accuracy["feedback"]) > 0: elif len(rq_accuracy["feedback"]) > 0:
request.raw_accuracy = mean_list(rq_accuracy["feedback"]) request.raw_accuracy = mean_list(rq_accuracy["feedback"])
request.is_required = request.raw_accuracy < settings.BAD_THRESHOLD
request.save() request.save()
number_images += request_att["total_images"] number_images += request_att["total_images"]

View File

@ -0,0 +1,67 @@
from django.core.management.base import BaseCommand
from tqdm import tqdm
from fwd_api.models import SubscriptionRequestFile, SubscriptionRequest
from fwd_api.exception.exceptions import InvalidException
from fwd_api.utils.accuracy import predict_result_to_ready
import traceback
import copy
from django.utils import timezone
KEY = "retailername"
VALUE = "Lazada"
REASONS_TO_CONSIDER = ["invalid_image", "Invalid Input"]
class Command(BaseCommand):
    """Clear ``reason`` and ``counter_measures`` on selected request files.

    A file is selected when its ``reviewed_result[KEY]`` equals ``VALUE`` and
    its ``reason`` is one of ``REASONS_TO_CONSIDER``. The ``code`` of every
    modified file is appended to ``modified.txt`` in the working directory.
    """

    help = 'Refactor database for image level'

    def add_arguments(self, parser):
        """Register the two positional date arguments ``start`` and ``end``."""
        # Add your command-line arguments here
        parser.add_argument('start', type=str, help='start date, sample: 2023-01-02T00:00:00+0700')
        parser.add_argument('end', type=str, help='end date, sample: 2023-01-03T00:00:00+0700')

    def process_request(self, request, file):
        """Reset matching files of one request and log their codes to *file*.

        Args:
            request: The SubscriptionRequest whose files are inspected.
            file: A writable text stream; one ``image.code`` per line is
                written for every file that was modified.

        Requests are skipped when the part of ``request_id`` before the first
        "." has fewer than two "_"-separated pieces, when ``predict_result``
        is empty, or when its ``status`` is not 200.
        """
        if len(request.request_id.split(".")[0].split("_")) < 2:
            return
        if not request.predict_result:
            self.stdout.write(self.style.WARNING(f"Key predict_result not found in {request.request_id}"))
            return
        if request.predict_result.get("status", 200) != 200:
            self.stdout.write(self.style.WARNING(f"Not a sucess request {request.request_id}"))
            return

        images = SubscriptionRequestFile.objects.filter(request=request)
        for image in images:
            try:
                if not isinstance(image.reviewed_result, dict):
                    continue
                if image.reviewed_result.get(KEY, None) == VALUE and image.reason in REASONS_TO_CONSIDER:
                    image.reason = ""
                    image.counter_measures = ""
                    image.save()
                    file.write(str(image.code) + "\n")
            except Exception as e:
                # Best effort: one failing file must not abort the whole run.
                self.stdout.write(self.style.ERROR(f"Request: {request.request_id} failed with {e}"))
                print(traceback.format_exc())
                continue

    def handle(self, *args, **options):
        """Run the clean-up over requests created between ``start`` and ``end``.

        Raises:
            InvalidException: If either date does not match
                ``%Y-%m-%dT%H:%M:%S%z``.
        """
        date_format = '%Y-%m-%dT%H:%M:%S%z'
        start = options['start']
        end = options['end']
        if start or end:
            try:
                start_date = timezone.datetime.strptime(start, date_format)
                end_date = timezone.datetime.strptime(end, date_format)
            except (TypeError, ValueError) as e:
                # TypeError covers a missing (None) bound, ValueError a malformed one.
                print(f"[INFO]: start: {start}")
                print(f"[INFO]: end: {end}")
                raise InvalidException(excArgs="Date format") from e
            subcription_iter = SubscriptionRequest.objects.filter(created_at__range=(start_date, end_date))
        else:
            subcription_iter = SubscriptionRequest.objects.all()

        # The context manager guarantees the log is flushed and closed even
        # if processing a request raises part-way through.
        with open("modified.txt", "w") as file:
            for request in tqdm(subcription_iter.iterator()):
                self.process_request(request, file)
        self.stdout.write(self.style.SUCCESS('Sample Django management command executed successfully!'))

View File

@ -48,7 +48,6 @@ class Command(BaseCommand):
for i, image in enumerate(images): for i, image in enumerate(images):
try: try:
if isinstance(image.predict_result, dict): if isinstance(image.predict_result, dict):
# print(f"[INFO]: dict ... {request.predict_result}")
image.predict_result[KEY] = None image.predict_result[KEY] = None
if isinstance(image.feedback_result, dict): if isinstance(image.feedback_result, dict):
image.feedback_result[KEY] = None image.feedback_result[KEY] = None

View File

@ -0,0 +1,28 @@
# Generated by Django 4.1.3 on 2024-05-15 09:57
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add review-tracking boolean columns to the request and request-file models."""

    # Must be applied after migration 0190 of the fwd_api app.
    dependencies = [
        ('fwd_api', '0190_rename_accuracy_subscriptionrequest_raw_accuracy'),
    ]

    operations = [
        # subscriptionrequest.is_required: boolean, defaults to True.
        migrations.AddField(
            model_name='subscriptionrequest',
            name='is_required',
            field=models.BooleanField(default=True),
        ),
        # subscriptionrequestfile.is_required: boolean, defaults to True.
        migrations.AddField(
            model_name='subscriptionrequestfile',
            name='is_required',
            field=models.BooleanField(default=True),
        ),
        # subscriptionrequestfile.is_reviewed: boolean, defaults to False.
        migrations.AddField(
            model_name='subscriptionrequestfile',
            name='is_reviewed',
            field=models.BooleanField(default=False),
        ),
    ]

View File

@ -37,4 +37,5 @@ class SubscriptionRequest(models.Model):
total_memory = models.FloatField(default=-1) total_memory = models.FloatField(default=-1)
gpu_stats = models.CharField(max_length=100, null=True) gpu_stats = models.CharField(max_length=100, null=True)
is_reviewed = models.BooleanField(default=False) is_reviewed = models.BooleanField(default=False)
is_required = models.BooleanField(default=True)
subsidiary = models.CharField(default="", null=True, max_length=200) subsidiary = models.CharField(default="", null=True, max_length=200)

View File

@ -25,6 +25,8 @@ class SubscriptionRequestFile(models.Model):
processing_time = models.FloatField(default=-1) # in milisecond processing_time = models.FloatField(default=-1) # in milisecond
reason = models.TextField(blank=True) reason = models.TextField(blank=True)
counter_measures = models.TextField(blank=True) counter_measures = models.TextField(blank=True)
is_reviewed = models.BooleanField(default=False)
is_required = models.BooleanField(default=True)
# subsidiary = models.CharField(default="", null=True, max_length=200) # save already in SubscriptionRequest # subsidiary = models.CharField(default="", null=True, max_length=200) # save already in SubscriptionRequest
predict_result = models.JSONField(null=True) predict_result = models.JSONField(null=True)

View File

@ -791,6 +791,14 @@ def create_billing_data(subscription_requests):
}) })
return billing_data return billing_data
def avg_dict(data):
    """Return the mean of all numbers stored in the list-valued entries of *data*.

    Args:
        data: A mapping; only values that are ``list`` instances contribute,
            every other value is ignored.

    Returns:
        The arithmetic mean of the pooled list items, or ``-1`` when no
        list value contains any item.
    """
    # Keys are irrelevant here, so iterate the values directly and flatten
    # every list into one pool before averaging.
    values = [item for value in data.values() if isinstance(value, list) for item in value]
    return sum(values) / len(values) if values else -1
def calculate_a_request(report, request): def calculate_a_request(report, request):
def review_status_map(input): def review_status_map(input):
review_status = {-1: "Not Required", review_status = {-1: "Not Required",
@ -827,7 +835,6 @@ def calculate_a_request(report, request):
images = SubscriptionRequestFile.objects.filter(request=request, file_category=FileCategory.Origin.value) images = SubscriptionRequestFile.objects.filter(request=request, file_category=FileCategory.Origin.value)
report_files = [] report_files = []
for image in images: for image in images:
status, att = calculate_subcription_file(image) status, att = calculate_subcription_file(image)
atts.append(att) atts.append(att)
att["acc"]["feedback"], fb_max_indexes = acc_maximize_list_values(att["acc"]["feedback"]) att["acc"]["feedback"], fb_max_indexes = acc_maximize_list_values(att["acc"]["feedback"])
@ -835,6 +842,12 @@ def calculate_a_request(report, request):
_att = copy.deepcopy(att) _att = copy.deepcopy(att)
fb_avg_acc = avg_dict(att["acc"]["feedback"])
rv_avg_acc = avg_dict(att["acc"]["reviewed"])
image.is_required = fb_avg_acc < settings.BAD_THRESHOLD
image.is_reviewed = rv_avg_acc > 0
if image.processing_time < 0: if image.processing_time < 0:
continue continue
if status != 200: if status != 200:

@ -1 +1 @@
Subproject commit 5eedd64e87c405918ad9fc37cb6163f8d04ce49d Subproject commit ec0d9a0a5a8c17c9a78165d1f78103951a2e138b

0
cope2n-api/modified.txt Normal file
View File

View File

@ -20,7 +20,7 @@ services:
environment: environment:
- PYTHONPATH=${PYTHONPATH}:/workspace/cope2n-ai-fi # For import module - PYTHONPATH=${PYTHONPATH}:/workspace/cope2n-ai-fi # For import module
- CELERY_BROKER=amqp://${RABBITMQ_DEFAULT_USER}:${RABBITMQ_DEFAULT_PASS}@rabbitmq-sbt:5672 - CELERY_BROKER=amqp://${RABBITMQ_DEFAULT_USER}:${RABBITMQ_DEFAULT_PASS}@rabbitmq-sbt:5672
- CUDA_VISIBLE_DEVICES=0 - CUDA_VISIBLE_DEVICES=1
volumes: volumes:
- ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only - ./cope2n-ai-fi:/workspace/cope2n-ai-fi # for dev container only
working_dir: /workspace/cope2n-ai-fi working_dir: /workspace/cope2n-ai-fi
@ -89,12 +89,12 @@ services:
depends_on: depends_on:
db-sbt: db-sbt:
condition: service_started condition: service_started
command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input && # command: sh -c "chmod -R 777 /app; sleep 5; python manage.py collectstatic --no-input &&
python manage.py makemigrations && # python manage.py makemigrations &&
python manage.py migrate && # python manage.py migrate &&
python manage.py compilemessages && # python manage.py compilemessages &&
gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod # gunicorn fwd.asgi:application -k uvicorn.workers.UvicornWorker --timeout 300 -b 0.0.0.0:9000" # pre-makemigrations on prod
# command: "sleep infinity" command: "sleep infinity"
minio: minio:
image: minio/minio image: minio/minio
@ -179,8 +179,8 @@ services:
- ./cope2n-api:/app - ./cope2n-api:/app
working_dir: /app working_dir: /app
command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO -c 5" # command: sh -c "celery -A fwd_api.celery_worker.worker worker -l INFO -c 5"
# command: bash -c "tail -f > /dev/null" command: bash -c "tail -f > /dev/null"
# Back-end persistent # Back-end persistent
db-sbt: db-sbt:
@ -196,7 +196,7 @@ services:
- POSTGRES_PASSWORD=${DB_PASSWORD} - POSTGRES_PASSWORD=${DB_PASSWORD}
- POSTGRES_DB=${DB_SCHEMA} - POSTGRES_DB=${DB_SCHEMA}
ports: ports:
- 15432:5432 - 54321:5432
rabbitmq-sbt: rabbitmq-sbt:
mem_reservation: 600m mem_reservation: 600m
@ -220,7 +220,7 @@ services:
shm_size: 10gb shm_size: 10gb
privileged: true privileged: true
ports: ports:
- 9881:80 - 29881:80
depends_on: depends_on:
be-ctel-sbt: be-ctel-sbt:
condition: service_started condition: service_started