import re
from datetime import datetime
import copy
from typing import Any, Dict, List, Union

from .ocr_utils.ocr_metrics import eval_ocr_metric
from .ocr_utils.sbt_report import post_processing_str
from fwd_api.constant.common import FileCategory
from fwd_api.models import SubscriptionRequest, SubscriptionRequestFile, ReportFile
from ..celery_worker.client_connector import c_connector
from ..utils.file import dict2xlsx, save_workbook_file, save_report_to_S3
from ..utils.subsidiary import map_subsidiary_short_to_long
from ..utils.processing_time import backend_cost
from django.db.models import Q
from django.utils import timezone
from fwd import settings
from ..models import SubscriptionRequest, Report, ReportFile

import logging

logger = logging.getLogger(__name__)

VALID_KEYS = ["retailername", "sold_to_party", "invoice_no", "purchase_date", "imei_number"]
OPTIONAL_KEYS = ['invoice_no']


class ReportAccumulateByRequest:
    def __init__(self, sub):
        # self.redis_client = redis.Redis(host=settings.REDIS_HOST, port=settings.REDIS_PORT, decode_responses=True)
        self.sub = sub
        self.current_time = None
        self.data = {}  # {"month": [total, {"day": day_data}]}
        self.month_format = {
            'subs': "+",
            'extraction_date': "Subtotal ()",
            'num_imei': 0,
            'num_invoice': 0,
            'total_images': 0,
            'images_quality': {
                'successful': 0,
                'successful_percent': 0,
                'bad': 0,
                'bad_percent': 0
            },
            'average_accuracy_rate': {
                'imei_number': IterAvg(),
                'purchase_date': IterAvg(),
                'retailername': IterAvg(),
                'sold_to_party': IterAvg(),
                'invoice_no': IterAvg()
            },
            'average_processing_time': {
                'imei': IterAvg(),
                'invoice': IterAvg(),
                'avg': IterAvg(),
            },
            'file_average_processing_time': {
                'imei': IterAvg(),
                'invoice': IterAvg(),
                'avg': IterAvg(),
            },
            'usage': {
                'imei': 0,
                'invoice': 0,
                'total_images': 0,
                'request': 0
            },
            'feedback_accuracy': {
                'imei_number': IterAvg(),
                'purchase_date': IterAvg(),
                'retailername': IterAvg(),
                'sold_to_party': IterAvg(),
                'invoice_no': IterAvg()
            },
            'reviewed_accuracy': {
                'imei_number': IterAvg(),
                'purchase_date': IterAvg(),
                'retailername': IterAvg(),
                'sold_to_party': IterAvg(),
                'invoice_no': IterAvg()
            },
            'num_request': 0,
            "review_progress": []
        }
        self.day_format = {
            'subs': sub,
            'extraction_date': "",
            'num_imei': 0,
            'num_invoice': 0,
            'total_images': 0,
            'images_quality': {
                'successful': 0,
                'successful_percent': 0,
                'bad': 0,
                'bad_percent': 0
            },
            'average_accuracy_rate': {
                'imei_number': IterAvg(),
                'purchase_date': IterAvg(),
                'retailername': IterAvg(),
                'sold_to_party': IterAvg(),
                'invoice_no': IterAvg()
            },
            'average_processing_time': {
                'imei': IterAvg(),
                'invoice': IterAvg(),
                'avg': IterAvg(),
            },
            'file_average_processing_time': {
                'imei': IterAvg(),
                'invoice': IterAvg(),
                'avg': IterAvg(),
            },
            'usage': {
                'imei': 0,
                'invoice': 0,
                'total_images': 0,
                'request': 0
            },
            'feedback_accuracy': {
                'imei_number': IterAvg(),
                'purchase_date': IterAvg(),
                'retailername': IterAvg(),
                'sold_to_party': IterAvg(),
                'invoice_no': IterAvg()
            },
            'reviewed_accuracy': {
                'imei_number': IterAvg(),
                'purchase_date': IterAvg(),
                'retailername': IterAvg(),
                'sold_to_party': IterAvg(),
                'invoice_no': IterAvg()
            },
            "report_files": [],
            "num_request": 0,
            "review_progress": []
        },  # NOTE: the trailing comma makes this a 1-tuple; it is unpacked with [0] in add()
        self.report = copy.deepcopy(self.month_format)
        self.report["average_accuracy_rate"]["avg"] = IterAvg()

    @staticmethod
    def update_total(total, report_file, image_avg_cost):
        # Accumulate this report_file into the grand total across all months
        if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS:
            total["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0
            total["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0
        total["total_images"] += 1
        doc_type = "imei"
        if report_file.doc_type in ["imei", "invoice", "all"]:
            doc_type = report_file.doc_type
        else:
            logger.warning(f"Weird doc type {report_file.doc_type} in request id: {report_file.correspond_request_id}")
        total["num_imei"] += 1 if doc_type == "imei" else 0
        total["num_invoice"] += 1 if doc_type == "invoice" else 0

        for key in settings.FIELD:
            if sum([len(report_file.reviewed_accuracy[x]) for x in report_file.reviewed_accuracy.keys() if "_count" not in x]) > 0:
                total["average_accuracy_rate"][key].add(report_file.reviewed_accuracy.get(key, []))
                total["average_accuracy_rate"]['avg'].add(report_file.reviewed_accuracy.get(key, []))
            elif sum([len(report_file.feedback_accuracy[x]) for x in report_file.feedback_accuracy.keys() if "_count" not in x]) > 0:
                total["average_accuracy_rate"][key].add(report_file.feedback_accuracy.get(key, []))
                total["average_accuracy_rate"]['avg'].add(report_file.feedback_accuracy.get(key, []))
            total["feedback_accuracy"][key].add(report_file.feedback_accuracy.get(key, []))
            total["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, []))

        if not total["average_processing_time"].get(report_file.doc_type, None):
            logger.warning(f"Weird doctype: {report_file.doc_type}")
            total["average_processing_time"][report_file.doc_type] = IterAvg()
        total["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
        total["average_processing_time"]["avg"].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0

        if not total["file_average_processing_time"].get(report_file.doc_type, None):
            logger.warning(f"Weird doctype: {report_file.doc_type}")
            total["file_average_processing_time"][report_file.doc_type] = IterAvg()
        total["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0
        total["file_average_processing_time"]["avg"].add_avg(image_avg_cost, 1) if image_avg_cost else 0

        doc_type = "imei"
        if report_file.doc_type in ["imei", "invoice", "all"]:
            doc_type = report_file.doc_type
        else:
            logger.warning(f"Weird doc type {report_file.doc_type} in request id: {report_file.correspond_request_id}")
        total["usage"]["imei"] += 1 if doc_type == "imei" else 0
        total["usage"]["invoice"] += 1 if doc_type == "invoice" else 0
        total["usage"]["total_images"] += 1
        total["review_progress"].append(report_file.review_status)
        return total

    @staticmethod
    def update_month(month, report_file, image_avg_cost):
        # Accumulate this report_file into the month subtotal
        if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS:
            month["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0
            month["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0
        month["total_images"] += 1
        doc_type = "imei"
        if report_file.doc_type in ["imei", "invoice", "all"]:
            doc_type = report_file.doc_type
        else:
            logger.warning(f"Weird doc type {report_file.doc_type} in request id: {report_file.correspond_request_id}")
        month["num_imei"] += 1 if doc_type == "imei" else 0
        month["num_invoice"] += 1 if doc_type == "invoice" else 0
        for key in settings.FIELD:
            if sum([len(report_file.reviewed_accuracy[x]) for x in report_file.reviewed_accuracy.keys() if "_count" not in x]) > 0:
                month["average_accuracy_rate"][key].add(report_file.reviewed_accuracy.get(key, []))
            elif sum([len(report_file.feedback_accuracy[x]) for x in report_file.feedback_accuracy.keys() if "_count" not in x]) > 0:
                month["average_accuracy_rate"][key].add(report_file.feedback_accuracy.get(key, []))
            month["feedback_accuracy"][key].add(report_file.feedback_accuracy.get(key, []))
            month["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, []))
        if not month["average_processing_time"].get(report_file.doc_type, None):
            logger.warning(f"Weird doctype: {report_file.doc_type}")
            month["average_processing_time"][report_file.doc_type] = IterAvg()
        month["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
        month["average_processing_time"]["avg"].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0

        if not month["file_average_processing_time"].get(report_file.doc_type, None):
            logger.warning(f"Weird doctype: {report_file.doc_type}")
            month["file_average_processing_time"][report_file.doc_type] = IterAvg()
        month["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0
        month["file_average_processing_time"]["avg"].add_avg(image_avg_cost, 1) if image_avg_cost else 0

        doc_type = "imei"
        if report_file.doc_type in ["imei", "invoice", "all"]:
            doc_type = report_file.doc_type
        else:
            logger.warning(f"Weird doc type {report_file.doc_type} in request id: {report_file.correspond_request_id}")
        month["usage"]["imei"] += 1 if doc_type == "imei" else 0
        month["usage"]["invoice"] += 1 if doc_type == "invoice" else 0
        month["usage"]["total_images"] += 1
        month["review_progress"].append(report_file.review_status)
        return month

    @staticmethod
    def update_day(day_data, report_file, image_avg_cost):
        # Accumulate this report_file into the day subtotal
        if report_file.bad_image_reason not in settings.ACC_EXCLUDE_RESEASONS:
            day_data["images_quality"]["successful"] += 1 if not report_file.is_bad_image else 0
            day_data["images_quality"]["bad"] += 1 if report_file.is_bad_image else 0
        day_data["total_images"] += 1
        doc_type = "imei"
        if report_file.doc_type in ["imei", "invoice", "all"]:
            doc_type = report_file.doc_type
        else:
            logger.warning(f"Weird doc type {report_file.doc_type} in request id: {report_file.correspond_request_id}")
        day_data["num_imei"] += 1 if doc_type == "imei" else 0
        day_data["num_invoice"] += 1 if doc_type == "invoice" else 0
        day_data["report_files"].append(report_file)

        for key in settings.FIELD:
            if sum([len(report_file.reviewed_accuracy[x]) for x in report_file.reviewed_accuracy.keys() if "_count" not in x]) > 0:
                day_data["average_accuracy_rate"][key].add(report_file.reviewed_accuracy.get(key, []))
            elif sum([len(report_file.feedback_accuracy[x]) for x in report_file.feedback_accuracy.keys() if "_count" not in x]) > 0:
                day_data["average_accuracy_rate"][key].add(report_file.feedback_accuracy.get(key, []))
            day_data["feedback_accuracy"][key].add(report_file.feedback_accuracy.get(key, []))
            day_data["reviewed_accuracy"][key].add(report_file.reviewed_accuracy.get(key, []))

        if not day_data["average_processing_time"].get(report_file.doc_type, None):
            logger.warning(f"Weird doctype: {report_file.doc_type}")
            day_data["average_processing_time"][report_file.doc_type] = IterAvg()
        day_data["average_processing_time"][report_file.doc_type].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0
        day_data["average_processing_time"]['avg'].add_avg(report_file.time_cost, 1) if report_file.time_cost else 0

        if not day_data["file_average_processing_time"].get(report_file.doc_type, None):
            logger.warning(f"Weird doctype: {report_file.doc_type}")
            day_data["file_average_processing_time"][report_file.doc_type] = IterAvg()
        day_data["file_average_processing_time"][report_file.doc_type].add_avg(image_avg_cost, 1) if image_avg_cost else 0
        day_data["file_average_processing_time"]['avg'].add_avg(image_avg_cost, 1) if image_avg_cost else 0

        day_data["review_progress"].append(report_file.review_status)
        return day_data

    def add(self, request, report_files, report):
        this_month = timezone.localtime(request.created_at).strftime("%Y%m")
        this_day = timezone.localtime(request.created_at).strftime("%Y%m%d")
        if not self.data.get(this_month, None):
            self.data[this_month] = [copy.deepcopy(self.month_format), {}]
            self.data[this_month][0]["extraction_date"] = "Subtotal (" + timezone.localtime(request.created_at).strftime("%Y-%m") + ")"
        if not self.data[this_month][1].get(this_day, None):
            logger.info(f"Adding a new day: {this_day} for report: {report.id}...")
            self.data[this_month][1][this_day] = copy.deepcopy(self.day_format)[0]
            self.data[this_month][1][this_day]['extraction_date'] = timezone.localtime(request.created_at).strftime("%Y-%m-%d")
            usage = self.count_transactions_within_day(this_day)
            self.data[this_month][1][this_day]["usage"]["imei"] = usage.get("imei", 0)
            self.data[this_month][1][this_day]["usage"]["invoice"] = usage.get("invoice", 0)
            self.data[this_month][1][this_day]["usage"]["request"] = usage.get("request", 0)
            self.data[this_month][1][this_day]["usage"]["total_images"] = usage.get("imei", 0) + usage.get("invoice", 0)

        self.data[this_month][1][this_day]['num_request'] += 1
        self.data[this_month][0]['num_request'] += 1
        _number_of_file = request.pages
        _be_cost = backend_cost(request.created_at, request.ai_inference_start_time)
        _ai_cost = request.ai_inference_time
        processing_time_by_averaging_request_cost = (_be_cost + _ai_cost) / _number_of_file if _number_of_file > 0 else 0
        for report_file in report_files:
            # report_file.time_cost = processing_time_by_averaging_request_cost
            _report_file = copy.deepcopy(report_file)
            if _report_file.is_bad_image or _report_file.bad_image_reason in settings.ACC_EXCLUDE_RESEASONS:
                _report_file.acc = None
                for t in _report_file.feedback_accuracy.keys():
                    _report_file.feedback_accuracy[t] = []
                for t in _report_file.reviewed_accuracy.keys():
                    _report_file.reviewed_accuracy[t] = []

            self.report = self.update_total(self.report, _report_file, processing_time_by_averaging_request_cost)
            self.data[this_month][0] = self.update_month(self.data[this_month][0], _report_file, processing_time_by_averaging_request_cost)  # Update the subtotal within the month
            self.data[this_month][1][this_day] = self.update_day(self.data[this_month][1][this_day], _report_file, processing_time_by_averaging_request_cost)  # Update the subtotal of the day

    def count_transactions_within_day(self, date_string):
        start_date = datetime.strptime(date_string, "%Y%m%d")
        start_date_with_timezone = timezone.make_aware(start_date)
        end_date_with_timezone = start_date_with_timezone + timezone.timedelta(days=1)
        return count_transactions(start_date_with_timezone, end_date_with_timezone, self.sub)

    def save(self, root_report_id, is_daily_report=False, include_test=False):
        report_data, overall_report = self.get()
        fine_data = []
        save_data = {"file": {"overview": f"{root_report_id}/{root_report_id}.xlsx"},
                     "data": fine_data,  # {"sub_report_id": "S3 location", "data": fine_data}
                     "report": overall_report}
        # extract data
        month_keys = list(report_data.keys())
        month_keys.sort(reverse=True)
        for month in month_keys:
            fine_data.append(report_data[month][0])
            day_keys = list(report_data[month][1].keys())
            day_keys.sort(reverse=True)
            for day in day_keys:
                report_data[month][1][day]['subs'] = map_subsidiary_short_to_long(report_data[month][1][day]['subs'])
                fine_data.append(report_data[month][1][day])
                # save daily reports
                report_id = root_report_id + "_" + day
                start_date = datetime.strptime(day, "%Y%m%d")
                start_date_with_timezone = timezone.make_aware(start_date)
                end_date_with_timezone = start_date_with_timezone + timezone.timedelta(days=1)
                _average_OCR_time = {"invoice": self.data[month][1][day]["average_processing_time"]["invoice"](), "imei": self.data[month][1][day]["average_processing_time"]["imei"](),
                                     "invoice_count": self.data[month][1][day]["average_processing_time"]["invoice"].count, "imei_count": self.data[month][1][day]["average_processing_time"]["imei"].count}
                _average_OCR_time["invoice"] = 0 if _average_OCR_time["invoice"] is None else _average_OCR_time["invoice"]
                _average_OCR_time["imei"] = 0 if _average_OCR_time["imei"] is None else _average_OCR_time["imei"]
                if "avg" not in _average_OCR_time:
                    _average_OCR_time["avg"] = (_average_OCR_time["invoice"]*_average_OCR_time["invoice_count"] + _average_OCR_time["imei"]*_average_OCR_time["imei_count"])/(_average_OCR_time["imei_count"] + _average_OCR_time["invoice_count"]) if (_average_OCR_time["imei_count"] + _average_OCR_time["invoice_count"]) > 0 else None

                _file_average_OCR_time = {"invoice": self.data[month][1][day]["file_average_processing_time"]["invoice"](), "imei": self.data[month][1][day]["file_average_processing_time"]["imei"](),
                                          "invoice_count": self.data[month][1][day]["file_average_processing_time"]["invoice"].count, "imei_count": self.data[month][1][day]["file_average_processing_time"]["imei"].count}
                _file_average_OCR_time["invoice"] = 0 if _file_average_OCR_time["invoice"] is None else _file_average_OCR_time["invoice"]
                _file_average_OCR_time["imei"] = 0 if _file_average_OCR_time["imei"] is None else _file_average_OCR_time["imei"]
                if "avg" not in _file_average_OCR_time:
                    _file_average_OCR_time["avg"] = (_file_average_OCR_time["invoice"]*_file_average_OCR_time["invoice_count"] + _file_average_OCR_time["imei"]*_file_average_OCR_time["imei_count"])/(_file_average_OCR_time["imei_count"] + _file_average_OCR_time["invoice_count"]) if (_file_average_OCR_time["imei_count"] + _file_average_OCR_time["invoice_count"]) > 0 else None

                acumulated_acc = {"feedback_accuracy": {},
                                  "reviewed_accuracy": {}}
                for acc_type in ["feedback_accuracy", "reviewed_accuracy"]:
                    avg_acc = IterAvg()
                    for key in settings.FIELD:
                        acumulated_acc[acc_type][key] = self.data[month][1][day][acc_type][key]()
                        acumulated_acc[acc_type][key+"_count"] = self.data[month][1][day][acc_type][key].count
                        avg_acc.add_avg(acumulated_acc[acc_type][key], acumulated_acc[acc_type][key+"_count"])
                    acumulated_acc[acc_type]["avg"] = avg_acc()
                    acumulated_acc[acc_type]["avg_count"] = avg_acc.count
                new_report: Report = Report(
                    report_id=report_id,
                    is_daily_report=is_daily_report,
                    subsidiary=self.sub.lower().replace(" ", ""),
                    include_test=include_test,
                    start_at=start_date_with_timezone,
                    end_at=end_date_with_timezone,
                    status="Ready",
                    number_request=report_data[month][1][day]["num_request"],
                    number_images=report_data[month][1][day]["total_images"],
                    number_imei=report_data[month][1][day]["num_imei"],
                    number_invoice=report_data[month][1][day]["num_invoice"],
                    number_bad_images=report_data[month][1][day]["images_quality"]["bad"],
                    average_OCR_time=_file_average_OCR_time,
                    number_imei_transaction=report_data[month][1][day]["usage"]["imei"],
                    number_invoice_transaction=report_data[month][1][day]["usage"]["invoice"],
                    feedback_accuracy=acumulated_acc["feedback_accuracy"],
                    reviewed_accuracy=acumulated_acc["reviewed_accuracy"],
                )
                if is_daily_report:
                    new_report.save()
                    data = extract_report_detail_list(self.data[month][1][day]["report_files"], lower=True)
                    data_workbook = dict2xlsx(data, _type='report_detail')
                    local_workbook = save_workbook_file(report_id + ".xlsx", new_report, data_workbook)
                    s3_key = save_report_to_S3(report_id, local_workbook)
        return fine_data, save_data

    def get(self) -> Any:
        # FIXME: This looks like junk
        _data = copy.deepcopy(self.data)
        _report = copy.deepcopy(self.report)
        # export report data
        for key in _report["average_processing_time"].keys():
            _report["average_processing_time"][key] = _report["average_processing_time"][key]()

        _ks = list(_report["file_average_processing_time"].keys())
        for key in _ks:
            _report["file_average_processing_time"][key+"_count"] = _report["file_average_processing_time"][key].count
            _report["file_average_processing_time"][key] = _report["file_average_processing_time"][key]()

        # avg_acc = 0
        # count_acc = 0
        for key in settings.FIELD:
            _report["average_accuracy_rate"][key] = _report["average_accuracy_rate"][key]()
            for accuracy_type in ["feedback_accuracy", "reviewed_accuracy"]:
                # if (_report[accuracy_type][key].count + count_acc) > 0:
                #     avg_acc = (avg_acc*count_acc + _report[accuracy_type][key].avg*_report[accuracy_type][key].count) / (_report[accuracy_type][key].count + count_acc)
                #     count_acc += _report[accuracy_type][key].count
                _report[accuracy_type][key] = _report[accuracy_type][key]()
        _report["average_accuracy_rate"]["avg"] = _report["average_accuracy_rate"]["avg"]()

        _report["review_progress"] = _report["review_progress"].count(1)/(_report["review_progress"].count(0) + _report["review_progress"].count(1)) if (_report["review_progress"].count(0) + _report["review_progress"].count(1)) > 0 else 1.0
        _report["images_quality"]["successful_percent"] = _report["images_quality"]["successful"]/_report["total_images"] if _report["total_images"] > 0 else 0
        _report["images_quality"]["bad_percent"] = _report["images_quality"]["bad"]/_report["total_images"] if _report["total_images"] > 0 else 0
        # export data for dashboard
        for month in _data.keys():
            _data[month][0]["images_quality"]["successful_percent"] = _data[month][0]["images_quality"]["successful"]/_data[month][0]["total_images"] if _data[month][0]["total_images"] > 0 else 0
            _data[month][0]["images_quality"]["bad_percent"] = _data[month][0]["images_quality"]["bad"]/_data[month][0]["total_images"] if _data[month][0]["total_images"] > 0 else 0
            num_transaction_imei = 0
            num_transaction_invoice = 0
            for day in _data[month][1].keys():
                num_transaction_imei += _data[month][1][day]["usage"].get("imei", 0)
                num_transaction_invoice += _data[month][1][day]["usage"].get("invoice", 0)
                for key in _data[month][1][day]["average_processing_time"].keys():
                    _data[month][1][day]["average_processing_time"][key] = _data[month][1][day]["average_processing_time"][key]()
                for key in _data[month][1][day]["file_average_processing_time"].keys():
                    _data[month][1][day]["file_average_processing_time"][key] = _data[month][1][day]["file_average_processing_time"][key]()

                for key in settings.FIELD:
                    _data[month][1][day]["average_accuracy_rate"][key] = _data[month][1][day]["average_accuracy_rate"][key]()
                    for accuracy_type in ["feedback_accuracy", "reviewed_accuracy"]:
                        _data[month][1][day][accuracy_type][key] = _data[month][1][day][accuracy_type][key]()
                _data[month][1][day]["review_progress"] = _data[month][1][day]["review_progress"].count(1)/(_data[month][1][day]["review_progress"].count(0) + _data[month][1][day]["review_progress"].count(1)) if (_data[month][1][day]["review_progress"].count(0) + _data[month][1][day]["review_progress"].count(1)) > 0 else 0
                _data[month][1][day].pop("report_files")
                _data[month][1][day]["images_quality"]["successful_percent"] = _data[month][1][day]["images_quality"]["successful"]/_data[month][1][day]["total_images"] if _data[month][1][day]["total_images"] > 0 else 0
                _data[month][1][day]["images_quality"]["bad_percent"] = _data[month][1][day]["images_quality"]["bad"]/_data[month][1][day]["total_images"] if _data[month][1][day]["total_images"] > 0 else 0

            _data[month][0]["usage"]["imei"] = num_transaction_imei
            _data[month][0]["usage"]["invoice"] = num_transaction_invoice
            _data[month][0]["usage"]["total_images"] = num_transaction_invoice + num_transaction_imei
            for key in _data[month][0]["average_processing_time"].keys():
                _data[month][0]["average_processing_time"][key] = _data[month][0]["average_processing_time"][key]()
            for key in _data[month][0]["file_average_processing_time"].keys():
                _data[month][0]["file_average_processing_time"][key] = _data[month][0]["file_average_processing_time"][key]()

            for key in settings.FIELD:
                _data[month][0]["average_accuracy_rate"][key] = _data[month][0]["average_accuracy_rate"][key]()
                for accuracy_type in ["feedback_accuracy", "reviewed_accuracy"]:
                    _data[month][0][accuracy_type][key] = _data[month][0][accuracy_type][key]()
            _data[month][0]["review_progress"] = _data[month][0]["review_progress"].count(1)/(_data[month][0]["review_progress"].count(0) + _data[month][0]["review_progress"].count(1)) if (_data[month][0]["review_progress"].count(0) + _data[month][0]["review_progress"].count(1)) > 0 else 0

        return _data, _report


class MonthReportAccumulate:
    def __init__(self):
        self.month = None
        self.total = {
            'subs': "+",
            'extraction_date': "Subtotal ()",
            'total_images': 0,
            'images_quality': {
                'successful': 0,
                'successful_percent': 0,
                'bad': 0,
                'bad_percent': 0
            },
            'average_accuracy_rate': {
                'imei': IterAvg(),
                'purchase_date': IterAvg(),
                'retailer_name': IterAvg(),
                'invoice_no': IterAvg()
            },
            'average_processing_time': {
                'imei': IterAvg(),
                'invoice': IterAvg()
            },
            'usage': {
                'imei': 0,
                'invoice': 0
            }
        }
        self.data = []
        self.data_format = {
            'subs': "",
            'extraction_date': "",
            'num_imei': 0,
            'num_invoice': 0,
            'total_images': 0,
            'images_quality': {
                'successful': 0,
                'successful_percent': 0,
                'bad': 0,
                'bad_percent': 0
            },
            'average_accuracy_rate': {
                'imei': 0,
                'purchase_date': 0,
                'retailer_name': 0,
                'invoice_no': 0
            },
            'average_processing_time': {
                'imei': 0,
                'invoice': 0
            },
            'usage': {
                'imei': 0,
                'invoice': 0
            }
        },  # NOTE: the trailing comma makes this a 1-tuple; it is unpacked with [0] in add()

    def accumulate(self, report):
        self.total["total_images"] += report.number_images
        self.total["images_quality"]["successful"] += report.number_images - report.number_bad_images
        self.total["images_quality"]["bad"] += report.number_bad_images

        if sum([report.reviewed_accuracy[x] for x in report.reviewed_accuracy.keys() if "_count" not in x]) > 0:
            self.total["average_accuracy_rate"]["imei"].add_avg(report.reviewed_accuracy.get("imei_number", 0), report.reviewed_accuracy.get("imei_number_count", 0))
            self.total["average_accuracy_rate"]["purchase_date"].add_avg(report.reviewed_accuracy.get("purchase_date", 0), report.reviewed_accuracy.get("purchase_date_count", 0))
            self.total["average_accuracy_rate"]["retailer_name"].add_avg(report.reviewed_accuracy.get("retailername", 0), report.reviewed_accuracy.get("retailername_count", 0))
            self.total["average_accuracy_rate"]["invoice_no"].add_avg(report.reviewed_accuracy.get("invoice_no", 0), report.reviewed_accuracy.get("invoice_no_count", 0))
        elif sum([report.feedback_accuracy[x] for x in report.feedback_accuracy.keys() if "_count" not in x]) > 0:
            self.total["average_accuracy_rate"]["imei"].add_avg(report.feedback_accuracy.get("imei_number", 0), report.feedback_accuracy.get("imei_number_count", 0))
            self.total["average_accuracy_rate"]["purchase_date"].add_avg(report.feedback_accuracy.get("purchase_date", 0), report.feedback_accuracy.get("purchase_date_count", 0))
            self.total["average_accuracy_rate"]["retailer_name"].add_avg(report.feedback_accuracy.get("retailername", 0), report.feedback_accuracy.get("retailername_count", 0))
            self.total["average_accuracy_rate"]["invoice_no"].add_avg(report.feedback_accuracy.get("invoice_no", 0), report.feedback_accuracy.get("invoice_no_count", 0))

        self.total["average_processing_time"]["imei"].add_avg(report.average_OCR_time.get("imei", 0), report.average_OCR_time.get("imei_count", 0)) if report.average_OCR_time else 0
        self.total["average_processing_time"]["invoice"].add_avg(report.average_OCR_time.get("invoice", 0), report.average_OCR_time.get("invoice_count", 0)) if report.average_OCR_time else 0

        self.total["usage"]["imei"] += report.number_imei_transaction
        self.total["usage"]["invoice"] += report.number_invoice_transaction

    def add(self, report):
        report_month = report.start_at.month
        if self.month is None:
            self.month = report_month
            self.total["extraction_date"] = f"Subtotal ({self.month})"
        elif self.month != report_month:
            self.total["images_quality"]["successful_percent"] += self.total["images_quality"]["successful"]/self.total["total_images"]
            self.total["images_quality"]["bad_percent"] += self.total["images_quality"]["bad"]/self.total["total_images"]
            return False  # Reports from a different month, stop accumulating
        # accumulate fields
        new_data = copy.deepcopy(self.data_format)[0]
        new_data["num_imei"] = report.number_imei
        new_data["subs"] = report.subsidiary
        new_data["extraction_date"] = report.start_at
        new_data["num_invoice"] = report.number_invoice
        new_data["total_images"] = report.number_images
        new_data["images_quality"]["successful"] = report.number_images - report.number_bad_images
        new_data["images_quality"]["bad"] = report.number_bad_images
        report.reviewed_accuracy = {} if report.reviewed_accuracy is None else report.reviewed_accuracy
        report.feedback_accuracy = {} if report.feedback_accuracy is None else report.feedback_accuracy
        if sum([report.reviewed_accuracy[x] for x in report.reviewed_accuracy.keys() if "_count" not in x]):
            new_data["average_accuracy_rate"]["imei"] = report.reviewed_accuracy.get("imei_number", None)
            new_data["average_accuracy_rate"]["purchase_date"] = report.reviewed_accuracy.get("purchase_date", None)
            new_data["average_accuracy_rate"]["retailer_name"] = report.reviewed_accuracy.get("retailername", None)
            new_data["average_accuracy_rate"]["invoice_no"] = report.reviewed_accuracy.get("invoice_no", None)
        elif sum([report.feedback_accuracy[x] for x in report.feedback_accuracy.keys() if "_count" not in x]):
            new_data["average_accuracy_rate"]["imei"] = report.feedback_accuracy.get("imei_number", None)
            new_data["average_accuracy_rate"]["purchase_date"] = report.feedback_accuracy.get("purchase_date", None)
            new_data["average_accuracy_rate"]["retailer_name"] = report.feedback_accuracy.get("retailername", None)
            new_data["average_accuracy_rate"]["invoice_no"] = report.feedback_accuracy.get("invoice_no", None)
        new_data["average_processing_time"]["imei"] = report.average_OCR_time.get("imei", 0) if report.average_OCR_time else 0
        new_data["average_processing_time"]["invoice"] = report.average_OCR_time.get("invoice", 0) if report.average_OCR_time else 0
        new_data["usage"]["imei"] = report.number_imei_transaction
        new_data["usage"]["invoice"] = report.number_invoice_transaction
        new_data["images_quality"]["successful_percent"] += new_data["images_quality"]["successful"]/new_data["total_images"] if new_data["total_images"] else 0
        new_data["images_quality"]["bad_percent"] += new_data["images_quality"]["bad"]/new_data["total_images"] if new_data["total_images"] else 0
        self.data.append(new_data)
        self.accumulate(report)
        return True

    def clear(self):
        self.month = None
        self.total = {
            'subs': "+",
            'extraction_date': "Subtotal ()",
            'total_images': 0,
            'images_quality': {
                'successful': 0,
                'successful_percent': 0,
                'bad': 0,
                'bad_percent': 0
            },
            'average_accuracy_rate': {
                'imei': IterAvg(),
                'purchase_date': IterAvg(),
                'retailer_name': IterAvg()
            },
            'average_processing_time': {
                'imei': IterAvg(),
                'invoice': IterAvg()
            },
            'usage': {
                'imei': 0,
                'invoice': 0
            }
        }
        self.data = []

    def __call__(self):
        total = copy.deepcopy(self.total)
        total["images_quality"]["successful_percent"] = total["images_quality"]["successful"]/total["total_images"] if total["total_images"] else 0
        total["images_quality"]["bad_percent"] = total["images_quality"]["bad"]/total["total_images"] if total["total_images"] else 0
        total["average_accuracy_rate"]["imei"] = total["average_accuracy_rate"]["imei"]()
        total["average_accuracy_rate"]["purchase_date"] = total["average_accuracy_rate"]["purchase_date"]()
        total["average_accuracy_rate"]["retailer_name"] = total["average_accuracy_rate"]["retailer_name"]()
        total["average_processing_time"]["imei"] = total["average_processing_time"]["imei"]()
        total["average_processing_time"]["invoice"] = total["average_processing_time"]["invoice"]()
        return self.month, self.data, total


class IterAvg:
    def __init__(self, name="default"):
        self.name = name
        self.avg = 0
        self.count = 0

    def add(self, values):
        """
        Args:
            values (list[float]): values folded into the running average; None entries are ignored.
        """
        values = [x for x in values if x is not None]
        if len(values) == 0:
            return
        self.avg = (self.avg*self.count + sum(values))/(self.count + len(values))
        self.count += len(values)

    def add_avg(self, avg, count):
        if avg is None or count is None or count == 0:
            return
        self.count += count
        self.avg = (self.avg*(self.count - count) + avg*count)/(self.count)

    def __call__(self):
        if self.count == 0:
            return None
        return self.avg
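
# Illustrative sketch (not part of the original module): how IterAvg folds samples
# into a running average; the numbers below are invented for demonstration.
#
#   avg = IterAvg()
#   avg.add([0.9, 1.0, None])   # None entries are dropped -> average 0.95 over 2 samples
#   avg.add_avg(0.5, 2)         # merge a pre-computed average covering 2 samples
#   assert avg.count == 4
#   assert abs(avg() - 0.725) < 1e-9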


def validate_feedback_file(feedback, predict):
    if feedback:
        imei_feedback = feedback.get("imei_number", [])
        imei_feedback = [x for x in imei_feedback if x != ""]
        num_imei_feedback = len(imei_feedback)
        num_imei_predict = len(predict.get("imei_number", []))
        if num_imei_feedback != num_imei_predict:
            return False
        feedback["imei_number"] = imei_feedback
    return True
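
# Illustrative sketch (not part of the original module): a feedback payload is accepted
# only when its non-empty IMEI count matches the prediction; empty strings are dropped
# in place. The sample values are invented for demonstration.
#
#   feedback = {"imei_number": ["123456789012345", ""]}
#   predict = {"imei_number": ["123456789012345"]}
#   validate_feedback_file(feedback, predict)   # -> True; feedback["imei_number"] == ["123456789012345"]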


def first_of_list(the_list):
    if not the_list:
        return None
    return the_list[0]


def _feedback_invoice_no_exist(feedback_result):
    if feedback_result is None:
        return True
    if not isinstance(feedback_result, dict):
        return True
    invoice_no = feedback_result.get("invoice_no", None)
    if invoice_no in ["", [], None]:
        return False
    else:
        return True


def extract_report_detail_list(report_detail_list, lower=False, in_percent=True):
    data = []
    for report_file in report_detail_list:
        data.append({
            "Subs": report_file.subsidiary,
            "Request ID": report_file.correspond_request_id,
            "OCR Extraction Date": format_datetime_for_report_detail(report_file.correspond_request_created_at),
            "Redemption Number": report_file.correspond_redemption_id,
            "Image type": report_file.doc_type,
            "IMEI_user submitted": first_of_list(report_file.feedback_result.get("imei_number", [None])) if report_file.feedback_result else None,
            "IMEI_OCR retrieved": first_of_list(report_file.predict_result.get("imei_number", [None])),
            "IMEI Revised": first_of_list(report_file.reviewed_result.get("imei_number", [None])) if report_file.reviewed_result else None,
            "IMEI1 Accuracy": first_of_list(report_file.feedback_accuracy.get("imei_number", [None])),
            "Invoice_Number_User": report_file.feedback_result.get("invoice_no", None) if report_file.feedback_result else None,
            "Invoice_Number_OCR": report_file.predict_result.get("invoice_no", None),
            "Invoice_Number Revised": report_file.reviewed_result.get("invoice_no", None) if report_file.reviewed_result else None,
            "Invoice_Number_Accuracy": first_of_list(report_file.feedback_accuracy.get("invoice_no", [None])) if _feedback_invoice_no_exist(report_file.feedback_result) else None,
            "Invoice_Purchase Date_Consumer": report_file.feedback_result.get("purchase_date", None) if report_file.feedback_result else None,
            "Invoice_Purchase Date_OCR": format_purchase_date_ocr_for_report(report_file.predict_result.get("purchase_date", [])),
            "Invoice_Purchase Date Revised": report_file.reviewed_result.get("purchase_date", None) if report_file.reviewed_result else None,
            "Invoice_Purchase Date Accuracy": first_of_list(report_file.feedback_accuracy.get("purchase_date", [None])),
            "Invoice_Retailer_Consumer": report_file.feedback_result.get("retailername", None) if report_file.feedback_result else None,
            "Invoice_Retailer_OCR": report_file.predict_result.get("retailername", None),
            "Invoice_Retailer Revised": report_file.reviewed_result.get("retailername", None) if report_file.reviewed_result else None,
            "Invoice_Retailer Accuracy": first_of_list(report_file.feedback_accuracy.get("retailername", [None])),
            "OCR Image Accuracy": report_file.acc,
            "OCR Image Speed (seconds)": report_file.time_cost,
            "Is Reviewed": report_file.is_reviewed,
            "Bad Image Reasons": report_file.bad_image_reason,
            "Countermeasures": report_file.counter_measures,
            "IMEI_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("imei_number", [None])),
            "Purchase Date_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("purchase_date", [None])),
            "Retailer_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("retailername", [None])),
            "Invoice_Number_Revised Accuracy": first_of_list(report_file.reviewed_accuracy.get("invoice_no", [None]))
        })
    if lower:
        for i, dat in enumerate(data):
            keys = list(dat.keys())
            for old_key in keys:
                data[i][old_key.lower().replace(" ", "_")] = data[i].pop(old_key)
    if in_percent:
        for i, dat in enumerate(data):
            keys = [x for x in list(dat.keys()) if "accuracy" in x.lower()]
            for key in keys:
                if data[i][key]:
                    data[i][key] = data[i][key]*100
    return data


def format_datetime_for_report_detail(ocr_extraction_date):
    naived_time = timezone.make_naive(ocr_extraction_date)
    ocr_extraction_date = timezone.make_aware(value=naived_time, timezone=timezone.get_current_timezone())
    format_to_date = '%Y-%m-%d'
    ocr_extraction_date = ocr_extraction_date.strftime(format_to_date)
    return ocr_extraction_date


def format_purchase_date_ocr_for_report(ocr_result):
    return ", ".join(ocr_result)


def count_transactions(start_date, end_date, subsidiary="all"):
    base_query = Q(created_at__range=(start_date, end_date))
    base_query &= Q(is_test_request=False)
    if subsidiary and subsidiary.lower().replace(" ", "") not in settings.SUB_FOR_BILLING:
        base_query &= Q(redemption_id__startswith=subsidiary)
    transaction_att = {}

    total_transaction_requests = SubscriptionRequest.objects.filter(base_query).order_by('created_at')
    for request in total_transaction_requests:
        if not request.doc_type:
            continue
        doc_types = request.doc_type.split(",")
        for doc_type in doc_types:
            if transaction_att.get(doc_type, None) is None:
                transaction_att[doc_type] = 1
            else:
                transaction_att[doc_type] += 1
        if not transaction_att.get("request", None):
            transaction_att["request"] = 1
        else:
            transaction_att["request"] += 1
    return transaction_att


def convert_datetime_format(date_string: str, is_gt=False) -> str:
    # pattern_date_string = "2023-02-28"
    input_format = "%Y-%m-%d"
    output_format = "%d/%m/%Y"
    # Validate the input date string format
    pattern = r"\d{4}-\d{2}-\d{2}"
    if re.match(pattern, date_string):
        # Convert the date string to a datetime object
        date_object = datetime.strptime(date_string, input_format)
        # Convert the datetime object to the desired output format
        formatted_date = date_object.strftime(output_format)
        return formatted_date
    return date_string
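
# Illustrative sketch (not part of the original module): convert_datetime_format only
# reformats strings that already match YYYY-MM-DD and passes anything else through.
#
#   convert_datetime_format("2023-02-28")   # -> "28/02/2023"
#   convert_datetime_format("28/02/2023")   # -> "28/02/2023" (pattern does not match, returned unchanged)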


def predict_result_to_ready(result):
    dict_result = {"retailername": "",
                   "sold_to_party": "",
                   "invoice_no": "",
                   "purchase_date": [],
                   "imei_number": []}
    if not result:
        return dict_result
    dict_result["retailername"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}])[0].get("value", None)
    dict_result["sold_to_party"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}])[1].get("value", None)
    dict_result["purchase_date"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}])[2].get("value", [])
    dict_result["imei_number"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}])[3].get("value", [])
    dict_result["invoice_no"] = result.get("content", {}).get("document", [{}])[0].get("content", [{}, {}, {}, {}, {}])[4].get("value", None)
    return dict_result
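
# Illustrative sketch (not part of the original module): the nesting that
# predict_result_to_ready unpacks, assuming the inference payload lists the fields in
# the order retailername, sold_to_party, purchase_date, imei_number, invoice_no.
# The sample payload below is invented for demonstration.
#
#   result = {"content": {"document": [{"content": [
#       {"value": "BestStore"},              # retailername
#       {"value": "John"},                   # sold_to_party
#       {"value": ["2023-02-28"]},           # purchase_date
#       {"value": ["123456789012345"]},      # imei_number
#       {"value": "INV-001"},                # invoice_no
#   ]}]}}
#   predict_result_to_ready(result)["retailername"]   # -> "BestStore"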


def update_temp_accuracy(accuracy, acc, keys):
    for key in keys:
        accuracy[key].add(acc[key])
    return accuracy


def _accuracy_calculate_formatter(inference, target):
    """_summary_
    Normalize inference and target from str/None into lists of str/None,
    then pad both lists to the same length.
    """
    if not isinstance(inference, list):
        # inference = [] if inference is None else [inference]
        inference = [inference]
    if not isinstance(target, list):
        # target = [] if target is None else [target]
        target = [target]
    length = max(len(target), len(inference))
    target = target + (length - len(target))*[target[0]] if len(target) > 0 else target + (length - len(target))*[None]
    inference = inference + (length - len(inference))*[None]
    return inference, target


def _acc_will_be_ignored(key_name, _target):
    is_optional_key = key_name in OPTIONAL_KEYS
    is_empty_target = _target in [[], None, '']
    if is_optional_key and is_empty_target:
        return True
    else:
        return False
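
# Illustrative sketch (not part of the original module): accuracy is skipped only for
# optional keys (currently just invoice_no) whose target is empty.
#
#   _acc_will_be_ignored("invoice_no", "")         # -> True
#   _acc_will_be_ignored("invoice_no", "INV-01")   # -> False
#   _acc_will_be_ignored("retailername", "")       # -> False (not an optional key)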


def calculate_accuracy(key_name: str, inference: Dict[str, Union[str, List]], target: Dict[str, Union[str, List]], type: str, sub: str):
    """_summary_
    NOTE: This has been changed to return accuracy = None if the key is optional
    (e.g. invoice_no) and its target is empty.

    Args:
        key_name (string): key to calculate accuracy on, e.g. retailername
        inference (dict): result from OCR, refined to align with the target below
        target (dict): result of type
        is_optional_keyname: defaults to False (i.e. not an optional keyname);
            currently invoice_no is the only optional keyname.
    """
    acc = []
    data = []
    if not target or not inference:
        return acc, data
    _inference = inference[key_name]
    _target = target[key_name]
    # _will_acc_be_ignored = _acc_will_be_ignored(key_name, _target)
    _inference, _target = _accuracy_calculate_formatter(_inference, _target)
    for i, v in enumerate(_inference):
        # TODO: target[i] is None, ""
        x = post_processing_str(key_name, _inference[i], is_gt=False, sub=sub)
        y = post_processing_str(key_name, _target[i], is_gt=True, sub=sub)

        score = eval_ocr_metric(
            [x],
            [y],
            metric=[
                "one_minus_ned",
                # "line_acc_ignore_case_symbol",
                # "line_acc",
                # "one_minus_ned_word",
            ])
        acc.append(list(score.values())[0])
        data.append([x, y])
    return acc, data


def calculate_avg_accuracy(acc, type, keys=[]):
    acc_list = []
    for key in keys:
        this_acc = acc.get(type, {}).get(key, [])
        if len(this_acc) > 0:
            this_acc = [max(this_acc)]
        acc_list += this_acc
    acc_list = [x for x in acc_list if x is not None]
    return sum(acc_list)/len(acc_list) if len(acc_list) > 0 else None
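
# Illustrative sketch (not part of the original module): calculate_avg_accuracy keeps
# only the best score per requested key, then averages across keys; made-up numbers.
#
#   acc = {"feedback": {"imei_number": [0.8, 1.0], "purchase_date": [0.5]}}
#   calculate_avg_accuracy(acc, "feedback", keys=["imei_number", "purchase_date"])   # -> 0.75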


def acc_maximize_list_values(acc):
    pos = {}
    for k in acc.keys():
        pos[k] = 0
        if isinstance(acc[k], list) and len(acc[k]) > 0:
            # Record the index of the best score before collapsing the list,
            # so callers can locate the corresponding value in the raw results.
            max_value = max(acc[k])
            pos[k] = acc[k].index(max_value)
            acc[k] = [max_value]
    return acc, pos
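
# Illustrative sketch (not part of the original module): acc_maximize_list_values keeps
# the best score per field and reports the index it came from; made-up numbers.
#
#   scores = {"imei_number": [0.7, 1.0, 0.9], "purchase_date": []}
#   scores, pos = acc_maximize_list_values(scores)
#   # scores == {"imei_number": [1.0], "purchase_date": []}
#   # pos == {"imei_number": 1, "purchase_date": 0}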
2024-02-16 10:38:19 +00:00
2024-03-06 07:56:02 +00:00

def create_billing_data(subscription_requests):
    billing_data = []
    for request in subscription_requests:
        if request.status != 200:
            continue
        images = SubscriptionRequestFile.objects.filter(request=request, file_category=FileCategory.Origin.value)
        for image in images:
            if not image.doc_type:
                # Infer the doc type from the file name when it is missing (second underscore-separated token)
                _doc_type = image.file_name.split("_")[1]
                if _doc_type in ["imei", "invoice"]:
                    image.doc_type = _doc_type
                    image.save()
            else:
                _doc_type = image.doc_type
            doc_type = "SN/IMEI" if _doc_type == "imei" else "Invoice"
            _sub = ""
            redemption_id = ""
            if request.redemption_id:
                _sub = map_subsidiary_short_to_long(request.redemption_id[:2])
                redemption_id = request.redemption_id

            format_to_time = '%Y-%m-%d %H:%M'
            format_to_date = '%Y-%m-%d'
            format_to_month = '%B %Y'

            naive_created_time = timezone.make_naive(request.created_at)
            rq_created_at = timezone.make_aware(value=naive_created_time, timezone=timezone.get_current_timezone())

            rq_month = rq_created_at.strftime(format_to_month)
            rq_date = rq_created_at.strftime(format_to_date)
            rq_time = rq_created_at.strftime(format_to_time)
            billing_data.append({
                "request_month": rq_month,
                "subsidiary": _sub,
                "image_type": doc_type,
                "redemption_number": redemption_id,
                "request_id": request.request_id,
                "request_date": rq_date,
                "request_time_(utc)": rq_time
            })
    return billing_data
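
# Each billing entry is one flat dict per image; illustrative values only:
#   {"request_month": "March 2024", "subsidiary": "...", "image_type": "SN/IMEI",
#    "redemption_number": "...", "request_id": "...",
#    "request_date": "2024-03-06", "request_time_(utc)": "2024-03-06 07:56"}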

def avg_dict(data):
    values = []
    for k, v in data.items():
        if isinstance(v, list):
            values += v
    return sum(values) / len(values) if len(values) > 0 else -1
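
# e.g. avg_dict({"imei_number": [0.9, 1.0], "purchase_date": []}) == 0.95, while a
# dict with no scores at all returns the sentinel value -1 (sample data only).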

def calculate_a_request(report, request):
    def review_status_map(input):
        review_status = {-1: "Not Required",
                         0: "No",
                         1: "Yes"}
        return review_status.get(input, "N/A")

    atts = []
    request_att = {"acc": {"feedback": {"imei_number": [],
                                        "purchase_date": [],
                                        "retailername": [],
                                        "sold_to_party": [],
                                        "invoice_no": [],
                                        },
                           "reviewed": {"imei_number": [],
                                        "purchase_date": [],
                                        "retailername": [],
                                        "sold_to_party": [],
                                        "invoice_no": [],
                                        },
                           "acumulated": {"imei_number": [],
                                          "purchase_date": [],
                                          "retailername": [],
                                          "sold_to_party": [],
                                          "invoice_no": [],
                                          }},
                   "err": [],
                   "time_cost": {"imei": [],
                                 "invoice": []},
                   "total_images": 0,
                   "bad_images": 0,
                   "bad_image_list": [],
                   "is_reviewed": [],  # -1: No need to review, 0: Not reviewed, 1: Reviewed
                   }
    images = SubscriptionRequestFile.objects.filter(request=request, file_category=FileCategory.Origin.value)
    report_files = []
    for image in images:
        status, att = calculate_subcription_file(image, request.subsidiary)

        att["acc"]["feedback"], fb_max_indexes = acc_maximize_list_values(att["acc"]["feedback"])
        att["acc"]["reviewed"], rv_max_indexes = acc_maximize_list_values(att["acc"]["reviewed"])

        _att = copy.deepcopy(att)  # deep copy here to avoid removing acc for bad images in the next steps

        fb_avg_acc = avg_dict(att["acc"]["feedback"])
        rv_avg_acc = avg_dict(att["acc"]["reviewed"])

        image.is_required = fb_avg_acc < settings.NEED_REVIEW

        if image.processing_time < 0:
            continue
        if status != 200:
            continue

        atts.append(att)

        image.feedback_accuracy = att["acc"]["feedback"]  # dict {key: [values]}
        image.is_bad_image_quality = att["is_bad_image"]  # is_bad_image = avg_acc < threshold (avg_acc is the feedback accuracy)
        if att["is_reviewed"] == 1:  # Image is already reviewed
            image.reviewed_accuracy = att["acc"]["reviewed"]  # dict {key: [values]}

        if not image.doc_type:
            _doc_type = image.file_name.split("_")[1]
            if _doc_type in ["imei", "invoice"]:
                image.doc_type = _doc_type
        image.save()

        _sub = "NA"
        if request.redemption_id:
            _sub = map_subsidiary_short_to_long(request.redemption_id[:2])
        else:
            logger.warning(f"empty redemption_id, check request: {request.request_id}")

        # Little trick: replace the purchase date with its normalized form
        if len(att["normalized_data"]["feedback"].get("purchase_date", [])) > 0:
            image.predict_result["purchase_date"] = [value_pair[0] for value_pair in att["normalized_data"]["feedback"]["purchase_date"]]
            image.feedback_result["purchase_date"] = att["normalized_data"]["feedback"]["purchase_date"][fb_max_indexes["purchase_date"]][1]
        if len(att["normalized_data"]["reviewed"].get("purchase_date", [])) > 0:
            image.predict_result["purchase_date"] = [value_pair[0] for value_pair in att["normalized_data"]["reviewed"]["purchase_date"]]
            image.reviewed_result["purchase_date"] = att["normalized_data"]["reviewed"]["purchase_date"][rv_max_indexes["purchase_date"]][1]

        request_att["is_reviewed"].append(att["is_reviewed"])

        if att["is_reviewed"] != 1:
            att["acc"]["reviewed"] = {}
            reviewed_result = {}
            reason = None
            counter_measure = None
        else:
            reviewed_result = image.reviewed_result
            reason = image.reason
            counter_measure = image.counter_measures

        if att["is_bad_image"] or image.reason in settings.ACC_EXCLUDE_RESEASONS:
            if image.reason in settings.ACC_EXCLUDE_RESEASONS:
                _att["avg_acc"] = None
                for t in _att["acc"].keys():
                    for k in _att["acc"][t].keys():
                        _att["acc"][t][k] = []
        else:
            if request_att["time_cost"].get(image.doc_type, None):
                request_att["time_cost"][image.doc_type].append(image.processing_time)
            else:
                request_att["time_cost"][image.doc_type] = [image.processing_time]
        new_report_file = ReportFile(report=report,
                                     subsidiary=_sub,
                                     correspond_request_id=request.request_id,
                                     correspond_request_created_at=request.created_at,
                                     correspond_redemption_id=request.redemption_id,
                                     doc_type=image.doc_type,
                                     predict_result=image.predict_result,
                                     feedback_result=image.feedback_result,
                                     reviewed_result=reviewed_result,
                                     feedback_accuracy=_att["acc"]["feedback"],
                                     reviewed_accuracy=_att["acc"]["reviewed"],
                                     acc=_att["avg_acc"],
                                     is_bad_image=att["is_bad_image"],
                                     is_reviewed=review_status_map(att["is_reviewed"]),
                                     time_cost=image.processing_time,
                                     bad_image_reason=reason,
                                     counter_measures=counter_measure,
                                     error="|".join(att["err"]),
                                     review_status=att["is_reviewed"],
                                     )
        report_files.append(new_report_file)
        if att["is_bad_image"]:
            request_att["bad_image_list"].append(image.file_name)
            # Bad images do not contribute to the accumulated accuracy below
            _att["avg_acc"] = None
            for t in _att["acc"].keys():
                for k in _att["acc"][t].keys():
                    _att["acc"][t][k] = []

        try:
            request_att["acc"]["feedback"]["imei_number"] += _att["acc"]["feedback"]["imei_number"]
            request_att["acc"]["feedback"]["purchase_date"] += _att["acc"]["feedback"]["purchase_date"]
            request_att["acc"]["feedback"]["retailername"] += _att["acc"]["feedback"]["retailername"]
            request_att["acc"]["feedback"]["sold_to_party"] += _att["acc"]["feedback"]["sold_to_party"]
            request_att["acc"]["feedback"]["invoice_no"] += _att["acc"]["feedback"]["invoice_no"]

            request_att["acc"]["reviewed"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if _att["is_reviewed"] == 1 else []
            request_att["acc"]["reviewed"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if _att["is_reviewed"] == 1 else []
            request_att["acc"]["reviewed"]["retailername"] += _att["acc"]["reviewed"]["retailername"] if _att["is_reviewed"] == 1 else []
            request_att["acc"]["reviewed"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if _att["is_reviewed"] == 1 else []
            request_att["acc"]["reviewed"]["invoice_no"] += _att["acc"]["reviewed"]["invoice_no"] if _att["is_reviewed"] == 1 else []

            # Accumulated accuracy prefers the reviewed value when one exists, otherwise falls back to feedback
            request_att["acc"]["acumulated"]["imei_number"] += _att["acc"]["reviewed"]["imei_number"] if _att["acc"]["reviewed"]["imei_number"] and _att["is_reviewed"] == 1 else _att["acc"]["feedback"]["imei_number"]
            request_att["acc"]["acumulated"]["purchase_date"] += _att["acc"]["reviewed"]["purchase_date"] if _att["acc"]["reviewed"]["purchase_date"] and _att["is_reviewed"] == 1 else _att["acc"]["feedback"]["purchase_date"]
            request_att["acc"]["acumulated"]["retailername"] += _att["acc"]["reviewed"]["retailername"] if _att["acc"]["reviewed"]["retailername"] and _att["is_reviewed"] == 1 else _att["acc"]["feedback"]["retailername"]
            request_att["acc"]["acumulated"]["sold_to_party"] += _att["acc"]["reviewed"]["sold_to_party"] if _att["acc"]["reviewed"]["sold_to_party"] and _att["is_reviewed"] == 1 else _att["acc"]["feedback"]["sold_to_party"]
            request_att["acc"]["acumulated"]["invoice_no"] += _att["acc"]["reviewed"]["invoice_no"] if _att["acc"]["reviewed"]["invoice_no"] and _att["is_reviewed"] == 1 else _att["acc"]["feedback"]["invoice_no"]

            if image.reason not in settings.ACC_EXCLUDE_RESEASONS:
                request_att["bad_images"] += int(_att["is_bad_image"])
                request_att["total_images"] += 1
            request_att["err"] += _att["err"]
        except Exception as e:
            logger.error(f"Failed to calculate request: {request.request_id} - request_file: {image.file_name} because of {e}")
            continue

    return request_att, report_files, atts
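
# The caller (outside this module) is expected to persist and aggregate these
# results; a minimal usage sketch, assuming the standard Django bulk insert:
#   request_att, report_files, atts = calculate_a_request(report, request)
#   ReportFile.objects.bulk_create(report_files)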

def calculate_subcription_file(subcription_request_file, subsidiary):
    att = {"acc": {"feedback": {},
                   "reviewed": {}},
           "normalized_data": {"feedback": {},
                               "reviewed": {}},
           "err": [],
           "is_bad_image": False,
           "avg_acc": None,
           "is_reviewed": -1,  # -1: No need to review, 0: Not reviewed, 1: Reviewed
           }
    if not subcription_request_file.predict_result:
        return 400, att
    inference_result = copy.deepcopy(subcription_request_file.predict_result)
    feedback_result = copy.deepcopy(subcription_request_file.feedback_result)
    reviewed_result = copy.deepcopy(subcription_request_file.reviewed_result)

    accuracy_keys_for_this_image = settings.FIELDS_BY_SUB.get(subsidiary, settings.FIELDS_BY_SUB["default"]).get(subcription_request_file.doc_type, [])
    for key_name in VALID_KEYS:
        att["acc"]["feedback"][key_name] = []
        att["normalized_data"]["feedback"][key_name] = []
        att["acc"]["reviewed"][key_name] = []
        att["normalized_data"]["reviewed"][key_name] = []
    for key_name in accuracy_keys_for_this_image:
        try:
            att["acc"]["feedback"][key_name], att["normalized_data"]["feedback"][key_name] = calculate_accuracy(key_name, inference_result, feedback_result, "feedback", sub=subcription_request_file.request.subsidiary)
            att["acc"]["reviewed"][key_name], att["normalized_data"]["reviewed"][key_name] = calculate_accuracy(key_name, inference_result, reviewed_result, "reviewed", sub=subcription_request_file.request.subsidiary)
        except Exception as e:
            att["err"].append(str(e))

    subcription_request_file.feedback_accuracy = att["acc"]["feedback"]
    subcription_request_file.reviewed_accuracy = att["acc"]["reviewed"]

    avg_reviewed = calculate_avg_accuracy(att["acc"], "reviewed", VALID_KEYS)
    avg_feedback = calculate_avg_accuracy(att["acc"], "feedback", VALID_KEYS)

    if avg_feedback is not None or avg_reviewed is not None:
        avg_acc = 0
        if avg_feedback is not None:
            avg_acc = avg_feedback
            if avg_feedback < settings.NEED_REVIEW:
                att["is_reviewed"] = 0
            else:
                att["is_reviewed"] = -1
        if avg_reviewed is not None and att["is_reviewed"] != -1:
            avg_acc = avg_reviewed
            att["is_reviewed"] = 1

        # Little trick to overcome issues caused by a misleading manual review process
        if (subcription_request_file.reason or subcription_request_file.counter_measures) and att["is_reviewed"] != -1:
            att["is_reviewed"] = 1

        att["avg_acc"] = avg_acc
        if avg_acc < settings.BAD_THRESHOLD:
            att["is_bad_image"] = True
    return 200, att
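
# Rough sketch of the two outcomes (values illustrative):
#   status, att = calculate_subcription_file(image, "some_subsidiary")
#   # status == 400 with a mostly empty att when the file has no predict_result;
#   # status == 200 otherwise, with att["avg_acc"], att["is_bad_image"] and
#   # att["is_reviewed"] (-1 / 0 / 1) derived from the feedback/reviewed accuracies.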

def mean_list(l):
    l = [x for x in l if x is not None]
    if len(l) == 0:
        return 0
    return sum(l) / len(l)
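
# e.g. mean_list([0.5, None, 1.0]) == 0.75; None entries are simply dropped.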

def shadow_report(report_id, query):
    # Dispatch report generation to the Celery worker via the client connector
    c_connector.make_a_report_2((report_id, query))