Merge pull request #114 from SDSRV-IDP/fix/negative_processing_time

Fix/negative processing time
This commit is contained in:
Phan Thành Trung 2024-05-03 08:54:46 +07:00 committed by GitHub Enterprise
commit bf2e5cc08a
10 changed files with 154 additions and 172 deletions

View File

@ -1,7 +1,7 @@
FROM pytorch/pytorch:1.13.1-cuda11.6-cudnn8-runtime FROM pytorch/pytorch:1.13.1-cuda11.6-cudnn8-runtime
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y git gcc g++ ffmpeg libsm6 libxext6 && \ apt-get install -y git gcc g++ ffmpeg libsm6 libxext6 wget && \
apt-get -y autoremove && \ apt-get -y autoremove && \
apt-get clean && \ apt-get clean && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
@ -16,10 +16,16 @@ RUN ln -s /opt/conda/lib/python3.10/site-packages/torch/lib/libcudnn.so.8 /usr/l
ln -s /opt/conda/lib/libcublas.so /usr/lib/libcublas.so ln -s /opt/conda/lib/libcublas.so /usr/lib/libcublas.so
# RUN python -m pip install paddlepaddle-gpu==2.4.2.post116 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html --no-cache-dir # RUN python -m pip install paddlepaddle-gpu==2.4.2.post116 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html --no-cache-dir
RUN pip install fastdeploy-gpu-python==1.0.7 -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html --no-cache-dir # RUN pip install fastdeploy-gpu-python==1.0.7 -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html --no-cache-dir
RUN wget https://bj.bcebos.com/fastdeploy/release/wheels/fastdeploy_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl \
&& pip install fastdeploy_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl --no-cache-dir \
&& rm fastdeploy_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl
RUN python -m pip install 'git+https://github.com/facebookresearch/detectron2.git@a59f05630a8f205756064244bf5beb8661f96180' --no-cache-dir
RUN python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' --no-cache-dir RUN wget https://paddle-wheel.bj.bcebos.com/2.4.2/linux/linux-gpu-cuda11.6-cudnn8.4.0-mkl-gcc8.2-avx/paddlepaddle_gpu-2.4.2.post116-cp310-cp310-linux_x86_64.whl \
&& pip install paddlepaddle_gpu-2.4.2.post116-cp310-cp310-linux_x86_64.whl --no-cache-dir \
&& rm paddlepaddle_gpu-2.4.2.post116-cp310-cp310-linux_x86_64.whl
# Install SDSV packages # Install SDSV packages
COPY . /workspace/cope2n-ai-fi COPY . /workspace/cope2n-ai-fi

View File

@ -8,7 +8,7 @@ RUN groupadd --gid ${GID} ${USERNAME} \
&& apt-get install -y sudo bash gettext poppler-utils \ && apt-get install -y sudo bash gettext poppler-utils \
&& echo ${USERNAME} ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/${USERNAME} \ && echo ${USERNAME} ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/${USERNAME} \
&& chmod 0440 /etc/sudoers.d/${USERNAME} && chmod 0440 /etc/sudoers.d/${USERNAME}
RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 wget -y
RUN yes | apt install postgresql gcc musl-dev RUN yes | apt install postgresql gcc musl-dev
RUN pip install --upgrade pip RUN pip install --upgrade pip
RUN pip install uvicorn gunicorn Celery RUN pip install uvicorn gunicorn Celery
@ -17,7 +17,10 @@ RUN pip install uvicorn gunicorn Celery
RUN pip install pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116 RUN pip install pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116
RUN pip install -U openmim==0.3.7 --no-cache-dir RUN pip install -U openmim==0.3.7 --no-cache-dir
RUN mim install mmcv-full==1.7.2 RUN mim install mmcv-full==1.7.2
RUN pip install fastdeploy-gpu-python==1.0.7 -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html --no-cache-dir # RUN pip install fastdeploy-gpu-python==1.0.7 -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html --no-cache-dir
RUN wget https://bj.bcebos.com/fastdeploy/release/wheels/fastdeploy_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl \
&& pip install fastdeploy_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl --no-cache-dir \
&& rm fastdeploy_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl
# End intergration with sdskvu # End intergration with sdskvu
USER ${UID} USER ${UID}
@ -35,7 +38,11 @@ RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsvtr &&
RUN cd /app/fwd_api/utils/sdsvkvu && pip3 install -v -e . --no-cache-dir RUN cd /app/fwd_api/utils/sdsvkvu && pip3 install -v -e . --no-cache-dir
# For intergration with sdskvu # For intergration with sdskvu
RUN python -m pip install paddlepaddle-gpu==2.4.2.post116 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html --no-cache-dir # RUN python -m pip install paddlepaddle-gpu==2.4.2.post116 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html --no-cache-dir
RUN wget https://paddle-wheel.bj.bcebos.com/2.4.2/linux/linux-gpu-cuda11.6-cudnn8.4.0-mkl-gcc8.2-avx/paddlepaddle_gpu-2.4.2.post116-cp310-cp310-linux_x86_64.whl \
&& pip install paddlepaddle_gpu-2.4.2.post116-cp310-cp310-linux_x86_64.whl --no-cache-dir \
&& rm paddlepaddle_gpu-2.4.2.post116-cp310-cp310-linux_x86_64.whl
ENV TZ="Asia/Ho_Chi_Minh" ENV TZ="Asia/Ho_Chi_Minh"

View File

@ -205,7 +205,7 @@ class CtelViewSet(viewsets.ViewSet):
'type': 'boolean', 'type': 'boolean',
}, },
}, },
'required': {'imei_files'} # 'required': {'imei_files'}
} }
}, responses=None, tags=['OCR']) }, responses=None, tags=['OCR'])
@action(detail=False, url_path="images/process_sync", methods=["POST"]) @action(detail=False, url_path="images/process_sync", methods=["POST"])

View File

@ -827,14 +827,15 @@ def calculate_a_request(report, request):
images = SubscriptionRequestFile.objects.filter(request=request, file_category=FileCategory.Origin.value) images = SubscriptionRequestFile.objects.filter(request=request, file_category=FileCategory.Origin.value)
report_files = [] report_files = []
for image in images: for image in images:
if image.processing_time < 0:
continue
status, att = calculate_subcription_file(image) status, att = calculate_subcription_file(image)
att["acc"]["feedback"], fb_max_indexes = acc_maximize_list_values(att["acc"]["feedback"]) att["acc"]["feedback"], fb_max_indexes = acc_maximize_list_values(att["acc"]["feedback"])
att["acc"]["reviewed"], rv_max_indexes = acc_maximize_list_values(att["acc"]["reviewed"]) att["acc"]["reviewed"], rv_max_indexes = acc_maximize_list_values(att["acc"]["reviewed"])
_att = copy.deepcopy(att) _att = copy.deepcopy(att)
if image.processing_time < 0:
continue
if status != 200: if status != 200:
continue continue
image.feedback_accuracy = att["acc"]["feedback"] # dict {key: [values]} image.feedback_accuracy = att["acc"]["feedback"] # dict {key: [values]}

View File

@ -20,7 +20,7 @@
}, },
{ {
"label": "invoice_no", "label": "invoice_no",
"value": "CTSM-I001676" "value": "CTSM-1001676"
} }
], ],
"doc_type": "sbt_document", "doc_type": "sbt_document",

0
tests/__init__.py Normal file
View File

View File

@ -44,86 +44,86 @@ def test_1_invoice_1_imei():
assert "358975990917032" == get_field(document, "imei_number")[0] assert "358975990917032" == get_field(document, "imei_number")[0]
def test_1_invoice_3_imei(): # def test_1_invoice_3_imei():
invoice_files = [ # invoice_files = [
"test_samples/sbt/invoice.jpg" # "test_samples/sbt/invoice.jpg"
] # ]
imei_files = [ # imei_files = [
"test_samples/sbt/imei1.jpg", # "test_samples/sbt/imei1.jpg",
"test_samples/sbt/imei2.jpg", # "test_samples/sbt/imei2.jpg",
"test_samples/sbt/imei3.jpg" # "test_samples/sbt/imei3.jpg"
] # ]
data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files) # data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files)
data = data["data"]["data"] # data = data["data"]["data"]
assert data["total_pages"] == 4 # assert data["total_pages"] == 4
assert data["ocr_num_pages"] == 4 # assert data["ocr_num_pages"] == 4
assert data["status"] == 200 # assert data["status"] == 200
document = data["document"][0] # document = data["document"][0]
assert document["end_page"] == 4 # assert document["end_page"] == 4
assert document["start_page"] == 1 # assert document["start_page"] == 1
check_invoice_data(document) # check_invoice_data(document)
assert 3 == len(get_field(document, "imei_number")) # assert 3 == len(get_field(document, "imei_number"))
assert "358975990917032" == get_field(document, "imei_number")[0] # assert "358975990917032" == get_field(document, "imei_number")[0]
assert "350731691693549" == get_field(document, "imei_number")[1] # assert "350731691693549" == get_field(document, "imei_number")[1]
assert "R52W70BHDWX" == get_field(document, "imei_number")[2] # assert "R52W70BHDWX" == get_field(document, "imei_number")[2]
def test_1_invoice_5_imei(): # def test_1_invoice_5_imei():
invoice_files = [ # invoice_files = [
"test_samples/sbt/invoice.jpg" # "test_samples/sbt/invoice.jpg"
] # ]
imei_files = [ # imei_files = [
"test_samples/sbt/imei1.jpg", # "test_samples/sbt/imei1.jpg",
"test_samples/sbt/imei2.jpg", # "test_samples/sbt/imei2.jpg",
"test_samples/sbt/imei3.jpg", # "test_samples/sbt/imei3.jpg",
"test_samples/sbt/imei4.jpeg", # "test_samples/sbt/imei4.jpeg",
"test_samples/sbt/imei5.jpg" # "test_samples/sbt/imei5.jpg"
] # ]
data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files) # data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files)
data = data["data"]["data"] # data = data["data"]["data"]
assert data["total_pages"] == 6 # assert data["total_pages"] == 6
assert data["ocr_num_pages"] == 6 # assert data["ocr_num_pages"] == 6
assert data["status"] == 200 # assert data["status"] == 200
document = data["document"][0] # document = data["document"][0]
assert document["end_page"] == 6 # assert document["end_page"] == 6
assert document["start_page"] == 1 # assert document["start_page"] == 1
check_invoice_data(document) # check_invoice_data(document)
assert 5 == len(get_field(document, "imei_number")) # assert 5 == len(get_field(document, "imei_number"))
assert "358975990917032" == get_field(document, "imei_number")[0] # assert "358975990917032" == get_field(document, "imei_number")[0]
assert "350731691693549" == get_field(document, "imei_number")[1] # assert "350731691693549" == get_field(document, "imei_number")[1]
assert "R52W70BHDWX" == get_field(document, "imei_number")[2] # assert "R52W70BHDWX" == get_field(document, "imei_number")[2]
assert "350073345090297" == get_field(document, "imei_number")[3] # assert "350073345090297" == get_field(document, "imei_number")[3]
assert "0PBL3NHW500023N" == get_field(document, "imei_number")[4] # assert "0PBL3NHW500023N" == get_field(document, "imei_number")[4]
def test_0_invoice_3_imei(): # def test_0_invoice_3_imei():
invoice_files = [] # invoice_files = []
imei_files = [ # imei_files = [
"test_samples/sbt/imei1.jpg", # "test_samples/sbt/imei1.jpg",
"test_samples/sbt/imei2.jpg", # "test_samples/sbt/imei2.jpg",
"test_samples/sbt/imei3.jpg" # "test_samples/sbt/imei3.jpg"
] # ]
data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files) # data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files)
data = data["data"]["data"] # data = data["data"]["data"]
assert data["total_pages"] == 3 # assert data["total_pages"] == 3
assert data["ocr_num_pages"] == 3 # assert data["ocr_num_pages"] == 3
assert data["status"] == 200 # assert data["status"] == 200
document = data["document"][0] # document = data["document"][0]
assert document["end_page"] == 3 # assert document["end_page"] == 3
assert document["start_page"] == 1 # assert document["start_page"] == 1
assert 3 == len(get_field(document, "imei_number")) # assert 3 == len(get_field(document, "imei_number"))
assert "358975990917032" == get_field(document, "imei_number")[0] # assert "358975990917032" == get_field(document, "imei_number")[0]
assert "350731691693549" == get_field(document, "imei_number")[1] # assert "350731691693549" == get_field(document, "imei_number")[1]
assert "R52W70BHDWX" == get_field(document, "imei_number")[2] # assert "R52W70BHDWX" == get_field(document, "imei_number")[2]
def test_1_invoice_pdf_1_imei(): def test_1_invoice_pdf_1_imei():

View File

@ -10,12 +10,26 @@ PASSWORD = os.environ.get("IDP_PASSWORD", "XXXXXXXXXXXXXXXXXXXXX")
token = login(HOST, USERNAME, PASSWORD) token = login(HOST, USERNAME, PASSWORD)
def check_invoice_data_2(document):
assert document["doc_type"] == "sbt_document"
assert get_field(document, "retailername") == "Starhub Shop"
assert get_field(document, "sold_to_party") is None
assert "2022-02-22" in get_field(document, "purchase_date")
def test_invoice_only(): def test_invoice_only():
invoice_files = [ invoice_files = [
"test_samples/sbt/20220303025923NHNE_20220222_Starhub_Order_Confirmation_by_Email.pdf" "test_samples/sbt/20220303025923NHNE_20220222_Starhub_Order_Confirmation_by_Email.pdf"
] ]
imei_files = [] imei_files = []
data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files, ensure_success=False) data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files, ensure_success=False)
response_data = data["data"]
assert "imei_file param is required" in str(response_data) data = data["data"]["data"]
assert data["total_pages"] == 1
assert data["ocr_num_pages"] == 1
assert data["status"] == 200
document = data["document"][0]
assert document["end_page"] == 1
assert document["start_page"] == 1
check_invoice_data_2(document)

View File

@ -26,7 +26,7 @@ def test_1_invoice_6_imei():
] ]
data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files, ensure_success=False) data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files, ensure_success=False)
assert "Limit reached. Number of imei_file limit at 5" in str(data["data"]) assert "Limit reached. Number of imei_file limit at 2" in str(data["data"])
def test_1_invoice_5_imei(): def test_1_invoice_5_imei():
@ -42,50 +42,4 @@ def test_1_invoice_5_imei():
] ]
data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files, ensure_success=False) data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files, ensure_success=False)
data = data["data"]["data"] assert "Limit reached. Number of imei_file limit at 2" in str(data["data"])
assert data["total_pages"] == 6
assert data["ocr_num_pages"] == 6
assert data["status"] == 200
document = data["document"][0]
assert document["end_page"] == 6
assert document["start_page"] == 1
gt = {
"content": [
{
"label": "retailername",
"value": "Best Denki"
},
{
"label": "sold_to_party",
"value": None
},
{
"label": "purchase_date",
"value": [
"2022-02-18"
]
},
{
"label": "imei_number",
"value": [
"357822611219904",
"RFAW2022FED",
"5AWH14MT400396N",
"0HU33NIW200044K",
"0GJG4DBW200318X"
]
},
{
"label": "invoice_no",
"value": None
}
],
"doc_type": "sbt_document",
"end_page": 6,
"start_page": 1
}
diff = deepdiff.DeepDiff(document, gt)
assert diff == {}, f"Different result! GT: {gt}; RESULT: {document}"

View File

@ -43,55 +43,55 @@ def test_1_invoice_2_imei():
assert diff == {}, f"Different result! GT: {gt}; RESULT: {document}" assert diff == {}, f"Different result! GT: {gt}; RESULT: {document}"
def test_1_invoice_3_imei(): # def test_1_invoice_3_imei():
invoice_files = [ # invoice_files = [
"test_samples/test_07/invoice_jpg.jpg", # "test_samples/test_07/invoice_jpg.jpg",
] # ]
imei_files = [ # imei_files = [
"test_samples/test_07/imei_valid_1.jpg", # "test_samples/test_07/imei_valid_1.jpg",
"test_samples/test_07/imei_valid_2.jpg", # "test_samples/test_07/imei_valid_2.jpg",
"test_samples/test_07/imei_valid_3.jpg", # "test_samples/test_07/imei_valid_3.jpg",
] # ]
data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files, ensure_success=False) # data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files, ensure_success=False)
document = data["data"]["data"]["document"][0] # document = data["data"]["data"]["document"][0]
gt = json.load(open("test_samples/test_23.json", "r")) # gt = json.load(open("test_samples/test_23.json", "r"))
diff = deepdiff.DeepDiff(document, gt) # diff = deepdiff.DeepDiff(document, gt)
assert diff == {}, f"Different result! GT: {gt}; RESULT: {document}" # assert diff == {}, f"Different result! GT: {gt}; RESULT: {document}"
def test_1_invoice_4_imei(): # def test_1_invoice_4_imei():
invoice_files = [ # invoice_files = [
"test_samples/test_07/invoice_jpg.jpg", # "test_samples/test_07/invoice_jpg.jpg",
] # ]
imei_files = [ # imei_files = [
"test_samples/test_07/imei_valid_1.jpg", # "test_samples/test_07/imei_valid_1.jpg",
"test_samples/test_07/imei_valid_2.jpg", # "test_samples/test_07/imei_valid_2.jpg",
"test_samples/test_07/imei_valid_3.jpg", # "test_samples/test_07/imei_valid_3.jpg",
"test_samples/test_07/imei_valid_4.jpg", # "test_samples/test_07/imei_valid_4.jpg",
] # ]
data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files, ensure_success=False) # data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files, ensure_success=False)
document = data["data"]["data"]["document"][0] # document = data["data"]["data"]["document"][0]
gt = json.load(open("test_samples/test_24.json", "r")) # gt = json.load(open("test_samples/test_24.json", "r"))
diff = deepdiff.DeepDiff(document, gt) # diff = deepdiff.DeepDiff(document, gt)
assert diff == {}, f"Different result! GT: {gt}; RESULT: {document}" # assert diff == {}, f"Different result! GT: {gt}; RESULT: {document}"
def test_1_invoice_5_imei(): # def test_1_invoice_5_imei():
invoice_files = [ # invoice_files = [
"test_samples/test_07/invoice_jpg.jpg", # "test_samples/test_07/invoice_jpg.jpg",
] # ]
imei_files = [ # imei_files = [
"test_samples/test_07/imei_valid_1.jpg", # "test_samples/test_07/imei_valid_1.jpg",
"test_samples/test_07/imei_valid_2.jpg", # "test_samples/test_07/imei_valid_2.jpg",
"test_samples/test_07/imei_valid_3.jpg", # "test_samples/test_07/imei_valid_3.jpg",
"test_samples/test_07/imei_valid_4.jpg", # "test_samples/test_07/imei_valid_4.jpg",
"test_samples/test_07/imei_valid_5.jpg", # "test_samples/test_07/imei_valid_5.jpg",
] # ]
data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files, ensure_success=False) # data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files, ensure_success=False)
document = data["data"]["data"]["document"][0] # document = data["data"]["data"]["document"][0]
gt = json.load(open("test_samples/test_25.json", "r")) # gt = json.load(open("test_samples/test_25.json", "r"))
diff = deepdiff.DeepDiff(document, gt) # diff = deepdiff.DeepDiff(document, gt)
assert diff == {}, f"Different result! GT: {gt}; RESULT: {document}" # assert diff == {}, f"Different result! GT: {gt}; RESULT: {document}"