Merge pull request #114 from SDSRV-IDP/fix/negative_processing_time

Fix/negative processing time
This commit is contained in:
Phan Thành Trung 2024-05-03 08:54:46 +07:00 committed by GitHub Enterprise
commit bf2e5cc08a
10 changed files with 154 additions and 172 deletions

View File

@ -1,7 +1,7 @@
FROM pytorch/pytorch:1.13.1-cuda11.6-cudnn8-runtime
RUN apt-get update && \
apt-get install -y git gcc g++ ffmpeg libsm6 libxext6 && \
apt-get install -y git gcc g++ ffmpeg libsm6 libxext6 wget && \
apt-get -y autoremove && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*
@ -16,10 +16,16 @@ RUN ln -s /opt/conda/lib/python3.10/site-packages/torch/lib/libcudnn.so.8 /usr/l
ln -s /opt/conda/lib/libcublas.so /usr/lib/libcublas.so
# RUN python -m pip install paddlepaddle-gpu==2.4.2.post116 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html --no-cache-dir
RUN pip install fastdeploy-gpu-python==1.0.7 -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html --no-cache-dir
# RUN pip install fastdeploy-gpu-python==1.0.7 -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html --no-cache-dir
RUN wget https://bj.bcebos.com/fastdeploy/release/wheels/fastdeploy_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl \
&& pip install fastdeploy_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl --no-cache-dir \
&& rm fastdeploy_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl
RUN python -m pip install 'git+https://github.com/facebookresearch/detectron2.git@a59f05630a8f205756064244bf5beb8661f96180' --no-cache-dir
RUN python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' --no-cache-dir
RUN wget https://paddle-wheel.bj.bcebos.com/2.4.2/linux/linux-gpu-cuda11.6-cudnn8.4.0-mkl-gcc8.2-avx/paddlepaddle_gpu-2.4.2.post116-cp310-cp310-linux_x86_64.whl \
&& pip install paddlepaddle_gpu-2.4.2.post116-cp310-cp310-linux_x86_64.whl --no-cache-dir \
&& rm paddlepaddle_gpu-2.4.2.post116-cp310-cp310-linux_x86_64.whl
# Install SDSV packages
COPY . /workspace/cope2n-ai-fi

View File

@ -8,7 +8,7 @@ RUN groupadd --gid ${GID} ${USERNAME} \
&& apt-get install -y sudo bash gettext poppler-utils \
&& echo ${USERNAME} ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/${USERNAME} \
&& chmod 0440 /etc/sudoers.d/${USERNAME}
RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y
RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 wget -y
RUN yes | apt install postgresql gcc musl-dev
RUN pip install --upgrade pip
RUN pip install uvicorn gunicorn Celery
@ -17,7 +17,10 @@ RUN pip install uvicorn gunicorn Celery
RUN pip install pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116
RUN pip install -U openmim==0.3.7 --no-cache-dir
RUN mim install mmcv-full==1.7.2
RUN pip install fastdeploy-gpu-python==1.0.7 -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html --no-cache-dir
# RUN pip install fastdeploy-gpu-python==1.0.7 -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html --no-cache-dir
RUN wget https://bj.bcebos.com/fastdeploy/release/wheels/fastdeploy_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl \
&& pip install fastdeploy_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl --no-cache-dir \
&& rm fastdeploy_python-1.0.7-cp310-cp310-manylinux1_x86_64.whl
# End integration with sdsvkvu
USER ${UID}
@ -35,7 +38,11 @@ RUN cd /app/fwd_api/utils/sdsvkvu/sdsvkvu/externals/sdsvocr/externals/sdsvtr &&
RUN cd /app/fwd_api/utils/sdsvkvu && pip3 install -v -e . --no-cache-dir
# For integration with sdsvkvu
RUN python -m pip install paddlepaddle-gpu==2.4.2.post116 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html --no-cache-dir
# RUN python -m pip install paddlepaddle-gpu==2.4.2.post116 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html --no-cache-dir
RUN wget https://paddle-wheel.bj.bcebos.com/2.4.2/linux/linux-gpu-cuda11.6-cudnn8.4.0-mkl-gcc8.2-avx/paddlepaddle_gpu-2.4.2.post116-cp310-cp310-linux_x86_64.whl \
&& pip install paddlepaddle_gpu-2.4.2.post116-cp310-cp310-linux_x86_64.whl --no-cache-dir \
&& rm paddlepaddle_gpu-2.4.2.post116-cp310-cp310-linux_x86_64.whl
ENV TZ="Asia/Ho_Chi_Minh"

View File

@ -205,7 +205,7 @@ class CtelViewSet(viewsets.ViewSet):
'type': 'boolean',
},
},
'required': {'imei_files'}
# 'required': {'imei_files'}
}
}, responses=None, tags=['OCR'])
@action(detail=False, url_path="images/process_sync", methods=["POST"])

View File

@ -827,14 +827,15 @@ def calculate_a_request(report, request):
images = SubscriptionRequestFile.objects.filter(request=request, file_category=FileCategory.Origin.value)
report_files = []
for image in images:
if image.processing_time < 0:
continue
status, att = calculate_subcription_file(image)
att["acc"]["feedback"], fb_max_indexes = acc_maximize_list_values(att["acc"]["feedback"])
att["acc"]["reviewed"], rv_max_indexes = acc_maximize_list_values(att["acc"]["reviewed"])
_att = copy.deepcopy(att)
if image.processing_time < 0:
continue
if status != 200:
continue
image.feedback_accuracy = att["acc"]["feedback"] # dict {key: [values]}

View File

@ -20,7 +20,7 @@
},
{
"label": "invoice_no",
"value": "CTSM-I001676"
"value": "CTSM-1001676"
}
],
"doc_type": "sbt_document",

0
tests/__init__.py Normal file
View File

View File

@ -44,86 +44,86 @@ def test_1_invoice_1_imei():
assert "358975990917032" == get_field(document, "imei_number")[0]
def test_1_invoice_3_imei():
invoice_files = [
"test_samples/sbt/invoice.jpg"
]
imei_files = [
"test_samples/sbt/imei1.jpg",
"test_samples/sbt/imei2.jpg",
"test_samples/sbt/imei3.jpg"
]
data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files)
# def test_1_invoice_3_imei():
# invoice_files = [
# "test_samples/sbt/invoice.jpg"
# ]
# imei_files = [
# "test_samples/sbt/imei1.jpg",
# "test_samples/sbt/imei2.jpg",
# "test_samples/sbt/imei3.jpg"
# ]
# data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files)
data = data["data"]["data"]
assert data["total_pages"] == 4
assert data["ocr_num_pages"] == 4
assert data["status"] == 200
# data = data["data"]["data"]
# assert data["total_pages"] == 4
# assert data["ocr_num_pages"] == 4
# assert data["status"] == 200
document = data["document"][0]
assert document["end_page"] == 4
assert document["start_page"] == 1
check_invoice_data(document)
# document = data["document"][0]
# assert document["end_page"] == 4
# assert document["start_page"] == 1
# check_invoice_data(document)
assert 3 == len(get_field(document, "imei_number"))
assert "358975990917032" == get_field(document, "imei_number")[0]
assert "350731691693549" == get_field(document, "imei_number")[1]
assert "R52W70BHDWX" == get_field(document, "imei_number")[2]
# assert 3 == len(get_field(document, "imei_number"))
# assert "358975990917032" == get_field(document, "imei_number")[0]
# assert "350731691693549" == get_field(document, "imei_number")[1]
# assert "R52W70BHDWX" == get_field(document, "imei_number")[2]
def test_1_invoice_5_imei():
invoice_files = [
"test_samples/sbt/invoice.jpg"
]
imei_files = [
"test_samples/sbt/imei1.jpg",
"test_samples/sbt/imei2.jpg",
"test_samples/sbt/imei3.jpg",
"test_samples/sbt/imei4.jpeg",
"test_samples/sbt/imei5.jpg"
]
data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files)
# def test_1_invoice_5_imei():
# invoice_files = [
# "test_samples/sbt/invoice.jpg"
# ]
# imei_files = [
# "test_samples/sbt/imei1.jpg",
# "test_samples/sbt/imei2.jpg",
# "test_samples/sbt/imei3.jpg",
# "test_samples/sbt/imei4.jpeg",
# "test_samples/sbt/imei5.jpg"
# ]
# data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files)
data = data["data"]["data"]
assert data["total_pages"] == 6
assert data["ocr_num_pages"] == 6
assert data["status"] == 200
# data = data["data"]["data"]
# assert data["total_pages"] == 6
# assert data["ocr_num_pages"] == 6
# assert data["status"] == 200
document = data["document"][0]
assert document["end_page"] == 6
assert document["start_page"] == 1
check_invoice_data(document)
# document = data["document"][0]
# assert document["end_page"] == 6
# assert document["start_page"] == 1
# check_invoice_data(document)
assert 5 == len(get_field(document, "imei_number"))
assert "358975990917032" == get_field(document, "imei_number")[0]
assert "350731691693549" == get_field(document, "imei_number")[1]
assert "R52W70BHDWX" == get_field(document, "imei_number")[2]
assert "350073345090297" == get_field(document, "imei_number")[3]
assert "0PBL3NHW500023N" == get_field(document, "imei_number")[4]
# assert 5 == len(get_field(document, "imei_number"))
# assert "358975990917032" == get_field(document, "imei_number")[0]
# assert "350731691693549" == get_field(document, "imei_number")[1]
# assert "R52W70BHDWX" == get_field(document, "imei_number")[2]
# assert "350073345090297" == get_field(document, "imei_number")[3]
# assert "0PBL3NHW500023N" == get_field(document, "imei_number")[4]
def test_0_invoice_3_imei():
invoice_files = []
imei_files = [
"test_samples/sbt/imei1.jpg",
"test_samples/sbt/imei2.jpg",
"test_samples/sbt/imei3.jpg"
]
data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files)
# def test_0_invoice_3_imei():
# invoice_files = []
# imei_files = [
# "test_samples/sbt/imei1.jpg",
# "test_samples/sbt/imei2.jpg",
# "test_samples/sbt/imei3.jpg"
# ]
# data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files)
data = data["data"]["data"]
assert data["total_pages"] == 3
assert data["ocr_num_pages"] == 3
assert data["status"] == 200
# data = data["data"]["data"]
# assert data["total_pages"] == 3
# assert data["ocr_num_pages"] == 3
# assert data["status"] == 200
document = data["document"][0]
assert document["end_page"] == 3
assert document["start_page"] == 1
# document = data["document"][0]
# assert document["end_page"] == 3
# assert document["start_page"] == 1
assert 3 == len(get_field(document, "imei_number"))
assert "358975990917032" == get_field(document, "imei_number")[0]
assert "350731691693549" == get_field(document, "imei_number")[1]
assert "R52W70BHDWX" == get_field(document, "imei_number")[2]
# assert 3 == len(get_field(document, "imei_number"))
# assert "358975990917032" == get_field(document, "imei_number")[0]
# assert "350731691693549" == get_field(document, "imei_number")[1]
# assert "R52W70BHDWX" == get_field(document, "imei_number")[2]
def test_1_invoice_pdf_1_imei():

View File

@ -10,12 +10,26 @@ PASSWORD = os.environ.get("IDP_PASSWORD", "XXXXXXXXXXXXXXXXXXXXX")
token = login(HOST, USERNAME, PASSWORD)
def check_invoice_data_2(document):
    """Assert the invoice-level fields of an extracted SBT document.

    Checks, in order: the document type, the retailer name, that no
    sold-to party was extracted, and that the expected purchase date is
    present in the purchase-date field.

    Args:
        document: One entry of the response's ``document`` list; it is
            indexed by ``"doc_type"`` and passed to ``get_field``.

    Raises:
        AssertionError: If any field differs from the expected value.
    """
    doc_type = document["doc_type"]
    assert doc_type == "sbt_document"

    retailer = get_field(document, "retailername")
    assert retailer == "Starhub Shop"

    # This sample is expected to yield no sold-to party.
    buyer = get_field(document, "sold_to_party")
    assert buyer is None

    # Containment check rather than equality on the purchase-date field.
    purchase_date = get_field(document, "purchase_date")
    assert "2022-02-22" in purchase_date
def test_invoice_only():
invoice_files = [
"test_samples/sbt/20220303025923NHNE_20220222_Starhub_Order_Confirmation_by_Email.pdf"
]
imei_files = []
data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files, ensure_success=False)
response_data = data["data"]
assert "imei_file param is required" in str(response_data)
data = data["data"]["data"]
assert data["total_pages"] == 1
assert data["ocr_num_pages"] == 1
assert data["status"] == 200
document = data["document"][0]
assert document["end_page"] == 1
assert document["start_page"] == 1
check_invoice_data_2(document)

View File

@ -26,7 +26,7 @@ def test_1_invoice_6_imei():
]
data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files, ensure_success=False)
assert "Limit reached. Number of imei_file limit at 5" in str(data["data"])
assert "Limit reached. Number of imei_file limit at 2" in str(data["data"])
def test_1_invoice_5_imei():
@ -42,50 +42,4 @@ def test_1_invoice_5_imei():
]
data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files, ensure_success=False)
data = data["data"]["data"]
assert data["total_pages"] == 6
assert data["ocr_num_pages"] == 6
assert data["status"] == 200
document = data["document"][0]
assert document["end_page"] == 6
assert document["start_page"] == 1
gt = {
"content": [
{
"label": "retailername",
"value": "Best Denki"
},
{
"label": "sold_to_party",
"value": None
},
{
"label": "purchase_date",
"value": [
"2022-02-18"
]
},
{
"label": "imei_number",
"value": [
"357822611219904",
"RFAW2022FED",
"5AWH14MT400396N",
"0HU33NIW200044K",
"0GJG4DBW200318X"
]
},
{
"label": "invoice_no",
"value": None
}
],
"doc_type": "sbt_document",
"end_page": 6,
"start_page": 1
}
diff = deepdiff.DeepDiff(document, gt)
assert diff == {}, f"Different result! GT: {gt}; RESULT: {document}"
assert "Limit reached. Number of imei_file limit at 2" in str(data["data"])

View File

@ -43,55 +43,55 @@ def test_1_invoice_2_imei():
assert diff == {}, f"Different result! GT: {gt}; RESULT: {document}"
def test_1_invoice_3_imei():
invoice_files = [
"test_samples/test_07/invoice_jpg.jpg",
]
imei_files = [
"test_samples/test_07/imei_valid_1.jpg",
"test_samples/test_07/imei_valid_2.jpg",
"test_samples/test_07/imei_valid_3.jpg",
]
data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files, ensure_success=False)
document = data["data"]["data"]["document"][0]
gt = json.load(open("test_samples/test_23.json", "r"))
diff = deepdiff.DeepDiff(document, gt)
# def test_1_invoice_3_imei():
# invoice_files = [
# "test_samples/test_07/invoice_jpg.jpg",
# ]
# imei_files = [
# "test_samples/test_07/imei_valid_1.jpg",
# "test_samples/test_07/imei_valid_2.jpg",
# "test_samples/test_07/imei_valid_3.jpg",
# ]
# data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files, ensure_success=False)
# document = data["data"]["data"]["document"][0]
# gt = json.load(open("test_samples/test_23.json", "r"))
# diff = deepdiff.DeepDiff(document, gt)
assert diff == {}, f"Different result! GT: {gt}; RESULT: {document}"
# assert diff == {}, f"Different result! GT: {gt}; RESULT: {document}"
def test_1_invoice_4_imei():
invoice_files = [
"test_samples/test_07/invoice_jpg.jpg",
]
imei_files = [
"test_samples/test_07/imei_valid_1.jpg",
"test_samples/test_07/imei_valid_2.jpg",
"test_samples/test_07/imei_valid_3.jpg",
"test_samples/test_07/imei_valid_4.jpg",
]
data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files, ensure_success=False)
document = data["data"]["data"]["document"][0]
gt = json.load(open("test_samples/test_24.json", "r"))
diff = deepdiff.DeepDiff(document, gt)
# def test_1_invoice_4_imei():
# invoice_files = [
# "test_samples/test_07/invoice_jpg.jpg",
# ]
# imei_files = [
# "test_samples/test_07/imei_valid_1.jpg",
# "test_samples/test_07/imei_valid_2.jpg",
# "test_samples/test_07/imei_valid_3.jpg",
# "test_samples/test_07/imei_valid_4.jpg",
# ]
# data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files, ensure_success=False)
# document = data["data"]["data"]["document"][0]
# gt = json.load(open("test_samples/test_24.json", "r"))
# diff = deepdiff.DeepDiff(document, gt)
assert diff == {}, f"Different result! GT: {gt}; RESULT: {document}"
# assert diff == {}, f"Different result! GT: {gt}; RESULT: {document}"
def test_1_invoice_5_imei():
invoice_files = [
"test_samples/test_07/invoice_jpg.jpg",
]
imei_files = [
"test_samples/test_07/imei_valid_1.jpg",
"test_samples/test_07/imei_valid_2.jpg",
"test_samples/test_07/imei_valid_3.jpg",
"test_samples/test_07/imei_valid_4.jpg",
"test_samples/test_07/imei_valid_5.jpg",
]
data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files, ensure_success=False)
document = data["data"]["data"]["document"][0]
gt = json.load(open("test_samples/test_25.json", "r"))
diff = deepdiff.DeepDiff(document, gt)
# def test_1_invoice_5_imei():
# invoice_files = [
# "test_samples/test_07/invoice_jpg.jpg",
# ]
# imei_files = [
# "test_samples/test_07/imei_valid_1.jpg",
# "test_samples/test_07/imei_valid_2.jpg",
# "test_samples/test_07/imei_valid_3.jpg",
# "test_samples/test_07/imei_valid_4.jpg",
# "test_samples/test_07/imei_valid_5.jpg",
# ]
# data = make_sbt_request(host=HOST, token=token, invoice_files=invoice_files, imei_files=imei_files, ensure_success=False)
# document = data["data"]["data"]["document"][0]
# gt = json.load(open("test_samples/test_25.json", "r"))
# diff = deepdiff.DeepDiff(document, gt)
assert diff == {}, f"Different result! GT: {gt}; RESULT: {document}"
# assert diff == {}, f"Different result! GT: {gt}; RESULT: {document}"