Spaces:

Noureddinesa
/

Layoutlmv3_v2_space

Running

App Files Files Community

Noureddinesa committed on Apr 8, 2024

Commit

901dbf6

verified ·

1 Parent(s): c47a001

Upload 5 files

Browse files

Files changed (5) hide show

App.py +19 -0
README.md +12 -0
arial.ttf +0 -0
requirements.txt +9 -0
utilitis.py +161 -0

App.py ADDED Viewed

	@@ -0,0 +1,19 @@

+import streamlit as st
+from utilitis import Draw,Add_Results
+from PIL import Image
+st.title("Welcome to Textra WebApp")
+st.markdown("### Drag and Drop Images Here:")
+st.write("(PNG, JPG, JPEG)")
+uploaded_file = st.file_uploader("Or select a file:", type=["png", "jpg", "jpeg"], accept_multiple_files=False)
+if uploaded_file is not None:
+    image = Image.open(uploaded_file)
+    image = image.convert("RGB")
+    image,Results = Draw(image)
+    #st.write("Predicted Text:",type(image))
+    st.image(image, caption="Uploaded Image", use_column_width=True)
+    st.sidebar.title('Results')
+    Add_Results(Results)

README.md ADDED Viewed

	@@ -0,0 +1,12 @@

+---
+title: LayoutLmv3 Space
+emoji: 🏢
+colorFrom: green
+colorTo: red
+sdk: streamlit
+sdk_version: 1.33.0
+app_file: App.py
+pinned: false
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

arial.ttf ADDED Viewed

Binary file (915 kB). View file

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+numpy==1.26.4
+paddleocr==2.7.0.3
+paddlepaddle==2.6.0
+pillow==10.2.0
+streamlit==1.33.0
+torch==2.2.2
+torchaudio==2.2.2
+torchvision==0.17.2
+transformers==4.39.2

utilitis.py ADDED Viewed

	@@ -0,0 +1,161 @@

+import streamlit as st
+from paddleocr import PaddleOCR
+from PIL import ImageDraw, ImageFont
+import torch
+from transformers import AutoProcessor,LayoutLMv3ForTokenClassification
+import numpy as np
+model_Hugging_path = "Noureddinesa/Output_LayoutLMv3_v2"
+#############################################################################
+#############################################################################
+def Labels():
+    labels = ['InvNum', 'InvDate', 'Fourni', 'TTC', 'TVA', 'TT', 'Autre']
+    id2label = {v: k for v, k in enumerate(labels)}
+    label2id = {k: v for v, k in enumerate(labels)}
+    return id2label, label2id
+#############################################################################
+#############################################################################
+def Paddle():
+    ocr = PaddleOCR(use_angle_cls=False,lang='fr',rec=False)
+    return ocr
+def processbbox(BBOX, width, height):
+    bbox = []
+    bbox.append(BBOX[0][0])
+    bbox.append(BBOX[0][1])
+    bbox.append(BBOX[2][0])
+    bbox.append(BBOX[2][1])
+    #Scaling
+    bbox[0]= 1000*bbox[0]/width # X1
+    bbox[1]= 1000*bbox[1]/height # Y1
+    bbox[2]= 1000*bbox[2]/width # X2
+    bbox[3]= 1000*bbox[3]/height # Y2
+    for i in range(4):
+        bbox[i] = int(bbox[i])
+    return bbox
+def Preprocess(image):
+    image_array = np.array(image)
+    ocr = Paddle()
+    width, height = image.size
+    results = ocr.ocr(image_array, cls=True)
+    results = results[0]
+    test_dict = {'image': image ,'tokens':[], "bboxes":[]}
+    for item in results :
+       bbox = processbbox(item[0], width, height)
+       test_dict['tokens'].append(item[1][0])
+       test_dict['bboxes'].append(bbox)
+    print(test_dict['bboxes'])
+    print(test_dict['tokens'])
+    return test_dict
+#############################################################################
+#############################################################################
+def Encode(image):
+    example = Preprocess(image)
+    image = example["image"]
+    words = example["tokens"]
+    boxes = example["bboxes"]
+    processor = AutoProcessor.from_pretrained(model_Hugging_path, apply_ocr=False)
+    encoding = processor(image, words, boxes=boxes,return_offsets_mapping=True,truncation=True, max_length=512, padding="max_length", return_tensors="pt")
+    offset_mapping = encoding.pop('offset_mapping')
+    return encoding, offset_mapping,words
+def unnormalize_box(bbox, width, height):
+     return [
+         width * (bbox[0] / 1000),
+         height * (bbox[1] / 1000),
+         width * (bbox[2] / 1000),
+         height * (bbox[3] / 1000),
+     ]
+def Run_model(image):
+    encoding,offset_mapping,words = Encode(image)
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    # load the fine-tuned model from the hub
+    model = LayoutLMv3ForTokenClassification.from_pretrained(model_Hugging_path)
+    model.to(device)
+    # forward pass
+    outputs = model(**encoding)
+    predictions = outputs.logits.argmax(-1).squeeze().tolist()
+    token_boxes = encoding.bbox.squeeze().tolist()
+    width, height = image.size
+    id2label, _  = Labels()
+    is_subword = np.array(offset_mapping.squeeze().tolist())[:,0] != 0
+    true_predictions = [id2label[pred] for idx, pred in enumerate(predictions) if not is_subword[idx]]
+    true_boxes = [unnormalize_box(box, width, height) for idx, box in enumerate(token_boxes) if not is_subword[idx]]
+    return true_predictions,true_boxes,words
+def Get_Json(true_predictions,words):
+    Results = {}
+    i = 0
+    for prd in true_predictions:
+        if prd in ['InvNum','Fourni', 'InvDate','TT','TTC','TVA']:
+                #print(i,prd,words[i-1])
+                Results[prd] = words[i-1]
+        i+=1
+    return Results
+def Draw(image):
+    true_predictions, true_boxes,words = Run_model(image)
+    draw = ImageDraw.Draw(image)
+    label2color = {
+        'InvNum': 'blue',
+        'InvDate': 'green',
+        'Fourni': 'orange',
+        'TTC':'purple',
+        'TVA': 'magenta',
+        'TT': 'red',
+        'Autre': 'black'
+    }
+    # Adjust the thickness of the rectangle outline and label text position
+    rectangle_thickness = 4
+    label_x_offset = 20
+    label_y_offset = -40
+    # Custom font size
+    custom_font_size = 25
+    # Load a font with the custom size
+    font_path = "arial.ttf"  # Specify the path to your font file
+    custom_font = ImageFont.truetype(font_path, custom_font_size)
+    for prediction, box in zip(true_predictions, true_boxes):
+        predicted_label = prediction
+        # Check if the predicted label exists in the label2color dictionary
+        if predicted_label in label2color:
+            color = label2color[predicted_label]
+        else:
+            color = 'black'  # Default color if label is not found
+        if predicted_label != "Autre":
+            draw.rectangle(box, outline=color, width=rectangle_thickness)
+            # Draw text using the custom font and size
+            draw.rectangle((box[0], box[1]+ label_y_offset,box[2],box[3]+ label_y_offset), fill=color)
+            draw.text((box[0] + label_x_offset, box[1] + label_y_offset), text=predicted_label, fill='white', font=custom_font)
+    # Get the Results Json File
+    Results = Get_Json(true_predictions,words)
+    return image,Results
+def Add_Results(data):
+    # Render the table
+    for key, value in data.items():
+        data[key] = st.sidebar.text_input(key, value)
+#############################################################################
+#############################################################################