Spaces:

winamnd
/

ocr-llm-test

Running

App Files Community

winamnd committed on Feb 17

Commit

bf26c19

verified ·

1 Parent(s): 44a4a1e

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -20

app.py CHANGED Viewed

@@ -74,29 +74,27 @@ def ocr_with_tesseract(img):
     return extracted_text, confidences
 # OCR & Classification Function
-def generate_ocr(method, image):
-    if image is None:
         raise gr.Error("Please upload an image!")
     # Convert PIL Image to OpenCV format
-    img_cv = preprocess_image(image)
     # Select OCR method
     if method == "PaddleOCR":
-        extracted_text, confidences = ocr_with_paddle(img_cv)
     elif method == "EasyOCR":
-        extracted_text, confidences = ocr_with_easy(img_cv)
     elif method == "KerasOCR":
-        extracted_text, confidences = ocr_with_keras(img_cv)
     elif method == "TesseractOCR":
-        extracted_text, confidences = ocr_with_tesseract(img_cv)
     else:
         return "Invalid OCR method", "N/A"
-    # Join extracted text into a single string
-    text_output = " ".join(extracted_text).strip()
-    # If no text detected, return early
     if len(text_output) == 0:
         return "No text detected!", "Cannot classify"
@@ -108,19 +106,15 @@ def generate_ocr(method, image):
         outputs = model(**inputs)
         logits = outputs.logits  # Get raw logits
-    # Print raw logits for debugging
     print(f"Raw logits: {logits}")
-    # Compare raw logits instead of using softmax
     predicted_class = torch.argmax(logits, dim=1).item()
-    print(f"Predicted Class Index: {predicted_class}")  # Debugging output
-    # Ensure correct label mapping
-    if predicted_class == 1:
-        label = "Spam"
-    else:
-        label = "Not Spam"
     # Save results
     save_results_to_repo(text_output, label)

     return extracted_text, confidences
 # OCR & Classification Function
+def generate_ocr(method, img):
+    if img is None:
         raise gr.Error("Please upload an image!")
     # Convert PIL Image to OpenCV format
+    img = np.array(img)
     # Select OCR method
     if method == "PaddleOCR":
+        text_output = ocr_with_paddle(img)
     elif method == "EasyOCR":
+        text_output = ocr_with_easy(img)
     elif method == "KerasOCR":
+        text_output = ocr_with_keras(img)
     elif method == "TesseractOCR":
+        text_output, _ = ocr_with_tesseract(img)  # Ignore confidence values
     else:
         return "Invalid OCR method", "N/A"
+    # Clean and truncate the extracted text
+    text_output = text_output.strip()
     if len(text_output) == 0:
         return "No text detected!", "Cannot classify"
         outputs = model(**inputs)
         logits = outputs.logits  # Get raw logits
+    # Debugging: Print raw logits
     print(f"Raw logits: {logits}")
+    # Use raw logits directly instead of softmax
     predicted_class = torch.argmax(logits, dim=1).item()
+    # Map class index to labels
+    label_map = {0: "Not Spam", 1: "Spam"}
+    label = label_map.get(predicted_class, "Unknown")
     # Save results
     save_results_to_repo(text_output, label)