Business-Card-Scanner-To-Csv-gredio

Sleeping

App Files Community

codic committed on Mar 19

Commit

2857c36

verified ·

1 Parent(s): 1d35187

gredio

Browse files

Files changed (1) hide show

app.py +101 -177

app.py CHANGED Viewed

@@ -1,68 +1,35 @@
-import streamlit as st
 import easyocr
 import pandas as pd
-from io import BytesIO
-from PIL import Image
 import numpy as np
 import os
 from pathlib import Path
 from gliner import GLiNER
 import cv2
 import re
 # Set environment variables for model storage
-os.environ['GLINER_HOME'] = str(Path.home() / '.gliner_models')
-os.environ['TRANSFORMERS_CACHE'] = str(Path.home() / '.gliner_models' / 'cache')
 # Initialize EasyOCR reader with English and Arabic support
 reader = easyocr.Reader(['en', 'ar'])
-def get_model_path():
-    """Get the path to the local model directory."""
-    base_dir = Path.home() / '.gliner_models'
-    model_dir = base_dir / 'gliner_large-v2.1'
-    return model_dir
-def download_model():
-    """Download the model if it doesn't exist locally."""
-    model_dir = get_model_path()
-    if not model_dir.exists():
-        st.info("Downloading GLiNER model for the first time... This may take a few minutes.")
-        try:
-            model_dir.parent.mkdir(parents=True, exist_ok=True)
-            temp_model = GLiNER.from_pretrained("urchade/gliner_large-v2.1")
-            temp_model.save_pretrained(str(model_dir))
-            st.success("Model downloaded successfully!")
-            return temp_model
-        except Exception as e:
-            st.error(f"Error downloading model: {str(e)}")
-            raise e
-    return None
-@st.cache_resource
 def load_gliner_model():
-    """Load the GLiNER model, downloading it if necessary."""
-    model_dir = get_model_path()
-    if model_dir.exists():
-        try:
-            return GLiNER.from_pretrained(str(model_dir))
-        except Exception as e:
-            st.warning("Error loading existing model. Attempting to redownload...")
-            import shutil
-            shutil.rmtree(model_dir, ignore_errors=True)
-    model = download_model()
-    if model:
         return model
-    return GLiNER.from_pretrained(str(model_dir))
 def preprocess_image(image):
-    """
-    Preprocess the image using OpenCV:
-    - Convert to grayscale
-    - Apply median blur for denoising
-    - Apply thresholding (Otsu) for binarization
-    """
     img_array = np.array(image)
     gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
     denoised = cv2.medianBlur(gray, 3)
@@ -70,145 +37,102 @@ def preprocess_image(image):
     return thresh
 def clean_extracted_text(text):
-    """
-    Clean the extracted text:
-    - Remove unwanted characters while preserving Arabic Unicode blocks,
-      English letters, digits, spaces, and common punctuation.
-    - Normalize extra spaces.
-    """
     cleaned = re.sub(r'[^\u0600-\u06FF\u0750-\u077F\u08A0-\u08FFA-Za-z0-9\s@.,-]', '', text)
-    cleaned = re.sub(r'\s+', ' ', cleaned).strip()
-    return cleaned
-def extract_text_from_image(image):
-    """
-    Preprocess the image and extract text using EasyOCR.
-    """
-    preprocessed_image = preprocess_image(image)
-    return reader.readtext(preprocessed_image, detail=0, paragraph=True)
-def process_entities(text: str, model, threshold: float, nested_ner: bool) -> dict:
-    """
-    Process text with GLiNER model to extract business card entities.
-    """
-    # Define business card labels
-    labels = "person name, company name, job title, phone, email, address"
-    labels = [label.strip() for label in labels.split(",")]
-    # Get predictions
-    entities = model.predict_entities(
-        text,
-        labels,
-        flat_ner=not nested_ner,
-        threshold=threshold
-    )
-    # Format results
-    formatted_entities = []
-    for entity in entities:
-        formatted_entities.append({
-            "entity": entity["label"],
-            "word": entity["text"],
-            "start": entity["start"],
-            "end": entity["end"]
-        })
-    # Organize results by category
-    results = {
-        "Person Name": [],
-        "Company Name": [],
-        "Job Title": [],
-        "Phone": [],
-        "Email": [],
-        "Address": []
     }
-    for entity in formatted_entities:
-        category = entity["entity"].title()
-        if category in results:
-            results[category].append(entity["word"])
-    # Join multiple entries with semicolons
-    return {k: "; ".join(set(v)) if v else "" for k, v in results.items()}
-def main():
-    st.title("Business Card Information Extractor")
-    # Model settings in sidebar
-    st.sidebar.title("Settings")
-    threshold = st.sidebar.slider(
-        "Detection Threshold",
-        min_value=0.0,
-        max_value=1.0,
-        value=0.3,
-        step=0.05,
-        help="Lower values will detect more entities"
-    )
-    nested_ner = st.sidebar.checkbox(
-        "Enable Nested NER",
-        value=True,
-        help="Allow detection of nested entities"
-    )
-    # Upload options
-    upload_type = st.sidebar.radio("Upload Type", ("Single", "Batch"))
-    # File uploader for business card images
-    uploaded_files = st.file_uploader(
-        "Upload Business Card Image(s)",
-        type=["png", "jpg", "jpeg"],
-        accept_multiple_files=(upload_type == "Batch")
     )
-    if uploaded_files:
-        # Load GLiNER model
-        model = load_gliner_model()
-        results = []
-        files_to_process = uploaded_files if isinstance(uploaded_files, list) else [uploaded_files]
-        progress_bar = st.progress(0)
-        for idx, file in enumerate(files_to_process):
-            with st.expander(f"Processing {file.name}"):
-                image = Image.open(file)
-                # Extract text using OCR after preprocessing
-                extracted_text_list = extract_text_from_image(image)
-                raw_text = " ".join(extracted_text_list)
-                # Clean the extracted text
-                clean_text = clean_extracted_text(raw_text)
-                st.text("Extracted Text:")
-                st.text(clean_text)
-                # Process extracted text with GLiNER for entity recognition
-                result = process_entities(clean_text, model, threshold, nested_ner)
-                result["File Name"] = file.name
-                results.append(result)
-                st.json(result)
-            progress_bar.progress((idx + 1) / len(files_to_process))
-        if results:
-            st.success("Processing Complete!")
-            # Convert results to a DataFrame
-            df = pd.DataFrame(results)
-            cols = ["File Name"] + [col for col in df.columns if col != "File Name"]
-            df = df[cols]
-            st.dataframe(df, use_container_width=True)
-            csv = df.to_csv(index=False)
-            st.download_button(
-                "Download Results CSV",
-                csv,
-                "business_card_results.csv",
-                "text/csv",
-                key='download-csv'
-            )
-if __name__ == "__main__":
-    main()

+import gradio as gr
 import easyocr
 import pandas as pd
 import numpy as np
 import os
 from pathlib import Path
 from gliner import GLiNER
 import cv2
 import re
+from PIL import Image
+import time
 # Set environment variables for model storage
+os.environ['GLINER_HOME'] = '/tmp/.gliner_models'
+os.environ['TRANSFORMERS_CACHE'] = '/tmp/.gliner_models/cache'
 # Initialize EasyOCR reader with English and Arabic support
 reader = easyocr.Reader(['en', 'ar'])
+# Initialize GLiNER model
 def load_gliner_model():
+    model_path = Path(os.environ['GLINER_HOME']) / 'gliner_large-v2.1'
+    if not model_path.exists():
+        model_path.parent.mkdir(parents=True, exist_ok=True)
+        model = GLiNER.from_pretrained("urchade/gliner_large-v2.1")
+        model.save_pretrained(str(model_path))
         return model
+    return GLiNER.from_pretrained(str(model_path))
+model = load_gliner_model()
 def preprocess_image(image):
     img_array = np.array(image)
     gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
     denoised = cv2.medianBlur(gray, 3)
     return thresh
 def clean_extracted_text(text):
     cleaned = re.sub(r'[^\u0600-\u06FF\u0750-\u077F\u08A0-\u08FFA-Za-z0-9\s@.,-]', '', text)
+    return re.sub(r'\s+', ' ', cleaned).strip()
+def process_single_image(image, threshold=0.3, nested_ner=True, progress=gr.Progress()):
+    try:
+        # Preprocess and extract text
+        progress(0.2, "Processing image...")
+        preprocessed = preprocess_image(image)
+        ocr_results = reader.readtext(preprocessed, detail=0, paragraph=True)
+        clean_text = clean_extracted_text(" ".join(ocr_results))
+        # Entity extraction
+        progress(0.6, "Extracting entities...")
+        labels = ["person name", "company name", "job title", "phone", "email", "address"]
+        entities = model.predict_entities(clean_text, labels, threshold=threshold, flat_ner=not nested_ner)
+        # Format results
+        results = {label.title(): [] for label in labels}
+        for entity in entities:
+            label = entity["label"].title()
+            if label in results:
+                results[label].append(entity["text"])
+        return {
+            "text": clean_text,
+            "entities": {k: "; ".join(set(v)) for k, v in results.items()},
+            "csv": pd.DataFrame([results])
+        }
+    except Exception as e:
+        return {"error": str(e)}
+def process_batch(files, threshold, nested_ner, progress=gr.Progress()):
+    results = []
+    for i, file in enumerate(files):
+        progress(i/len(files), f"Processing {file.name}...")
+        try:
+            image = Image.open(file)
+            result = process_single_image(image, threshold, nested_ner)
+            if "error" not in result:
+                result["filename"] = file.name
+                results.append(result)
+        except Exception as e:
+            results.append({"filename": file.name, "error": str(e)})
+    # Create CSV
+    df = pd.DataFrame([{
+        "Filename": r["filename"],
+        **r.get("entities", {}),
+        "Raw Text": r.get("text", ""),
+        "Error": r.get("error", "")
+    } for r in results])
+    return {
+        "batch_results": results,
+        "csv": df
     }
+with gr.Blocks() as app:
+    gr.Markdown("# Business Card Information Extractor")
+    with gr.Tab("Single File"):
+        with gr.Row():
+            with gr.Column():
+                single_image = gr.Image(label="Upload Business Card", type="pil")
+                threshold_single = gr.Slider(0.0, 1.0, value=0.3, label="Detection Threshold")
+                nested_ner_single = gr.Checkbox(True, label="Enable Nested NER")
+                submit_single = gr.Button("Process")
+            with gr.Column():
+                text_output = gr.Textbox(label="Extracted Text")
+                json_output = gr.JSON(label="Entities")
+                csv_download_single = gr.File(label="Download Results")
+    with gr.Tab("Batch Processing"):
+        with gr.Row():
+            with gr.Column():
+                batch_files = gr.Files(label="Upload Business Cards", file_types=["image"])
+                threshold_batch = gr.Slider(0.0, 1.0, value=0.3, label="Detection Threshold")
+                nested_ner_batch = gr.Checkbox(True, label="Enable Nested NER")
+                submit_batch = gr.Button("Process Batch")
+            with gr.Column():
+                batch_results = gr.JSON(label="Processing Results")
+                csv_download_batch = gr.File(label="Download CSV")
+    # Single file processing
+    submit_single.click(
+        fn=process_single_image,
+        inputs=[single_image, threshold_single, nested_ner_single],
+        outputs=[text_output, json_output, csv_download_single]
     )
+    # Batch processing
+    submit_batch.click(
+        fn=process_batch,
+        inputs=[batch_files, threshold_batch, nested_ner_batch],
+        outputs=[batch_results, csv_download_batch]
+    )
+# For API access
+app.launch(enable_queue=True, share=True)