Spaces:

Hirai-Labs
/

SmolVLM-ALPR

Runtime error

App Files Files Community

danielsyahputra committed on Feb 21

Commit

add05ef

verified ·

1 Parent(s): 3259162

Upload 6 files

Browse files

Files changed (7) hide show

.gitattributes +4 -0
app.py +55 -0
images/image1.jpg +3 -0
images/image2.jpg +3 -0
images/image3.jpg +3 -0
images/image4.jpg +3 -0
requirements.txt +62 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+images/image1.jpg filter=lfs diff=lfs merge=lfs -text
+images/image2.jpg filter=lfs diff=lfs merge=lfs -text
+images/image3.jpg filter=lfs diff=lfs merge=lfs -text
+images/image4.jpg filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import ast
+import json
+import os
+import torch
+import gradio as gr
+from PIL import Image
+from transformers import AutoProcessor, AutoModelForVision2Seq
+from transformers.image_utils import load_image
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-256M-Instruct")
+model = AutoModelForVision2Seq.from_pretrained(
+    "Hirai-Labs/FT-SmolVLM-500M-Instruct-ALPR",
+    torch_dtype=torch.bfloat16,
+    _attn_implementation="eager" if DEVICE == "cuda" else "eager",
+).to(DEVICE)
+# Chat-format prompt reused for every request: a task instruction, an image slot, then the JSON-schema description of the expected output.
+messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "You are an AI assistant whose job is to inspect an image and provide the desired information from the image. If the desired field is not clear or not well detected, return None for this field. Do not try to guess."},
+                {"type": "image"},  # image slot; the actual image is handed to the processor at call time
+                {"type": "text", "text": 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"type": {"title": "Type", "description": "Return the type of the vehicle", "examples": ["Car", "Truck", "Motorcycle", "Bus"], "type": "string"}, "license_plate": {"title": "License Plate", "description": "Return the license plate number of the vehicle", "type": "string"}, "make": {"title": "Make", "description": "Return the Make of the vehicle", "examples": ["Toyota", "Honda", "Ford", "Suzuki"], "type": "string"}, "model": {"title": "Model", "description": "Return the model of the vehicle", "examples": ["Corolla", "Civic", "F-150"], "type": "string"}, "color": {"title": "Color", "description": "Return the color of the vehicle", "examples": ["Red", "Blue", "Black", "White"], "type": "string"}}, "required": ["type", "license_plate", "make", "model", "color"]}\n```'}
+            ]
+        }
+    ]
+def predictor(image):
+    image = load_image(image=image)
+    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
+    inputs = processor(text=prompt, images=[image], return_tensors="pt")
+    inputs = inputs.to(DEVICE)
+    generated_ids = model.generate(**inputs, max_new_tokens=500)
+    generated_texts = processor.batch_decode(
+        generated_ids,
+        skip_special_tokens=True,
+    )
+    output = generated_texts[0]
+    assistant_part = output.split("Assistant: ")[1]
+    dict_data = ast.literal_eval(assistant_part)
+    return dict_data
+iface = gr.Interface(
+    fn=predictor,
+    inputs=gr.Image(type="pil"),
+    outputs="text",
+    examples=["images/image1.jpg", "images/image2.jpg", "images/image3.jpg", "images/image4.jpg"]
+)
+iface.launch(server_name="0.0.0.0", server_port=8080)

images/image1.jpg ADDED Viewed

Git LFS Details

SHA256: 073f8a502e49f4f82bc5c4d9e8ec1ed2917b4e38f33637dcf3026204015aa190
Pointer size: 131 Bytes
Size of remote file: 126 kB

images/image2.jpg ADDED Viewed

Git LFS Details

SHA256: 1dfdaf9fa3ea24fc78ee781d09f71d3d0525735d9f146dc22688dbfa6abb728b
Pointer size: 131 Bytes
Size of remote file: 224 kB

images/image3.jpg ADDED Viewed

Git LFS Details

SHA256: dc1f55df3ae0588d1ae7076ee1c48b8354f777773dcfb525d092ccd2534f54f2
Pointer size: 131 Bytes
Size of remote file: 116 kB

images/image4.jpg ADDED Viewed

Git LFS Details

SHA256: bf9af9384cdd84c9d67b138bc619240a549ffed75723c9e1d67b11028cdcbf00
Pointer size: 131 Bytes
Size of remote file: 134 kB

requirements.txt ADDED Viewed

	@@ -0,0 +1,62 @@

+accelerate==1.4.0
+aiofiles==23.2.1
+annotated-types==0.7.0
+anyio==4.8.0
+certifi==2025.1.31
+charset-normalizer==3.4.1
+click==8.1.8
+fastapi==0.115.8
+ffmpy==0.5.0
+filelock==3.17.0
+fsspec==2025.2.0
+gradio==5.17.0
+gradio_client==1.7.1
+h11==0.14.0
+httpcore==1.0.7
+httpx==0.28.1
+huggingface-hub==0.29.1
+idna==3.10
+Jinja2==3.1.5
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+mdurl==0.1.2
+mpmath==1.3.0
+networkx==3.4.2
+numpy==2.2.3
+orjson==3.10.15
+packaging==24.2
+pandas==2.2.3
+peft==0.14.0
+pillow==11.1.0
+psutil==7.0.0
+pydantic==2.10.6
+pydantic_core==2.27.2
+pydub==0.25.1
+Pygments==2.19.1
+python-dateutil==2.9.0.post0
+python-multipart==0.0.20
+pytz==2025.1
+PyYAML==6.0.2
+regex==2024.11.6
+requests==2.32.3
+rich==13.9.4
+ruff==0.9.7
+safehttpx==0.1.6
+safetensors==0.5.2
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.17.0
+sniffio==1.3.1
+starlette==0.45.3
+sympy==1.13.1
+tokenizers==0.21.0
+tomlkit==0.13.2
+# The "+cpu" local build is not published on PyPI; it is served from the PyTorch CPU wheel index.
+--extra-index-url https://download.pytorch.org/whl/cpu
+torch==2.6.0+cpu
+tqdm==4.67.1
+transformers==4.49.0
+typer==0.15.1
+typing_extensions==4.12.2
+tzdata==2025.1
+urllib3==2.3.0
+uvicorn==0.34.0
+websockets==14.2