danielsyahputra committed on
Commit
add05ef
·
verified ·
1 Parent(s): 3259162

Upload 6 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ images/image1.jpg filter=lfs diff=lfs merge=lfs -text
37
+ images/image2.jpg filter=lfs diff=lfs merge=lfs -text
38
+ images/image3.jpg filter=lfs diff=lfs merge=lfs -text
39
+ images/image4.jpg filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+ import torch
3
+ import gradio as gr
4
+ from PIL import Image
5
+ from transformers import AutoProcessor, AutoModelForVision2Seq
6
+ from transformers.image_utils import load_image
7
+
8
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# NOTE(review): the processor comes from the 256M base checkpoint while the
# weights below are a 500M fine-tune -- confirm both share the same processor
# configuration (tokenizer, image size) before relying on this pairing.
processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-256M-Instruct")

# Load the fine-tuned vision-to-text weights in bfloat16 and move them to DEVICE.
model = AutoModelForVision2Seq.from_pretrained(
    "Hirai-Labs/FT-SmolVLM-500M-Instruct-ALPR",
    torch_dtype=torch.bfloat16,
    # Eager attention is used on every device; a per-device conditional whose
    # two arms are both "eager" adds nothing, so the constant is passed directly.
    _attn_implementation="eager",
).to(DEVICE)
17
+
18
# Prompt template sent with every image: a task instruction, a placeholder for
# the image itself, and the formatting rules (JSON schema) for the reply.
_TASK_INSTRUCTION = (
    "You are an AI assistant whose job is to inspect an image and provide the "
    "desired information from the image. If the desired field is not clear or "
    "not well detected, return None for this field. Do not try to guess."
)

# Kept as separate literals per output line; implicit concatenation yields the
# exact same text as a single long literal.
_FORMAT_INSTRUCTIONS = (
    'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\n'
    'As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\n'
    'the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\n'
    'Here is the output schema:\n'
    '```\n'
    '{"properties": {"type": {"title": "Type", "description": "Return the type of the vehicle", "examples": ["Car", "Truck", "Motorcycle", "Bus"], "type": "string"}, "license_plate": {"title": "License Plate", "description": "Return the license plate number of the vehicle", "type": "string"}, "make": {"title": "Make", "description": "Return the Make of the vehicle", "examples": ["Toyota", "Honda", "Ford", "Suzuki"], "type": "string"}, "model": {"title": "Model", "description": "Return the model of the vehicle", "examples": ["Corolla", "Civic", "F-150"], "type": "string"}, "color": {"title": "Color", "description": "Return the color of the vehicle", "examples": ["Red", "Blue", "Black", "White"], "type": "string"}}, "required": ["type", "license_plate", "make", "model", "color"]}\n'
    '```'
)

# Single user turn: instruction text, then the image slot, then the schema text.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": _TASK_INSTRUCTION},
            {"type": "image"},
            {"type": "text", "text": _FORMAT_INSTRUCTIONS},
        ],
    },
]
29
+
30
def _parse_assistant_reply(reply):
    """Convert the model's textual reply into a Python object.

    JSON is tried first because the prompt asks for a JSON instance. A Python
    literal is tried next because the prompt also tells the model to answer
    ``None`` for unclear fields, which is not valid JSON. If neither parser
    accepts the text, the stripped text itself is returned so the caller still
    has something to show instead of an exception.
    """
    import json  # function-scope import keeps this block self-contained

    text = reply.strip()
    try:
        return json.loads(text)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        pass
    try:
        return ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError):
        return text


def predictor(image):
    """Describe the vehicle in ``image`` using the fine-tuned vision model.

    Args:
        image: The picture to analyse, as delivered by the Gradio image input
            (declared with ``type="pil"``). ``None`` means nothing was uploaded.

    Returns:
        The parsed reply (normally a dict with the fields requested in the
        prompt schema), or the raw reply text when it cannot be parsed.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Fail with a readable message instead of an opaque error from load_image.
        raise gr.Error("Please provide an image before submitting.")

    image = load_image(image=image)
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(text=prompt, images=[image], return_tensors="pt")
    inputs = inputs.to(DEVICE)

    generated_ids = model.generate(**inputs, max_new_tokens=500)
    generated_texts = processor.batch_decode(
        generated_ids,
        skip_special_tokens=True,
    )
    output = generated_texts[0]

    # The decoded text is expected to echo the prompt followed by an
    # "Assistant:" marker and then the answer. Keep everything after the first
    # marker; if the marker is absent, fall back to the whole text rather than
    # raising IndexError.
    _, marker, reply = output.partition("Assistant:")
    if not marker:
        reply = output
    return _parse_assistant_reply(reply)
47
+
48
# Web UI: one PIL image in, the prediction rendered as text out, with the four
# bundled sample pictures (images/image1.jpg .. images/image4.jpg) as examples.
iface = gr.Interface(
    fn=predictor,
    inputs=gr.Image(type="pil"),
    outputs="text",
    examples=[f"images/image{index}.jpg" for index in range(1, 5)],
)

# Serve on every network interface, port 8080.
iface.launch(server_port=8080, server_name="0.0.0.0")
images/image1.jpg ADDED

Git LFS Details

  • SHA256: 073f8a502e49f4f82bc5c4d9e8ec1ed2917b4e38f33637dcf3026204015aa190
  • Pointer size: 131 Bytes
  • Size of remote file: 126 kB
images/image2.jpg ADDED

Git LFS Details

  • SHA256: 1dfdaf9fa3ea24fc78ee781d09f71d3d0525735d9f146dc22688dbfa6abb728b
  • Pointer size: 131 Bytes
  • Size of remote file: 224 kB
images/image3.jpg ADDED

Git LFS Details

  • SHA256: dc1f55df3ae0588d1ae7076ee1c48b8354f777773dcfb525d092ccd2534f54f2
  • Pointer size: 131 Bytes
  • Size of remote file: 116 kB
images/image4.jpg ADDED

Git LFS Details

  • SHA256: bf9af9384cdd84c9d67b138bc619240a549ffed75723c9e1d67b11028cdcbf00
  • Pointer size: 131 Bytes
  • Size of remote file: 134 kB
requirements.txt ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==1.4.0
2
+ aiofiles==23.2.1
3
+ annotated-types==0.7.0
4
+ anyio==4.8.0
5
+ certifi==2025.1.31
6
+ charset-normalizer==3.4.1
7
+ click==8.1.8
8
+ fastapi==0.115.8
9
+ ffmpy==0.5.0
10
+ filelock==3.17.0
11
+ fsspec==2025.2.0
12
+ gradio==5.17.0
13
+ gradio_client==1.7.1
14
+ h11==0.14.0
15
+ httpcore==1.0.7
16
+ httpx==0.28.1
17
+ huggingface-hub==0.29.1
18
+ idna==3.10
19
+ Jinja2==3.1.5
20
+ markdown-it-py==3.0.0
21
+ MarkupSafe==2.1.5
22
+ mdurl==0.1.2
23
+ mpmath==1.3.0
24
+ networkx==3.4.2
25
+ numpy==2.2.3
26
+ orjson==3.10.15
27
+ packaging==24.2
28
+ pandas==2.2.3
29
+ peft==0.14.0
30
+ pillow==11.1.0
31
+ psutil==7.0.0
32
+ pydantic==2.10.6
33
+ pydantic_core==2.27.2
34
+ pydub==0.25.1
35
+ Pygments==2.19.1
36
+ python-dateutil==2.9.0.post0
37
+ python-multipart==0.0.20
38
+ pytz==2025.1
39
+ PyYAML==6.0.2
40
+ regex==2024.11.6
41
+ requests==2.32.3
42
+ rich==13.9.4
43
+ ruff==0.9.7
44
+ safehttpx==0.1.6
45
+ safetensors==0.5.2
46
+ semantic-version==2.10.0
47
+ shellingham==1.5.4
48
+ six==1.17.0
49
+ sniffio==1.3.1
50
+ starlette==0.45.3
51
+ sympy==1.13.1
52
+ tokenizers==0.21.0
53
+ tomlkit==0.13.2
54
+ torch==2.6.0+cpu
55
+ tqdm==4.67.1
56
+ transformers==4.49.0
57
+ typer==0.15.1
58
+ typing_extensions==4.12.2
59
+ tzdata==2025.1
60
+ urllib3==2.3.0
61
+ uvicorn==0.34.0
62
+ websockets==14.2