Spaces:

kakaoent
/

webtoon_cropper

Running

App Files Files Community

webtoon_cropper / src /wise_crop /detect_and_crop.py

wise-water

init commit

13aa528 4 months ago

raw

history blame contribute delete

3.01 kB

	import cv2
	import numpy as np
	from torchvision.transforms.v2 import ToPILImage
	from PIL import Image
	from transformers import pipeline
	import torch
	from imgutils.detect import detect_heads
	from src.utils.device import determine_accelerator
	# Shared converter used to turn cropped arrays into PIL images.
	topil = ToPILImage()

	# Device is whatever determine_accelerator() selects for this host.
	device = determine_accelerator()

	# Load the vision-language model once at import time; the cropping
	# function below queries it to confirm that a region shows a head.
	print("Loading AI Model...")
	pipe = pipeline(
	    task="image-text-to-text",
	    model="google/gemma-3-12b-it",
	    torch_dtype=torch.bfloat16,
	    device=device,
	)

	# Question put to the vision-language model for every candidate region.
	_HEAD_PROMPT = "You are given a black-and-white line drawing as input. Please analyze the image carefully. If the drawing contains the majority of a head or face—meaning most key facial features or the overall shape of the head are visible—respond with 'True'. Otherwise, respond with 'False'. Do not contain any punctuation or extra spaces in your answer. Just respond with 'True' or 'False'"


	def _vlm_rejects_head(crop):
	    """Return True when the module-level ``pipe`` model answers 'False' for ``crop``."""
	    messages = [
	        {
	            "role": "system",
	            "content": [{"type": "text", "text": "You are a helpful assistant."}],
	        },
	        {
	            "role": "user",
	            "content": [
	                {"type": "image", "image": crop},
	                {"type": "text", "text": _HEAD_PROMPT},
	            ],
	        },
	    ]
	    output = pipe(text=messages, max_new_tokens=200)
	    answer = output[0]["generated_text"][-1]["content"]
	    # Tolerate stray whitespace, casing or trailing punctuation in the reply.
	    # Anything that is not an explicit "false" counts as acceptance, which
	    # keeps the lenient direction of the original exact-match check.
	    return str(answer).strip().lower().startswith("false")


	def crop_and_mask_characters_gradio(pil_img):
	    """Find large non-white regions of a page that contain a character head.

	    The page is thresholded against a near-white background (gray value
	    above 253 counts as background), the external contours of what remains
	    are boxed, and every box of at least 256x256 pixels is checked twice:
	    first with ``detect_heads`` and then, only if that finds something, with
	    the module-level ``pipe`` vision-language model, which is asked to reply
	    'True' or 'False'.

	    Despite its name, this function writes no files and builds no masks; it
	    only returns coordinates.

	    Args:
	        pil_img: The page image. A PIL image is converted to RGB first. Any
	            other array-like is used as-is and is assumed to be
	            single-channel or RGB(A) ordered -- NOTE(review): confirm no
	            caller passes BGR arrays.

	    Returns:
	        A list of ``(index, (x, y, w, h))`` tuples, one per accepted region,
	        in the order ``cv2.findContours`` reports them. ``index`` counts
	        accepted regions starting at 1 and the box is in pixel coordinates
	        of the input image.
	    """
	    # Normalise the mode so palette, grayscale and RGBA inputs all work.
	    if isinstance(pil_img, Image.Image):
	        pil_img = pil_img.convert("RGB")
	    img = np.asarray(pil_img)

	    # PIL data is RGB, so use the RGB conversion rather than BGR2GRAY, which
	    # would swap the red and blue weights. A 2-D array is already grayscale.
	    gray = img if img.ndim == 2 else cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

	    # Everything that is not near-white becomes foreground.
	    _, thresh = cv2.threshold(gray, 253, 255, cv2.THRESH_BINARY_INV)
	    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

	    coord_info_list = []
	    for contour in contours:
	        x, y, w, h = cv2.boundingRect(contour)
	        if w < 256 or h < 256:  # Skip small contours
	            continue

	        # Convert the crop once and reuse it for both checks.
	        crop = topil(img[y:y + h, x:x + w])

	        # Run the cheap detector first so the large model is only queried
	        # for regions that already look like they hold a head.
	        if not detect_heads(crop):
	            continue
	        if _vlm_rejects_head(crop):
	            continue

	        coord_info_list.append((len(coord_info_list) + 1, (x, y, w, h)))
	    return coord_info_list