# Hugging Face Space source file (Space status: Sleeping; file size: 6,224 bytes).
# Revision history for this file includes commits:
# 168c622, 83f7b7e, eb0b8f5, 8d8b4cc, b9a151f, b80196d.
import gradio as gr
import torch
from transformers import CLIPProcessor, CLIPModel, BlipProcessor, BlipForConditionalGeneration
from PIL import Image, ImageChops, ImageFilter
import numpy as np
import matplotlib.pyplot as plt
from openai import OpenAI
# Initialise the models: load the pretrained CLIP and BLIP checkpoints once at
# import time so every call in this module reuses the same instances.
_CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"

clip_model = CLIPModel.from_pretrained(_CLIP_CHECKPOINT)
clip_processor = CLIPProcessor.from_pretrained(_CLIP_CHECKPOINT)
blip_processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
blip_model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)
# Image processing helpers
def compute_difference_images(img_a, img_b):
    """Derive the comparison renderings for a pair of PIL images.

    Args:
        img_a: First PIL image.
        img_b: Second PIL image.

    Returns:
        A dict, in insertion order, with keys ``original_a``, ``original_b``,
        ``sketch_a``, ``sketch_b``, ``normal_a``, ``normal_b`` and
        ``diff_overlay``. The originals are the inputs themselves; the other
        values are new images.
    """

    def _to_sketch(picture):
        # "Line-art" rendering: screen-blend the grayscale image with its
        # own inverse.
        gray = picture.convert("L")
        return ImageChops.screen(gray, ImageChops.invert(gray))

    def _to_normal_map(picture):
        # Not a real normal map: it is approximated by the edge-detection
        # filter.
        return picture.filter(ImageFilter.FIND_EDGES)

    # Difference of the two inputs, computed first so that incompatible
    # images fail before any other rendering is produced.
    overlay = ImageChops.difference(img_a, img_b)

    tagged = (("a", img_a), ("b", img_b))
    rendered = {"original_a": img_a, "original_b": img_b}
    rendered.update({f"sketch_{tag}": _to_sketch(pic) for tag, pic in tagged})
    rendered.update({f"normal_{tag}": _to_normal_map(pic) for tag, pic in tagged})
    rendered["diff_overlay"] = overlay
    return rendered
# Save images to files
def save_images(images, output_dir=None):
    """Write every image in *images* to a PNG file and return gallery entries.

    The original implementation wrote fixed file names (``<key>.png``) into
    the current working directory, so two overlapping requests overwrote each
    other's files. Each call now writes into its own directory.

    Args:
        images: Mapping of name -> object exposing ``save(path)`` (e.g. a PIL
            image). Iteration order determines the order of the result.
        output_dir: Directory to write into. When ``None`` (the default) a
            fresh temporary directory is created for this call. The directory
            is not removed by this function.

    Returns:
        A list of ``(path, label)`` tuples, where ``label`` is the key with
        underscores replaced by spaces and the first letter capitalised.
    """
    import os
    import tempfile

    if output_dir is None:
        # Unique per call, so concurrent callers never share file paths.
        output_dir = tempfile.mkdtemp(prefix="image_diff_")
    else:
        os.makedirs(output_dir, exist_ok=True)

    saved = []
    for key, img in images.items():
        path = os.path.join(output_dir, f"{key}.png")
        img.save(path)
        saved.append((path, key.replace("_", " ").capitalize()))
    return saved
# Generate a more detailed caption with BLIP
def generate_detailed_caption(image):
    """Return a text caption for *image* produced by the module-level BLIP model.

    Generation uses beam search (5 beams), a maximum length of 128 and
    forbids repeated 2-grams; special tokens are stripped from the decoded
    text.
    """
    model_inputs = blip_processor(image, return_tensors="pt")
    generation_options = {
        "max_length": 128,
        "num_beams": 5,
        "no_repeat_ngram_size": 2,
    }
    token_ids = blip_model.generate(**model_inputs, **generation_options)
    first_sequence = token_ids[0]
    return blip_processor.decode(first_sequence, skip_special_tokens=True)
# Visualise feature differences
def plot_feature_differences(latent_diff):
    """Render a bar chart and a pie chart of per-feature difference magnitudes.

    Changes relative to the earlier version:
      * The pie chart now shows the ten largest magnitudes (labelled with
        their feature indices), matching its "Top 10" title; previously it
        plotted the first ten features.
      * Fewer than ten features, or an all-zero difference (e.g. identical
        images), no longer produce a label-count mismatch or an undefined
        pie; a placeholder message is drawn for the all-zero case.
      * Charts are drawn through explicit Figure/Axes handles rather than
        pyplot's implicit "current figure", and are written to a fresh
        temporary directory per call, so overlapping calls do not draw on or
        overwrite each other's output.

    Args:
        latent_diff: Nested sequence whose first element is the sequence of
            per-feature differences.

    Returns:
        ``(bar_chart_path, pie_chart_path)`` - paths of the two PNG files.
        The temporary directory holding them is not removed here.
    """
    import os
    import tempfile

    magnitudes = [abs(value) for value in latent_diff[0]]
    out_dir = tempfile.mkdtemp(prefix="feature_diff_")

    # Bar chart over every feature index.
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.bar(range(len(magnitudes)), magnitudes, alpha=0.7)
    ax.set_xlabel("Feature Index")
    ax.set_ylabel("Magnitude of Difference")
    ax.set_title("Feature Differences (Bar Chart)")
    bar_chart_path = os.path.join(out_dir, "bar_chart.png")
    fig.savefig(bar_chart_path)
    plt.close(fig)

    # Pie chart over the (up to) ten largest magnitudes.
    top_indices = sorted(
        range(len(magnitudes)), key=magnitudes.__getitem__, reverse=True
    )[:10]
    top_values = [magnitudes[i] for i in top_indices]

    fig, ax = plt.subplots(figsize=(6, 6))
    if sum(top_values) > 0:
        ax.pie(
            top_values,
            labels=[str(i) for i in top_indices],
            autopct="%1.1f%%",
            startangle=140,
        )
    else:
        # A pie cannot be normalised when every slice is zero.
        ax.text(0.5, 0.5, "No feature differences", ha="center", va="center")
        ax.axis("off")
    ax.set_title("Top 10 Feature Differences (Pie Chart)")
    pie_chart_path = os.path.join(out_dir, "pie_chart.png")
    fig.savefig(pie_chart_path)
    plt.close(fig)

    return bar_chart_path, pie_chart_path
# Analysis entry point
def analyze_images(image_a, image_b, api_key):
    """Compare two images and collect captions, charts and a text analysis.

    Steps: build and save the comparison renderings, caption both images
    with BLIP, take the absolute difference of their CLIP image features,
    ask the DeepSeek chat model to analyse the two captions, and plot the
    feature differences.

    Args:
        image_a: First PIL image (``None`` when nothing was uploaded).
        image_b: Second PIL image (``None`` when nothing was uploaded).
        api_key: API key for the DeepSeek endpoint.

    Returns:
        A dict with keys ``caption_a``, ``caption_b``, ``text_analysis``,
        ``saved_images``, ``bar_chart`` and ``pie_chart``.

    Raises:
        gr.Error: If either image or the API key is missing. Previously a
            missing image surfaced as an ``AttributeError`` on ``None``.
    """
    # Fail early with a user-visible message instead of an opaque traceback.
    if image_a is None or image_b is None:
        raise gr.Error("请先上传图片A和图片B。")
    if not api_key or not api_key.strip():
        raise gr.Error("请输入您的 DeepSeek API Key。")

    # OpenAI-compatible client pointed at the DeepSeek endpoint.
    client = OpenAI(api_key=api_key.strip(), base_url="https://api.deepseek.com")

    # Comparison renderings.
    img_a = image_a.convert("RGB")
    img_b = image_b.convert("RGB")
    images_diff = compute_difference_images(img_a, img_b)
    saved_images = save_images(images_diff)

    # BLIP captions.
    caption_a = generate_detailed_caption(img_a)
    caption_b = generate_detailed_caption(img_b)

    # CLIP feature extraction.
    def extract_features(image):
        inputs = clip_processor(images=image, return_tensors="pt")
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            features = clip_model.get_image_features(**inputs)
        return features.detach().numpy()

    features_a = extract_features(img_a)
    features_b = extract_features(img_b)
    latent_diff = np.abs(features_a - features_b).tolist()

    # Ask the chat model for a more detailed comparison of the captions.
    gpt_response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"图片A的描述为:{caption_a}。图片B的描述为:{caption_b}。\n请对两张图片的内容和潜在特征区别进行详细分析,并输出一个简洁但富有条理的总结。"}
        ],
        stream=False
    )
    # The message content may be None; treat that as an empty analysis.
    text_analysis = (gpt_response.choices[0].message.content or "").strip()

    # Feature-difference charts.
    bar_chart_path, pie_chart_path = plot_feature_differences(latent_diff)

    return {
        "caption_a": caption_a,
        "caption_b": caption_b,
        "text_analysis": text_analysis,
        "saved_images": saved_images,
        "bar_chart": bar_chart_path,
        "pie_chart": pie_chart_path
    }
# Gradio interface
# NOTE(review): the original indentation was lost, so the Row/Column nesting
# below is reconstructed from the statement order - confirm against the
# intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# 图像对比分析工具")
    api_key_input = gr.Textbox(label="API Key", placeholder="输入您的 DeepSeek API Key", type="password")

    # Inputs: the two images side by side.
    with gr.Row():
        with gr.Column():
            image_a = gr.Image(label="图片A", type="pil")
        with gr.Column():
            image_b = gr.Image(label="图片B", type="pil")
    analyze_button = gr.Button("分析图片")

    # Outputs: comparison gallery, captions, then analysis text and charts.
    with gr.Row():
        gr.Markdown("## 图像差异")
        result_diff = gr.Gallery(label="混合差异图像")
    with gr.Row():
        result_caption_a = gr.Textbox(label="图片A描述", interactive=False)
        result_caption_b = gr.Textbox(label="图片B描述", interactive=False)
    with gr.Row():
        gr.Markdown("## 差异分析")
        result_text_analysis = gr.Textbox(label="详细分析", interactive=False, lines=5)
        result_bar_chart = gr.Image(label="特征差异柱状图")
        result_pie_chart = gr.Image(label="特征差异饼图")

    # Analysis callback
    def process_analysis(img_a, img_b, api_key):
        """Run the analysis and return its results in output-component order."""
        results = analyze_images(img_a, img_b, api_key)
        return (
            results["saved_images"],
            results["caption_a"],
            results["caption_b"],
            results["text_analysis"],
            results["bar_chart"],
            results["pie_chart"]
        )

    # The order of `outputs` must match the tuple returned above.
    analyze_button.click(
        fn=process_analysis,
        inputs=[image_a, image_b, api_key_input],
        outputs=[
            result_diff,
            result_caption_a,
            result_caption_b,
            result_text_analysis,
            result_bar_chart,
            result_pie_chart
        ]
    )

# The stray trailing "|" after this call was extraction residue and a syntax
# error; it is removed here.
demo.launch()