"""Image comparison tool.

Compares two images via pixel-level difference renderings (sketch, edge map,
difference overlay), BLIP-generated captions, CLIP feature differences, and an
LLM-written textual analysis (DeepSeek, OpenAI-compatible API), served through
a Gradio UI.
"""

import gradio as gr
import numpy as np
import torch
import matplotlib
matplotlib.use("Agg")  # headless-safe backend: figures are only saved to disk
import matplotlib.pyplot as plt
from openai import OpenAI
from PIL import Image, ImageChops, ImageFilter  # ImageFilter was missing -> NameError
from transformers import (
    BlipForConditionalGeneration,
    BlipProcessor,
    CLIPModel,
    CLIPProcessor,
)

# Load the vision models once at import time (CPU inference).
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")


def compute_difference_images(img_a, img_b):
    """Build a dict of visual-difference renderings for two RGB PIL images.

    Returns the originals, a sketch-style rendering and an edge map
    ("normal" approximation) for each image, plus a per-pixel difference
    overlay between the two.
    """

    def extract_sketch(image):
        # Screen a grayscale copy with its inverse for a bright, sketch-like look.
        grayscale = image.convert("L")
        inverted = ImageChops.invert(grayscale)
        return ImageChops.screen(grayscale, inverted)

    def compute_normal_map(image):
        # Approximate a normal map with simple edge enhancement.
        return image.filter(ImageFilter.FIND_EDGES)

    # ImageChops.difference raises ValueError on mismatched sizes; align B to A.
    if img_a.size != img_b.size:
        img_b = img_b.resize(img_a.size)
    diff_overlay = ImageChops.difference(img_a, img_b)

    return {
        "original_a": img_a,
        "original_b": img_b,
        "sketch_a": extract_sketch(img_a),
        "sketch_b": extract_sketch(img_b),
        "normal_a": compute_normal_map(img_a),
        "normal_b": compute_normal_map(img_b),
        "diff_overlay": diff_overlay,
    }


def generate_detailed_caption(image):
    """Generate a detailed caption for *image* with BLIP (beam search)."""
    inputs = blip_processor(image, return_tensors="pt")
    with torch.no_grad():  # inference only; skip autograd bookkeeping
        caption = blip_model.generate(
            **inputs, max_length=128, num_beams=5, no_repeat_ngram_size=2
        )
    return blip_processor.decode(caption[0], skip_special_tokens=True)


def plot_feature_differences(latent_diff):
    """Save a bar chart of all feature differences and a pie chart of the
    first ten, returning the two PNG file paths.

    *latent_diff* is a nested list shaped like a (1, feature_dim) array.
    """
    diff_magnitude = [abs(x) for x in latent_diff[0]]
    indices = range(len(diff_magnitude))

    # Bar chart over every feature dimension.
    plt.figure(figsize=(8, 4))
    plt.bar(indices, diff_magnitude, alpha=0.7)
    plt.xlabel("Feature Index")
    plt.ylabel("Magnitude of Difference")
    plt.title("Feature Differences (Bar Chart)")
    bar_chart_path = "bar_chart.png"
    plt.savefig(bar_chart_path)
    plt.close()

    # Pie chart of the first ten dimensions; labels sized to the actual slice
    # so this no longer crashes when fewer than 10 features are present.
    top = diff_magnitude[:10]
    plt.figure(figsize=(6, 6))
    plt.pie(top, labels=range(len(top)), autopct="%1.1f%%", startangle=140)
    plt.title("Top 10 Feature Differences (Pie Chart)")
    pie_chart_path = "pie_chart.png"
    plt.savefig(pie_chart_path)
    plt.close()

    return bar_chart_path, pie_chart_path


def analyze_images(image_a, image_b, api_key):
    """Run the full comparison pipeline for two PIL images.

    Returns a dict with both captions, the LLM analysis text, the visual-diff
    images and the paths of the two feature-difference charts.
    """
    # DeepSeek exposes an OpenAI-compatible endpoint.
    client = OpenAI(api_key=api_key, base_url="https://api.deepseek.com")

    img_a = image_a.convert("RGB")
    img_b = image_b.convert("RGB")
    images_diff = compute_difference_images(img_a, img_b)

    # BLIP captions for each image.
    caption_a = generate_detailed_caption(img_a)
    caption_b = generate_detailed_caption(img_b)

    def extract_features(image):
        # CLIP image embedding as a (1, feature_dim) numpy array.
        inputs = clip_processor(images=image, return_tensors="pt")
        features = clip_model.get_image_features(**inputs)
        return features.detach().numpy()

    features_a = extract_features(img_a)
    features_b = extract_features(img_b)
    latent_diff = np.abs(features_a - features_b).tolist()

    # Ask the LLM for a structured comparison of the two captions.
    gpt_response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"图片A的描述为:{caption_a}。图片B的描述为:{caption_b}。\n请对两张图片的内容和潜在特征区别进行详细分析,并输出一个简洁但富有条理的总结。"},
        ],
        stream=False,
    )
    text_analysis = gpt_response.choices[0].message.content.strip()

    bar_chart_path, pie_chart_path = plot_feature_differences(latent_diff)

    return {
        "caption_a": caption_a,
        "caption_b": caption_b,
        "text_analysis": text_analysis,
        "images_diff": images_diff,
        "bar_chart": bar_chart_path,
        "pie_chart": pie_chart_path,
    }


# ---------------------------------------------------------------- Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 图像对比分析工具")
    api_key_input = gr.Textbox(label="API Key", placeholder="输入您的 DeepSeek API Key", type="password")

    with gr.Row():
        with gr.Column():
            image_a = gr.Image(label="图片A", type="pil")
        with gr.Column():
            image_b = gr.Image(label="图片B", type="pil")

    analyze_button = gr.Button("分析图片")

    with gr.Row():
        gr.Markdown("## 图像差异")
        # `.style(grid=3)` was removed in Gradio 4; `columns=` is the
        # supported equivalent.
        result_diff = gr.Gallery(label="混合差异图像", columns=3)

    with gr.Row():
        result_caption_a = gr.Textbox(label="图片A描述", interactive=False)
        result_caption_b = gr.Textbox(label="图片B描述", interactive=False)

    with gr.Row():
        gr.Markdown("## 差异分析")
        result_text_analysis = gr.Textbox(label="详细分析", interactive=False, lines=5)
        result_bar_chart = gr.Image(label="特征差异柱状图")
        result_pie_chart = gr.Image(label="特征差异饼图")

    def process_analysis(img_a, img_b, api_key):
        """UI glue: run the pipeline and reshape results for the outputs."""
        results = analyze_images(img_a, img_b, api_key)
        # Gallery items must be (image, caption) tuples; the original code
        # had them reversed as (caption, image).
        diff_images = [
            (results["images_diff"]["original_a"], "Original A"),
            (results["images_diff"]["original_b"], "Original B"),
            (results["images_diff"]["sketch_a"], "Sketch A"),
            (results["images_diff"]["sketch_b"], "Sketch B"),
            (results["images_diff"]["normal_a"], "Normal A"),
            (results["images_diff"]["normal_b"], "Normal B"),
            (results["images_diff"]["diff_overlay"], "Difference Overlay"),
        ]
        return (
            diff_images,
            results["caption_a"],
            results["caption_b"],
            results["text_analysis"],
            results["bar_chart"],
            results["pie_chart"],
        )

    analyze_button.click(
        fn=process_analysis,
        inputs=[image_a, image_b, api_key_input],
        outputs=[
            result_diff,
            result_caption_a,
            result_caption_b,
            result_text_analysis,
            result_bar_chart,
            result_pie_chart,
        ],
    )


if __name__ == "__main__":
    # Guard the launch so importing this module does not start a server.
    demo.launch()