File size: 6,224 Bytes
168c622
 
 
83f7b7e
168c622
eb0b8f5
8d8b4cc
168c622
 
 
 
 
 
 
eb0b8f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b9a151f
 
 
 
 
 
 
 
 
eb0b8f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d8b4cc
eb0b8f5
 
 
 
 
 
 
b9a151f
eb0b8f5
168c622
eb0b8f5
 
168c622
 
 
 
 
 
 
 
 
 
 
eb0b8f5
8d8b4cc
 
 
 
 
 
 
168c622
eb0b8f5
 
 
 
 
168c622
 
 
eb0b8f5
b9a151f
eb0b8f5
 
168c622
 
eb0b8f5
168c622
eb0b8f5
 
 
168c622
 
eb0b8f5
168c622
eb0b8f5
 
168c622
eb0b8f5
 
 
83f7b7e
eb0b8f5
 
 
 
 
 
 
 
 
 
 
168c622
8d8b4cc
 
eb0b8f5
b9a151f
eb0b8f5
 
 
 
 
 
 
168c622
 
8d8b4cc
eb0b8f5
 
 
 
 
 
 
 
168c622
 
b80196d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
import gradio as gr
import torch
from transformers import CLIPProcessor, CLIPModel, BlipProcessor, BlipForConditionalGeneration
from PIL import Image, ImageChops, ImageFilter
import numpy as np
import matplotlib.pyplot as plt
from openai import OpenAI

# Load the pretrained models once, at import time.
_CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"

# CLIP model/processor pair and BLIP processor/model pair, loaded in the
# same order as before so any download/cache behaviour is unchanged.
clip_model = CLIPModel.from_pretrained(_CLIP_CHECKPOINT)
clip_processor = CLIPProcessor.from_pretrained(_CLIP_CHECKPOINT)
blip_processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
blip_model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)

# Image-processing helpers
def compute_difference_images(img_a, img_b):
    """Derive the comparison views for a pair of images.

    Args:
        img_a: First image (an object with the PIL ``Image`` API).
        img_b: Second image (an object with the PIL ``Image`` API).

    Returns:
        A dict whose keys, in insertion order, are ``original_a``,
        ``original_b``, ``sketch_a``, ``sketch_b``, ``normal_a``,
        ``normal_b`` and ``diff_overlay``. The originals are returned
        as-is; the remaining values are newly derived images.
    """
    def _to_sketch(picture):
        # "Sketch" view: screen-blend the grayscale image with its inverse.
        gray = picture.convert("L")
        return ImageChops.screen(gray, ImageChops.invert(gray))

    def _to_edge_map(picture):
        # Stand-in for a normal map: a plain edge-detection filter.
        return picture.filter(ImageFilter.FIND_EDGES)

    # Per-pixel absolute difference of the two inputs.
    # NOTE(review): the visible caller converts both inputs to RGB but does
    # not equalise their sizes -- confirm the behaviour wanted for uploads
    # with different dimensions.
    overlay = ImageChops.difference(img_a, img_b)

    sketch_a, sketch_b = _to_sketch(img_a), _to_sketch(img_b)
    edges_a, edges_b = _to_edge_map(img_a), _to_edge_map(img_b)

    views = {}
    views["original_a"] = img_a
    views["original_b"] = img_b
    views["sketch_a"] = sketch_a
    views["sketch_b"] = sketch_b
    views["normal_a"] = edges_a
    views["normal_b"] = edges_b
    views["diff_overlay"] = overlay
    return views

# Persist images to disk
def save_images(images):
    """Save each image as ``<key>.png`` and describe the files written.

    Args:
        images: Mapping of name -> image object exposing ``save(path)``.

    Returns:
        A list of ``(path, caption)`` tuples in the mapping's iteration
        order. The caption is the key with underscores turned into spaces
        and then passed through ``str.capitalize``.
    """
    gallery_items = []
    for name, picture in images.items():
        filename = f"{name}.png"
        # Relative path: the file lands in the current working directory.
        picture.save(filename)
        caption = name.replace("_", " ").capitalize()
        gallery_items.append((filename, caption))
    return gallery_items

# Detailed captioning with BLIP
def generate_detailed_caption(image):
    """Produce a text caption for ``image`` using the module-level BLIP model.

    Args:
        image: The image handed to ``blip_processor``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    encoded = blip_processor(image, return_tensors="pt")
    # Beam search with a repeated-bigram ban, capped at 128 tokens.
    generation_options = {
        "max_length": 128,
        "num_beams": 5,
        "no_repeat_ngram_size": 2,
    }
    token_ids = blip_model.generate(**encoded, **generation_options)
    first_sequence = token_ids[0]
    return blip_processor.decode(first_sequence, skip_special_tokens=True)

# Feature-difference visualisation
def plot_feature_differences(latent_diff):
    """Render feature-difference magnitudes as a bar chart and a pie chart.

    Args:
        latent_diff: Nested sequence of numbers; only the first row
            (``latent_diff[0]``) is read. Each entry is one feature's
            difference value.

    Returns:
        ``(bar_chart_path, pie_chart_path)``: the relative paths
        ``"bar_chart.png"`` and ``"pie_chart.png"`` that were written.
    """
    diff_magnitude = [abs(value) for value in latent_diff[0]]

    # Bar chart: one bar per feature index.
    bar_chart_path = "bar_chart.png"
    bar_fig, bar_ax = plt.subplots(figsize=(8, 4))
    try:
        bar_ax.bar(range(len(diff_magnitude)), diff_magnitude, alpha=0.7)
        bar_ax.set_xlabel("Feature Index")
        bar_ax.set_ylabel("Magnitude of Difference")
        bar_ax.set_title("Feature Differences (Bar Chart)")
        bar_fig.savefig(bar_chart_path)
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(bar_fig)

    # Pie chart: the (up to) ten largest magnitudes, labelled with their
    # real feature indices so the chart matches its "Top 10" title.
    ranked = sorted(
        enumerate(diff_magnitude),
        key=lambda pair: pair[1],
        reverse=True,
    )
    # Drop non-positive entries: a pie whose values sum to zero cannot be
    # normalised into wedges (e.g. when both images are identical).
    top = [(index, size) for index, size in ranked[:10] if size > 0]

    pie_chart_path = "pie_chart.png"
    pie_fig, pie_ax = plt.subplots(figsize=(6, 6))
    try:
        if top:
            pie_ax.pie(
                [size for _, size in top],
                labels=[str(index) for index, _ in top],
                autopct="%1.1f%%",
                startangle=140,
            )
        else:
            # Nothing to plot: show a placeholder instead of failing.
            pie_ax.text(
                0.5,
                0.5,
                "No feature differences",
                ha="center",
                va="center",
                transform=pie_ax.transAxes,
            )
            pie_ax.set_axis_off()
        pie_ax.set_title("Top 10 Feature Differences (Pie Chart)")
        pie_fig.savefig(pie_chart_path)
    finally:
        plt.close(pie_fig)

    return bar_chart_path, pie_chart_path

# Analysis entry point
def analyze_images(image_a, image_b, api_key):
    """Compare two images and collect every artefact shown in the UI.

    Args:
        image_a: First image (PIL image as delivered by the Gradio input).
        image_b: Second image (PIL image as delivered by the Gradio input).
        api_key: API key used for the DeepSeek chat-completions endpoint.

    Returns:
        A dict with the keys ``caption_a``, ``caption_b``,
        ``text_analysis``, ``saved_images`` (list of ``(path, caption)``
        tuples), ``bar_chart`` and ``pie_chart`` (image file paths).

    Raises:
        gr.Error: If either image is missing or the API key is blank.
    """
    # Fail fast with a readable message instead of an AttributeError on
    # ``None.convert`` or a remote auth failure after all local inference.
    if image_a is None or image_b is None:
        raise gr.Error("请先上传图片A和图片B。")
    if not api_key or not api_key.strip():
        raise gr.Error("请输入 DeepSeek API Key。")

    # OpenAI-compatible client pointed at the DeepSeek endpoint.
    client = OpenAI(api_key=api_key.strip(), base_url="https://api.deepseek.com")

    # Derived difference images, written to disk for the gallery.
    img_a = image_a.convert("RGB")
    img_b = image_b.convert("RGB")
    images_diff = compute_difference_images(img_a, img_b)
    saved_images = save_images(images_diff)

    # BLIP captions for both images.
    caption_a = generate_detailed_caption(img_a)
    caption_b = generate_detailed_caption(img_b)

    # CLIP image embeddings.
    def extract_features(image):
        inputs = clip_processor(images=image, return_tensors="pt")
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            features = clip_model.get_image_features(**inputs)
        return features.detach().numpy()

    features_a = extract_features(img_a)
    features_b = extract_features(img_b)
    latent_diff = np.abs(features_a - features_b).tolist()

    # Ask the chat model for a written comparison based on the two captions.
    gpt_response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"图片A的描述为:{caption_a}。图片B的描述为:{caption_b}。\n请对两张图片的内容和潜在特征区别进行详细分析,并输出一个简洁但富有条理的总结。"}
        ],
        stream=False
    )
    # ``content`` may be None; fall back to an empty string before stripping.
    reply_text = gpt_response.choices[0].message.content
    text_analysis = (reply_text or "").strip()

    # Charts of the embedding differences.
    bar_chart_path, pie_chart_path = plot_feature_differences(latent_diff)

    return {
        "caption_a": caption_a,
        "caption_b": caption_b,
        "text_analysis": text_analysis,
        "saved_images": saved_images,
        "bar_chart": bar_chart_path,
        "pie_chart": pie_chart_path
    }

# Gradio interface: layout, click handler, and app launch.
with gr.Blocks() as demo:
    gr.Markdown("# 图像对比分析工具")
    # Password-type textbox; its value is passed to the handler as api_key.
    api_key_input = gr.Textbox(label="API Key", placeholder="输入您的 DeepSeek API Key", type="password")

    # Two image inputs side by side; type="pil" delivers PIL images.
    with gr.Row():
        with gr.Column():
            image_a = gr.Image(label="图片A", type="pil")
        with gr.Column():
            image_b = gr.Image(label="图片B", type="pil")

    analyze_button = gr.Button("分析图片")

    # Gallery fed with the (path, caption) list from analyze_images.
    with gr.Row():
        gr.Markdown("## 图像差异")
        result_diff = gr.Gallery(label="混合差异图像")

    # Read-only caption boxes, one per input image.
    with gr.Row():
        result_caption_a = gr.Textbox(label="图片A描述", interactive=False)
        result_caption_b = gr.Textbox(label="图片B描述", interactive=False)

    # Written analysis plus the two feature-difference charts.
    with gr.Row():
        gr.Markdown("## 差异分析")
        result_text_analysis = gr.Textbox(label="详细分析", interactive=False, lines=5)
        result_bar_chart = gr.Image(label="特征差异柱状图")
        result_pie_chart = gr.Image(label="特征差异饼图")

    # Analysis logic
    def process_analysis(img_a, img_b, api_key):
        """Run analyze_images and unpack its result dict into a tuple.

        The tuple order matches the ``outputs`` list of the click handler
        below: gallery items, caption A, caption B, text analysis, bar
        chart path, pie chart path.
        """
        results = analyze_images(img_a, img_b, api_key)
        return (
            results["saved_images"],
            results["caption_a"],
            results["caption_b"],
            results["text_analysis"],
            results["bar_chart"],
            results["pie_chart"]
        )

    # Wire the button: three inputs in, six outputs out (order matters).
    analyze_button.click(
        fn=process_analysis,
        inputs=[image_a, image_b, api_key_input],
        outputs=[
            result_diff,
            result_caption_a,
            result_caption_b,
            result_text_analysis,
            result_bar_chart,
            result_pie_chart
        ]
    )

# Start the Gradio app (runs whenever this module is executed or imported).
demo.launch()