PSNbst committed on
Commit
168c622
·
verified ·
1 Parent(s): 617f7a3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -0
app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import gradio as gr
import numpy as np
import openai  # GPT API calls
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel, BlipProcessor, BlipForConditionalGeneration
7
+
8
# Initialize the models once at import time so every request reuses them.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# GPT API configuration.
# Read the key from the OPENAI_API_KEY environment variable so a real secret
# never has to be committed; the original placeholder remains the fallback.
openai.api_key = os.environ.get("OPENAI_API_KEY", "your_openai_api_key")
16
+
17
# Analysis helpers and entry point.
def _load_rgb(source):
    """Return *source* as an RGB PIL image.

    *source* may be an already-decoded PIL image or anything ``Image.open``
    accepts (a filesystem path or a binary file-like object).
    """
    if isinstance(source, Image.Image):
        return source.convert("RGB")
    # ``convert`` decodes the pixels into a new image, so the underlying file
    # can be released as soon as the ``with`` block exits.
    with Image.open(source) as opened:
        return opened.convert("RGB")


def _generate_caption(image):
    """Return a caption for *image* generated by the BLIP model."""
    inputs = blip_processor(image, return_tensors="pt")
    with torch.no_grad():  # inference only; no gradients needed
        token_ids = blip_model.generate(**inputs)
    return blip_processor.decode(token_ids[0], skip_special_tokens=True)


def _extract_features(image):
    """Return the CLIP image features of *image* as a NumPy array."""
    inputs = clip_processor(images=image, return_tensors="pt")
    with torch.no_grad():  # inference only; no gradients needed
        features = clip_model.get_image_features(**inputs)
    return features.detach().cpu().numpy()


def analyze_images(image_a, image_b):
    """Compare two images and summarize how they differ.

    Each image is captioned with BLIP and embedded with CLIP; the captions are
    then sent to the GPT completion API to obtain a written comparison.

    Args:
        image_a: First image, as a path, a binary file-like object, or a PIL
            image.
        image_b: Second image, in any of the same forms.

    Returns:
        A dict with the keys ``caption_a``, ``caption_b``, ``similarity``
        (cosine similarity of the two CLIP embeddings, as a ``float``),
        ``latent_diff`` (element-wise absolute difference of the embeddings,
        as nested lists) and ``text_analysis`` (the GPT-generated summary).

    Raises:
        ValueError: If either image is missing (``None``).
    """
    # Fail early with a clear message instead of an opaque error from PIL.
    if image_a is None or image_b is None:
        raise ValueError("Both image A and image B must be provided.")

    # Load the images.
    img_a = _load_rgb(image_a)
    img_b = _load_rgb(image_b)

    # Generate the captions.
    caption_a = _generate_caption(img_a)
    caption_b = _generate_caption(img_b)

    # Extract the embeddings.
    features_a = _extract_features(img_a)
    features_b = _extract_features(img_b)

    # Embedding similarity. The result is indexed as a 2-D array below, i.e.
    # the features are presumably (1, dim) row vectors - TODO confirm.
    norm_product = np.linalg.norm(features_a) * np.linalg.norm(features_b)
    cosine_similarity = np.dot(features_a, features_b.T) / norm_product
    latent_diff = np.abs(features_a - features_b).tolist()

    # Ask GPT for a written comparison of the two captions.
    gpt_prompt = (
        f"图片A的描述为:{caption_a}。图片B的描述为:{caption_b}。\n"
        "请对两张图片的内容和潜在特征区别进行详细分析,并输出一个简洁但富有条理的总结。"
    )
    # NOTE(review): `openai.Completion` with `text-davinci-003` is the legacy
    # completions interface; confirm it is still available for the installed
    # `openai` package and account, or migrate to the chat completions API.
    gpt_response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=gpt_prompt,
        max_tokens=150,
    )
    textual_analysis = gpt_response['choices'][0]['text'].strip()

    # Assemble the result; the similarity is converted to a plain float so it
    # is not returned as a NumPy scalar.
    return {
        "caption_a": caption_a,
        "caption_b": caption_b,
        "similarity": float(cosine_similarity[0][0]),
        "latent_diff": latent_diff,
        "text_analysis": textual_analysis,
    }
67
+
68
# Build the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("# 图片对比分析工具")

    with gr.Row():
        with gr.Column():
            # "filepath" passes the callback a path string, which PIL can
            # open directly; "file" is not among the documented `type` values.
            image_a = gr.Image(label="图片A", type="filepath")
        with gr.Column():
            image_b = gr.Image(label="图片B", type="filepath")

    analyze_button = gr.Button("分析图片")
    result_caption_a = gr.Textbox(label="图片A描述", interactive=False)
    result_caption_b = gr.Textbox(label="图片B描述", interactive=False)
    result_similarity = gr.Number(label="图片相似性", interactive=False)
    result_latent_diff = gr.DataFrame(label="潜在特征差异", interactive=False)
    result_text_analysis = gr.Textbox(label="详细分析", interactive=False, lines=5)

    def process_analysis(img_a, img_b):
        """Run the analysis and unpack its result dict in output-component order."""
        results = analyze_images(img_a, img_b)
        return (
            results["caption_a"],
            results["caption_b"],
            results["similarity"],
            results["latent_diff"],
            results["text_analysis"],
        )

    # Wire the button to the analysis; the outputs list matches the tuple
    # returned by process_analysis element for element.
    analyze_button.click(
        fn=process_analysis,
        inputs=[image_a, image_b],
        outputs=[
            result_caption_a,
            result_caption_b,
            result_similarity,
            result_latent_diff,
            result_text_analysis,
        ],
    )

# Only start the server when executed as a script, so importing this module
# does not launch the app as a side effect.
if __name__ == "__main__":
    demo.launch()