ProCreations committed on
Commit
62f171e
·
verified ·
1 Parent(s): 21177e5

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -486
app.py DELETED
@@ -1,486 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Just search - A Smart Search Agent using Menlo/Lucy-128k
4
- Part of the Just, AKA Simple series
5
- Built with Gradio, DuckDuckGo Search, and Hugging Face Transformers
6
- """
7
-
8
- import gradio as gr
9
- import torch
10
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
11
- from duckduckgo_search import DDGS
12
- import json
13
- import re
14
- import time
15
- from typing import List, Dict, Tuple
16
- import spaces
17
-
18
- # Initialize the model and tokenizer globally for efficiency
19
- MODEL_NAME = "Menlo/Lucy-128k"
20
- tokenizer = None
21
- model = None
22
- search_pipeline = None
23
-
24
- def initialize_model():
25
- """Initialize the Menlo/Lucy-128k model and tokenizer"""
26
- global tokenizer, model, search_pipeline
27
- try:
28
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
29
- model = AutoModelForCausalLM.from_pretrained(
30
- MODEL_NAME,
31
- torch_dtype=torch.float16,
32
- device_map="auto",
33
- trust_remote_code=True
34
- )
35
- search_pipeline = pipeline(
36
- "text-generation",
37
- model=model,
38
- tokenizer=tokenizer,
39
- torch_dtype=torch.float16,
40
- device_map="auto",
41
- max_new_tokens=2048,
42
- temperature=0.7,
43
- do_sample=True,
44
- pad_token_id=tokenizer.eos_token_id
45
- )
46
- return True
47
- except Exception as e:
48
- print(f"Error initializing model: {e}")
49
- return False
50
-
51
def clean_response(text: str) -> str:
    """Strip role prefixes and chat-template artifacts from model output."""
    cleaned = text.strip()
    # Drop a leading "Assistant:" / "AI:" / "Response:" / "Answer:" label.
    cleaned = re.sub(r'^(Assistant:|AI:|Response:|Answer:)\s*', '', cleaned)
    # Remove instruction-tag blocks and any <|...|> special chat tokens.
    cleaned = re.sub(r'\[INST\].*?\[\/INST\]', '', cleaned)
    cleaned = re.sub(r'<\|.*?\|>', '', cleaned)
    return cleaned.strip()
58
-
59
@spaces.GPU
def generate_search_queries(user_query: str) -> List[str]:
    """Ask the model to propose 3-5 web-search queries for *user_query*.

    Returns:
        At most five query strings. If generation fails, falls back to
        simple variations of the original query.
    """
    prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a search query generator. Given a user's question, generate 3-5 different search queries that would help find comprehensive information to answer their question. Return only the search queries, one per line, without numbering or bullet points.

Example:
User: "What are the latest developments in AI?"
latest AI developments 2024
artificial intelligence breakthroughs recent
AI technology advances news
machine learning innovations 2024

<|eot_id|><|start_header_id|>user<|end_header_id|>
{user_query}
<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

    try:
        output = search_pipeline(prompt, max_new_tokens=200, temperature=0.3)
        raw = output[0]['generated_text']

        # Keep only the text after the final assistant header, then strip
        # chat-template artifacts.
        reply = raw.split('<|start_header_id|>assistant<|end_header_id|>')[-1]
        reply = clean_response(reply)

        # One query per non-blank line.
        candidates = [line.strip() for line in reply.split('\n') if line.strip()]
        # Discard lines that are clearly not search queries.
        queries = [
            c for c in candidates
            if len(c) > 5 and not c.startswith('Note:') and not c.startswith('Example:')
        ]
        return queries[:5]
    except Exception as exc:
        print(f"Error generating queries: {exc}")
        # Best-effort fallback: simple variations of the raw query.
        return [user_query, f"{user_query} 2024", f"{user_query} latest"]
94
-
95
def search_web(queries: List[str]) -> List[Dict]:
    """Run each query through DuckDuckGo text search and merge the results.

    Args:
        queries: Search strings to execute (typically produced by
            ``generate_search_queries``).

    Returns:
        Up to 15 unique result dicts, deduplicated by URL, each annotated
        with the ``search_query`` that produced it. Failed queries are
        skipped; an empty list means every query failed.
    """
    all_results = []
    ddgs = DDGS()

    for query in queries:
        try:
            results = ddgs.text(query, max_results=5, region='wt-wt', safesearch='moderate')
            for result in results:
                # Remember which query found this hit (shown in debug info).
                result['search_query'] = query
                all_results.append(result)
            time.sleep(0.5)  # Rate limiting between queries
        except Exception as e:
            print(f"Error searching for '{query}': {e}")
            continue

    # Deduplicate by URL. Use .get() — consistent with the rest of the
    # file — so one malformed result without an 'href' key cannot raise
    # KeyError and discard the whole batch; such results are skipped.
    seen_urls = set()
    unique_results = []
    for result in all_results:
        url = result.get('href')
        if url is None or url in seen_urls:
            continue
        seen_urls.add(url)
        unique_results.append(result)

    return unique_results[:15]  # Return max 15 results
120
-
121
@spaces.GPU
def filter_relevant_results(user_query: str, search_results: List[Dict]) -> List[Dict]:
    """Use the model to pick the search results most relevant to the query.

    Args:
        user_query: The user's original question.
        search_results: Raw result dicts from ``search_web``.

    Returns:
        At most 5 result dicts chosen by the model; falls back to the
        first 5 raw results if generation or parsing fails.
    """
    if not search_results:
        return []

    # Build a numbered plain-text summary of the results for the prompt.
    # Only the first 12 results are included to limit prompt length.
    results_text = ""
    for i, result in enumerate(search_results[:12]):  # Limit to avoid token overflow
        results_text += f"{i+1}. Title: {result.get('title', 'No title')}\n"
        results_text += f" URL: {result.get('href', 'No URL')}\n"
        results_text += f" Snippet: {result.get('body', 'No description')[:200]}...\n\n"

    prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a search result evaluator. Given a user's question and search results, identify which results are most relevant and helpful for answering the question.

Return only the numbers of the most relevant results (1-5 results maximum), separated by commas. Consider:
- Direct relevance to the question
- Credibility of the source
- Recency of information
- Comprehensiveness of content

Example response: 1, 3, 7

<|eot_id|><|start_header_id|>user<|end_header_id|>
Question: {user_query}

Search Results:
{results_text}

<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

    try:
        # Low temperature: we want a deterministic, parseable number list.
        response = search_pipeline(prompt, max_new_tokens=100, temperature=0.1)
        generated_text = response[0]['generated_text']

        # Keep only the text after the assistant header.
        assistant_response = generated_text.split('<|start_header_id|>assistant<|end_header_id|>')[-1]
        assistant_response = clean_response(assistant_response)

        # Parse the 1-based result numbers the model returned.
        numbers = re.findall(r'\d+', assistant_response)
        # Convert to 0-based indices, dropping out-of-range picks.
        selected_indices = [int(n) - 1 for n in numbers if int(n) <= len(search_results)]

        # Bounds check again defensively, cap at 5 results.
        return [search_results[i] for i in selected_indices if 0 <= i < len(search_results)][:5]
    except Exception as e:
        print(f"Error filtering results: {e}")
        return search_results[:5]  # Fallback to first 5 results
169
-
170
@spaces.GPU
def generate_final_answer(user_query: str, selected_results: List[Dict]) -> str:
    """Synthesize a comprehensive answer from the selected search results.

    Returns:
        A user-facing answer string; an apology message when there are no
        results or when generation fails.
    """
    if not selected_results:
        return "I couldn't find relevant information to answer your question. Please try rephrasing your query."

    # Assemble a numbered source context for the model.
    pieces = []
    for idx, hit in enumerate(selected_results, start=1):
        pieces.append(f"Source {idx}: {hit.get('title', 'Unknown')}\n")
        pieces.append(f"Content: {hit.get('body', 'No content available')}\n")
        pieces.append(f"URL: {hit.get('href', 'No URL')}\n\n")
    context = "".join(pieces)

    prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful research assistant. Based on the provided search results, give a comprehensive answer to the user's question.

Guidelines:
- Synthesize information from multiple sources
- Be accurate and factual
- Cite sources when possible
- If information is conflicting, mention it
- Keep the answer well-structured and easy to read
- Include relevant URLs for further reading

<|eot_id|><|start_header_id|>user<|end_header_id|>
Question: {user_query}

Search Results:
{context}

Please provide a comprehensive answer based on these sources.

<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

    try:
        generation = search_pipeline(prompt, max_new_tokens=1024, temperature=0.2)
        full_text = generation[0]['generated_text']

        # Keep only what follows the assistant header, then tidy it up.
        reply = full_text.split('<|start_header_id|>assistant<|end_header_id|>')[-1]
        return clean_response(reply)
    except Exception as exc:
        print(f"Error generating final answer: {exc}")
        return "I encountered an error while processing the search results. Please try again."
216
-
217
def search_agent_workflow(user_query: str, progress=gr.Progress()) -> Tuple[str, str]:
    """Pipeline entry point: query generation -> search -> filter -> answer.

    Args:
        user_query: The question typed by the user.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        ``(answer_markdown, debug_text)`` for the two Gradio outputs.
    """
    if not user_query.strip():
        return "Please enter a search query.", ""

    progress(0.1, desc="Initializing...")

    # Step 1: have the model propose several search queries.
    progress(0.2, desc="Generating search queries...")
    queries = generate_search_queries(user_query)
    queries_text = "Generated queries:\n" + "\n".join(f"• {q}" for q in queries)

    # Step 2: hit DuckDuckGo with every query.
    progress(0.4, desc="Searching the web...")
    search_results = search_web(queries)

    if not search_results:
        return "No search results found. Please try a different query.", queries_text

    # Step 3: let the model pick the most relevant hits.
    progress(0.6, desc="Filtering relevant results...")
    relevant_results = filter_relevant_results(user_query, search_results)

    # Step 4: synthesize the final answer from those hits.
    progress(0.8, desc="Generating comprehensive answer...")
    final_answer = generate_final_answer(user_query, relevant_results)

    progress(1.0, desc="Complete!")

    # Debug panel: the generated queries plus the chosen sources.
    lines = [queries_text, "", f"Selected {len(relevant_results)} relevant sources:"]
    for idx, hit in enumerate(relevant_results, start=1):
        lines.append(f"{idx}. {hit.get('title', 'No title')} - {hit.get('href', 'No URL')}")
    debug_info = "\n".join(lines) + "\n"

    return final_answer, debug_info
252
-
253
# Custom CSS for dark blue theme and mobile responsiveness.
# Injected into the Gradio Blocks app via the `css=` argument in
# create_interface(); colors are centralized as CSS custom properties.
custom_css = """
/* Dark blue theme */
:root {
    --primary-bg: #0a1628;
    --secondary-bg: #1e3a5f;
    --accent-bg: #2563eb;
    --text-primary: #f8fafc;
    --text-secondary: #cbd5e1;
    --border-color: #334155;
    --input-bg: #1e293b;
    --button-bg: #3b82f6;
    --button-hover: #2563eb;
}

/* Global styles */
.gradio-container {
    background: linear-gradient(135deg, var(--primary-bg) 0%, var(--secondary-bg) 100%) !important;
    color: var(--text-primary) !important;
    font-family: 'Inter', 'Segoe UI', system-ui, sans-serif !important;
}

/* Mobile responsiveness */
@media (max-width: 768px) {
    .gradio-container {
        padding: 10px !important;
    }

    .gr-form {
        gap: 15px !important;
    }

    .gr-button {
        font-size: 16px !important;
        padding: 12px 20px !important;
    }
}

/* Input styling */
.gr-textbox textarea, .gr-textbox input {
    background: var(--input-bg) !important;
    border: 1px solid var(--border-color) !important;
    color: var(--text-primary) !important;
    border-radius: 8px !important;
}

/* Button styling */
.gr-button {
    background: linear-gradient(135deg, var(--button-bg) 0%, var(--accent-bg) 100%) !important;
    color: white !important;
    border: none !important;
    border-radius: 8px !important;
    font-weight: 600 !important;
    transition: all 0.3s ease !important;
}

.gr-button:hover {
    background: linear-gradient(135deg, var(--button-hover) 0%, var(--button-bg) 100%) !important;
    transform: translateY(-1px) !important;
    box-shadow: 0 4px 12px rgba(59, 130, 246, 0.3) !important;
}

/* Output styling */
.gr-markdown, .gr-textbox {
    background: var(--input-bg) !important;
    border: 1px solid var(--border-color) !important;
    border-radius: 8px !important;
    color: var(--text-primary) !important;
}

/* Header styling */
.gr-markdown h1 {
    color: var(--accent-bg) !important;
    text-align: center !important;
    margin-bottom: 20px !important;
    font-size: 2.5rem !important;
    font-weight: 700 !important;
}

/* Loading animation */
.gr-loading {
    background: var(--secondary-bg) !important;
    border-radius: 8px !important;
}

/* Scrollbar styling */
::-webkit-scrollbar {
    width: 8px;
}

::-webkit-scrollbar-track {
    background: var(--primary-bg);
}

::-webkit-scrollbar-thumb {
    background: var(--accent-bg);
    border-radius: 4px;
}

::-webkit-scrollbar-thumb:hover {
    background: var(--button-hover);
}
"""
356
-
357
def create_interface():
    """Build and return the Gradio Blocks UI for the search agent.

    Wires the query textbox and search button to
    ``search_agent_workflow`` and applies the dark-blue theme plus the
    module-level ``custom_css``.
    """
    with gr.Blocks(
        # Base theme recolored to the dark-blue palette used by custom_css.
        theme=gr.themes.Base(
            primary_hue="blue",
            secondary_hue="slate",
            neutral_hue="slate",
            text_size="lg",
            spacing_size="lg",
            radius_size="md"
        ).set(
            body_background_fill="*primary_950",
            body_text_color="*neutral_50",
            background_fill_primary="*primary_900",
            background_fill_secondary="*primary_800",
            border_color_primary="*primary_700",
            button_primary_background_fill="*primary_600",
            button_primary_background_fill_hover="*primary_500",
            button_primary_text_color="white",
            input_background_fill="*primary_800",
            input_border_color="*primary_600",
            input_text_color="*neutral_50"
        ),
        css=custom_css,
        title="Just search - AI Search Agent",
        # Viewport meta tag for mobile rendering.
        head="<meta name='viewport' content='width=device-width, initial-scale=1.0'>"
    ) as interface:

        gr.Markdown("# 🔍 Just search", elem_id="header")
        gr.Markdown(
            "*Part of the Just, AKA Simple series*\n\n"
            "**Intelligent search agent powered by Menlo/Lucy-128k**\n\n"
            "Ask any question and get comprehensive answers from the web.",
            elem_id="description"
        )

        # Input row: question textbox next to the search button.
        with gr.Row():
            with gr.Column(scale=4):
                query_input = gr.Textbox(
                    label="Your Question",
                    placeholder="Ask me anything... (e.g., 'What are the latest developments in AI?')",
                    lines=2,
                    elem_id="query-input"
                )
            with gr.Column(scale=1):
                search_btn = gr.Button(
                    "🔎 Search",
                    variant="primary",
                    size="lg",
                    elem_id="search-button"
                )

        # Main answer area (markdown so sources/links render nicely).
        with gr.Row():
            answer_output = gr.Markdown(
                label="Answer",
                elem_id="answer-output",
                height=400
            )

        # Collapsible panel showing generated queries and selected sources.
        with gr.Accordion("🔧 Debug Info", open=False):
            debug_output = gr.Textbox(
                label="Search Process Details",
                lines=8,
                elem_id="debug-output"
            )

        # Event handlers: button click and textbox Enter both run the
        # same workflow.
        search_btn.click(
            fn=search_agent_workflow,
            inputs=[query_input],
            outputs=[answer_output, debug_output],
            show_progress=True
        )

        query_input.submit(
            fn=search_agent_workflow,
            inputs=[query_input],
            outputs=[answer_output, debug_output],
            show_progress=True
        )

        # Example queries (not cached: each click runs a live search).
        gr.Examples(
            examples=[
                ["What are the latest breakthroughs in quantum computing?"],
                ["How does climate change affect ocean currents?"],
                ["What are the best practices for sustainable agriculture?"],
                ["Explain the recent developments in renewable energy technology"],
                ["What are the health benefits of the Mediterranean diet?"]
            ],
            inputs=query_input,
            outputs=[answer_output, debug_output],
            fn=search_agent_workflow,
            cache_examples=False
        )

        gr.Markdown(
            "---\n**Note:** This search agent generates multiple queries, searches the web, "
            "filters results for relevance, and provides comprehensive answers. "
            "Results are sourced from DuckDuckGo search."
        )

    return interface
460
-
461
def main():
    """Initialize the model, build the UI, and launch the Gradio server."""
    print("🚀 Initializing Just search...")

    # Abort early if the model cannot be loaded — the UI is useless without it.
    if not initialize_model():
        print("❌ Failed to initialize model. Please check your setup.")
        return

    print("✅ Model initialized successfully!")
    print("🌐 Creating interface...")

    app = create_interface()

    print("🎉 Just search is ready!")
    # Listen on all interfaces (container-friendly) with a public share link.
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        show_error=True,
        debug=True,
    )
484
-
485
# Run the app only when executed as a script (not when imported).
if __name__ == "__main__":
    main()