import gradio as gr import requests import time import os # Use localhost for HF Spaces since both services run in the same container API_BASE_URL = "http://localhost:8000" def extract_links(url): """Extract links from the given URL""" endpoint = f"{API_BASE_URL}/extract_links" payload = {"url": url} try: response = requests.post(endpoint, json=payload, timeout=30) if response.status_code == 200: return response.json()["unique_links"] else: raise Exception(f"Failed to extract links: {response.text}") except requests.exceptions.RequestException as e: raise Exception(f"Connection error: {str(e)}") def extract_text(urls): """Extract text from URLs""" endpoint = f"{API_BASE_URL}/extract_text" try: response = requests.post(endpoint, json=urls, timeout=60) if response.status_code == 200: return response.json()["file_saved"] else: raise Exception(f"Failed to extract text: {response.text}") except requests.exceptions.RequestException as e: raise Exception(f"Connection error: {str(e)}") def perform_rag(file_path, prompt): """Perform RAG on the extracted text""" endpoint = f"{API_BASE_URL}/rag" payload = {"file_path": file_path, "prompt": prompt} try: response = requests.post(endpoint, json=payload, timeout=60) if response.status_code == 200: return response.json() else: raise Exception(f"Failed to perform RAG: {response.text}") except requests.exceptions.RequestException as e: raise Exception(f"Connection error: {str(e)}") def check_api_health(): """Check if FastAPI is running""" try: response = requests.get(f"{API_BASE_URL}/", timeout=5) return response.status_code == 200 except: return False def process_multiple_links(url, prompt): """Process multiple links from a webpage""" if not url or not prompt: return "❌ Error: Please provide both URL and prompt", "", "" # Check API health first if not check_api_health(): return "❌ Error: FastAPI service is not available. Please wait a moment and try again.", "", "" try: links = extract_links(url) sample_links = links[:5] file_path = extract_text(sample_links) result = perform_rag(file_path, prompt) status_msg = f"✅ Processed {len(sample_links)} pages from {len(links)} total links found" response_text = f"**Query:** {result['user_query']}\n\n**Response:** {result['assistant_response']}" sources_text = result['sources'] return status_msg, response_text, sources_text except Exception as e: return f"❌ Error: {str(e)}", "", "" def process_homepage_only(url, prompt): """Process homepage content only""" if not url or not prompt: return "❌ Error: Please provide both URL and prompt", "", "" # Check API health first if not check_api_health(): return "❌ Error: FastAPI service is not available. Please wait a moment and try again.", "", "" try: file_path = extract_text([url]) result = perform_rag(file_path, prompt) status_msg = "✅ Processed homepage content" response_text = f"**Query:** {result['user_query']}\n\n**Response:** {result['assistant_response']}" sources_text = result['sources'] return status_msg, response_text, sources_text except Exception as e: return f"❌ Error: {str(e)}", "", "" def extract_links_only(url): """Extract and display links from a webpage""" if not url: return "❌ Error: Please provide a URL" # Check API health first if not check_api_health(): return "❌ Error: FastAPI service is not available. Please wait a moment and try again." try: links = extract_links(url) links_text = "\n".join([f"• {link}" for link in links]) return f"✅ Found {len(links)} links:\n\n{links_text}" except Exception as e: return f"❌ Error: {str(e)}" def extract_text_only(url): """Extract and display text from a webpage""" if not url: return "❌ Error: Please provide a URL" # Check API health first if not check_api_health(): return "❌ Error: FastAPI service is not available. Please wait a moment and try again." try: file_path = extract_text([url]) return f"✅ Text extracted and saved to: {file_path}" except Exception as e: return f"❌ Error: {str(e)}" # Create individual interfaces for each tab multiple_links_demo = gr.Interface( process_multiple_links, inputs=[ gr.Textbox(label="🔗 Website URL", placeholder="https://example.com"), gr.Textbox(label="❓ Your Question", placeholder="What is this website about?", lines=3) ], outputs=[ gr.Textbox(label="📊 Status"), gr.Textbox(label="🤖 AI Response", lines=8), gr.Textbox(label="📚 Sources", lines=6) ], title=" \n🌐 Multiple Links Analysis", allow_flagging="never" ) homepage_only_demo = gr.Interface( process_homepage_only, inputs=[ gr.Textbox(label="🔗 Website URL", placeholder="https://example.com"), gr.Textbox(label="❓ Your Question", placeholder="What is this website about?", lines=3) ], outputs=[ gr.Textbox(label="📊 Status"), gr.Textbox(label="🤖 AI Response", lines=8), gr.Textbox(label="📚 Sources", lines=6) ], title=" \n🏠 Homepage Only Analysis", allow_flagging="never" ) # Tab names tab_names = ["Multiple Links Analysis", "Homepage Only Analysis"] # Main interface with tabs with gr.Blocks(theme=gr.themes.Soft()) as interface: with gr.Row(): with gr.Column(scale=1, min_width=1200): gr.Markdown("## \n\n", label="") gr.HTML("""
Extract content from web pages and ask questions using AI-powered retrieval
URL: https://openai.com
Question: What are the main products and services offered?
ℹ️ Note: If you encounter connection errors, please wait a moment for the system to initialize and try again.