import streamlit as st
import pandas as pd
import faiss
from sentence_transformers import SentenceTransformer
from transformers import pipeline


# Load the embedding model and the generation model once and cache them
# across Streamlit reruns, so they are not reloaded on every interaction.
@st.cache_resource
def load_models():
    embedder = SentenceTransformer("all-MiniLM-L6-v2")
    generator = pipeline("text2text-generation", model="facebook/bart-large")
    return embedder, generator


embedder, generator = load_models()


# Load your climate news dataset (title + description)
@st.cache_data
def load_data():
    df = pd.read_csv("climate_news.csv")  # Make sure your zip extracts to this path
    df["text"] = df["title"].fillna("") + ". " + df["description"].fillna("")
    return df


# Embed the corpus and build a FAISS index for fast similarity search;
# cached so the corpus is only encoded once per session.
@st.cache_resource
def build_index(texts):
    embeddings = embedder.encode(texts, convert_to_numpy=True).astype("float32")
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index


df = load_data()
corpus = df["text"].tolist()
index = build_index(corpus)

st.title("🧠 Climate News Fact Checker")

user_input = st.text_input("Enter a claim or statement to verify:")

if user_input:
    # Embed the user query
    query_embedding = embedder.encode([user_input], convert_to_numpy=True).astype("float32")

    # Search for the most similar news articles
    top_k = 3
    D, I = index.search(query_embedding, top_k)  # distances, indices

    # Get the top matched articles
    results = [corpus[i] for i in I[0]]

    # Display retrieved articles
    st.subheader("🔍 Retrieved News Snippets")
    for idx, res in enumerate(results):
        st.write(f"**Snippet {idx + 1}:** {res}")

    # Combine the retrieved snippets into a single context for generation
    context = " ".join(results)
    prompt = f"Claim: {user_input}\nContext: {context}\nAnswer:"

    # Generate the fact-check answer
    st.subheader("✅ Fact Check Result")
    response = generator(prompt, max_length=100, do_sample=False)[0]["generated_text"]
    st.write(response)
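
# Usage (a minimal sketch of how to run the app; the file name app.py and the
# CSV location are assumptions, not fixed by the script above):
#
#   pip install streamlit pandas faiss-cpu sentence-transformers transformers
#   streamlit run app.py
#
# Streamlit opens the app in the browser; enter a claim in the text box to see
# the retrieved news snippets and the generated fact-check answer.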