# Spaces:
# Sleeping
# Sleeping
import faiss
import pandas as pd
import streamlit as st
import torch
from sentence_transformers import SentenceTransformer
from transformers import pipeline
# Load models for embeddings and answer generation.
@st.cache_resource
def _load_models():
    """Build the sentence embedder and the text2text generator once.

    Streamlit re-executes the whole script on every widget interaction, so
    constructing the models at module level would reload them for each
    query. ``st.cache_resource`` keeps one shared instance of each instead.

    Returns:
        tuple: ``(embedder, generator)`` — the ``SentenceTransformer`` model
        and the ``text2text-generation`` pipeline.
    """
    sentence_embedder = SentenceTransformer('all-MiniLM-L6-v2')
    # NOTE(review): "facebook/bart-large" looks like a base (not
    # instruction-tuned) checkpoint — confirm it produces usable answers
    # for the "Claim/Context/Answer" prompt built further down.
    text_generator = pipeline("text2text-generation", model="facebook/bart-large")
    return sentence_embedder, text_generator


embedder, generator = _load_models()
# Load your climate news dataset (title + description).
# TODO(review): the script never named a dataset file; this default is a
# placeholder — point it at the real CSV.
DEFAULT_DATA_PATH = "climate_news.csv"


def load_data(path=DEFAULT_DATA_PATH):
    """Read the news dataset and add a combined ``text`` column.

    Args:
        path: Location of the CSV file (anything ``pandas.read_csv``
            accepts). It must provide ``title`` and ``description`` columns.

    Returns:
        pandas.DataFrame: the loaded rows plus a ``text`` column holding
        ``"<title>. <description>"``, with missing values treated as empty
        strings.

    Raises:
        ValueError: if the ``title`` or ``description`` column is absent.
    """
    df = pd.read_csv(path)

    # Fail with a clear message instead of an opaque KeyError further down.
    missing = [col for col in ("title", "description") if col not in df.columns]
    if missing:
        raise ValueError(
            f"dataset is missing required column(s): {', '.join(missing)}"
        )

    df["text"] = df["title"].fillna('') + ". " + df["description"].fillna('')
    return df
@st.cache_resource
def _build_index():
    """Load the dataset, embed every article and build the FAISS index.

    The result is cached because Streamlit re-executes the script on every
    widget interaction; without caching the whole corpus would be re-encoded
    for each query.

    Returns:
        tuple: ``(df, corpus, corpus_embeddings, index)`` — the loaded
        DataFrame, its ``text`` column as a list, the embedding tensor
        produced by ``embedder.encode`` and the populated
        ``faiss.IndexFlatL2``.
    """
    # The script previously read ``df`` without ever assigning it; the
    # dataset has to come from load_data().
    frame = load_data()
    texts = frame["text"].tolist()
    embeddings = embedder.encode(texts, convert_to_tensor=True)

    # Build FAISS index for fast similarity search
    flat_index = faiss.IndexFlatL2(embeddings.shape[1])
    flat_index.add(embeddings.cpu().detach().numpy())
    return frame, texts, embeddings, flat_index


df, corpus, corpus_embeddings, index = _build_index()
# Page: take a claim, retrieve the closest news snippets, then ask the
# generator for an answer grounded in those snippets.
# NOTE(review): the leading characters of the three headings below look like
# mis-decoded emoji — confirm the intended glyphs before changing them.
st.title("π§ Climate News Fact Checker")
user_input = st.text_input("Enter a claim or statement to verify:")

if user_input:
    # Embed the user query
    query_embedding = embedder.encode([user_input])

    # Search similar news articles; never request more neighbours than the
    # index actually holds.
    top_k = min(3, index.ntotal)
    results = []
    if top_k > 0:
        _distances, neighbor_ids = index.search(query_embedding, top_k)
        # FAISS marks missing neighbours with -1; indexing the corpus with
        # -1 would silently return its last article, so drop those ids.
        results = [corpus[i] for i in neighbor_ids[0] if i >= 0]

    # Display retrieved articles
    st.subheader("π Retrieved News Snippets")
    if not results:
        st.warning("No news articles are available to check this claim against.")
    else:
        for position, res in enumerate(results, start=1):
            st.write(f"**Snippet {position}:** {res}")

        # Combine into context for generation
        context = " ".join(results)
        prompt = f"Claim: {user_input}\nContext: {context}\nAnswer:"

        # Generate answer
        st.subheader("β Fact Check Result")
        response = generator(prompt, max_length=100, do_sample=False)[0]['generated_text']
        st.write(response)