rahideer committed
Commit ad4cc62 · verified · 1 Parent(s): 26a3d40

Update app.py

Files changed (1)
  1. app.py +45 -35
app.py CHANGED
@@ -1,44 +1,54 @@
 import streamlit as st
 import pandas as pd
-from datasets import load_dataset
-from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration

-# Load AG News dataset from Hugging Face
-dataset = load_dataset("kk0105/ag-news", split="train")

-# Tokenizer and Model setup for RAG
-tokenizer = RagTokenizer.from_pretrained("facebook/rag-token-nq")
-retriever = RagRetriever.from_pretrained("facebook/rag-token-nq", index_name="default")
-model = RagSequenceForGeneration.from_pretrained("facebook/rag-token-nq")

-# Function to generate response using RAG
-def generate_answer(query):
-    # Tokenize input query
-    inputs = tokenizer(query, return_tensors="pt")

-    # Retrieve relevant documents from dataset
-    input_ids = inputs["input_ids"]
-    question_embedding = retriever.compute_question_embeddings(input_ids)
-    context_input_ids = retriever.retrieve(input_ids, question_embedding)

-    # Generate an answer using the retrieved context
-    outputs = model.generate(input_ids=input_ids, context_input_ids=context_input_ids)
-
-    # Decode the answer and return it
-    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return answer
-
-# Streamlit interface
-st.title("News Fact Checker")
-st.write("""
-**Welcome to the News Fact Checker!**
-Input a claim or question about a news topic, and we will verify or refute it based on recent news snippets.
-""")

-# User input for claim
-user_claim = st.text_input("Enter your claim or question:")

-if user_claim:
-    with st.spinner('Fetching relevant news snippets...'):
-        answer = generate_answer(user_claim)
-        st.write(f"**Fact Check Answer:** {answer}")
 import streamlit as st
 import pandas as pd
+import torch
+import faiss
+from sentence_transformers import SentenceTransformer
+from transformers import pipeline

+# Load model for embeddings and QA generation
+embedder = SentenceTransformer('all-MiniLM-L6-v2')
+generator = pipeline("text2text-generation", model="facebook/bart-large")

+# Load your climate news dataset (title + description)
+@st.cache_data
+def load_data():
+    df = pd.read_csv("climate_news.csv")  # Make sure your zip extracts to this
+    df["text"] = df["title"].fillna('') + ". " + df["description"].fillna('')
+    return df

+df = load_data()
+corpus = df["text"].tolist()
+corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)

+# Build FAISS index for fast similarity search
+index = faiss.IndexFlatL2(corpus_embeddings.shape[1])
+index.add(corpus_embeddings.cpu().detach().numpy())

+st.title("🧠 Climate News Fact Checker")

+user_input = st.text_input("Enter a claim or statement to verify:")

+if user_input:
+    # Embed the user query
+    query_embedding = embedder.encode([user_input])
+
+    # Search similar news articles
+    top_k = 3
+    D, I = index.search(query_embedding, top_k)
+
+    # Get the top matched articles
+    results = [corpus[i] for i in I[0]]
+
+    # Display retrieved articles
+    st.subheader("🔍 Retrieved News Snippets")
+    for idx, res in enumerate(results):
+        st.write(f"**Snippet {idx+1}:** {res}")
+
+    # Combine into context for generation
+    context = " ".join(results)
+    prompt = f"Claim: {user_input}\nContext: {context}\nAnswer:"
+
+    # Generate answer
+    st.subheader("✅ Fact Check Result")
+    response = generator(prompt, max_length=100, do_sample=False)[0]['generated_text']
+    st.write(response)
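
The new app.py reads a local climate_news.csv with "title" and "description" columns. A minimal sketch of creating such a file for a quick local test; the file name and column names come from the diff above, while the sample rows are purely illustrative:

# Hypothetical sample data for local testing; only the file name
# "climate_news.csv" and the columns "title" / "description" are taken
# from the new app.py — the row contents are made up for illustration.
import pandas as pd

sample = pd.DataFrame({
    "title": [
        "Global sea levels rising faster than projected",
        "New solar farm begins operation in the desert",
    ],
    "description": [
        "Researchers report accelerating sea level rise in a new study.",
        "The facility is expected to power thousands of homes.",
    ],
})
sample.to_csv("climate_news.csv", index=False)

# Sanity check: reproduce the text construction used in load_data()
df = pd.read_csv("climate_news.csv")
df["text"] = df["title"].fillna("") + ". " + df["description"].fillna("")
print(df["text"].tolist())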