import streamlit as st from transformers import pipeline import PyPDF2 import docx import textwrap # Streamlit Page Config st.set_page_config( page_title="TextSphere", page_icon="🤖", layout="wide", initial_sidebar_state="expanded" ) # Footer st.markdown("""
""", unsafe_allow_html=True) # Load Model @st.cache_resource def load_models(): try: summarization_model = pipeline("summarization", model="facebook/bart-large-cnn") except Exception as e: raise RuntimeError(f"Failed to load model: {str(e)}") return summarization_model summarization_model = load_models() # Function to Extract Text from PDF def extract_text_from_pdf(uploaded_pdf): try: pdf_reader = PyPDF2.PdfReader(uploaded_pdf) pdf_text = "" for page in pdf_reader.pages: text = page.extract_text() if text: pdf_text += text + "\n" if not pdf_text.strip(): st.error("No text found in the PDF.") return None return pdf_text except Exception as e: st.error(f"Error reading the PDF: {e}") return None # Function to Extract Text from TXT def extract_text_from_txt(uploaded_txt): try: return uploaded_txt.read().decode("utf-8").strip() except Exception as e: st.error(f"Error reading the TXT file: {e}") return None # Function to Extract Text from DOCX def extract_text_from_docx(uploaded_docx): try: doc = docx.Document(uploaded_docx) return "\n".join([para.text for para in doc.paragraphs]).strip() except Exception as e: st.error(f"Error reading the DOCX file: {e}") return None # Function to Split Text into 1024-Token Chunks def chunk_text(text, max_tokens=1024): return textwrap.wrap(text, width=max_tokens) # Sidebar for Task Selection (Default: Text Summarization) st.sidebar.title("AI Solutions") option = st.sidebar.selectbox( "Choose a task", ["Text Summarization", "Question Answering", "Text Classification", "Language Translation"], index=0 # Default to "Text Summarization" ) # Text Summarization Task if option == "Text Summarization": st.title("📄 Text Summarization") st.markdown("