""" Configuration module for the Fake News Detector application. This module handles loading configuration parameters, API keys, and source credibility data needed for the fact checking system. It manages environment variables and file-based configurations. """ import os import json import logging from pathlib import Path # Configure logger logger = logging.getLogger("misinformation_detector") # Base paths ROOT_DIR = Path(__file__).parent.absolute() DATA_DIR = ROOT_DIR / "data" # Ensure data directory exists DATA_DIR.mkdir(exist_ok=True) # First try to get API keys from Streamlit secrets, then fall back to environment variables # try: # import streamlit as st # OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY", os.environ.get("OPENAI_API_KEY", "")) # NEWS_API_KEY = st.secrets.get("NEWS_API_KEY", os.environ.get("NEWS_API_KEY", "")) # FACTCHECK_API_KEY = st.secrets.get("FACTCHECK_API_KEY", os.environ.get("FACTCHECK_API_KEY", "")) # except (AttributeError, ImportError): # # Fall back to environment variables if Streamlit secrets aren't available # OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "") # NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "") # FACTCHECK_API_KEY = os.environ.get("FACTCHECK_API_KEY", "") try: import streamlit as st OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY", os.environ.get("OPENAI_API_KEY", "")) NEWS_API_KEY = st.secrets.get("NEWS_API_KEY", os.environ.get("NEWS_API_KEY", "")) FACTCHECK_API_KEY = st.secrets.get("FACTCHECK_API_KEY", os.environ.get("FACTCHECK_API_KEY", "")) except (AttributeError, ImportError): # For local testing only - REMOVE BEFORE COMMITTING! OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "sk-proj-iwzefHOGPoeAzC0mNUsIT3BlbkFJlGzELYyK52szvpv3MKMY") NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "43ebe77036904dc1a150893a40d10bb3") FACTCHECK_API_KEY = os.environ.get("FACTCHECK_API_KEY", "AIzaSyD9VqVCk_9gsEfsvstES5HW-195F5WgUuA") # Log secrets status (but not the values) if OPENAI_API_KEY: logger.info("OPENAI_API_KEY is set") else: logger.warning("OPENAI_API_KEY not set. The application will not function properly.") if NEWS_API_KEY: logger.info("NEWS_API_KEY is set") else: logger.warning("NEWS_API_KEY not set. News evidence retrieval will be limited.") if FACTCHECK_API_KEY: logger.info("FACTCHECK_API_KEY is set") else: logger.warning("FACTCHECK_API_KEY not set. 
# Set API key in environment to ensure it's available to all components
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Source credibility file path
source_cred_file = DATA_DIR / "source_credibility.json"

def load_source_credibility():
    """
    Load source credibility data from a JSON file.

    Returns:
        dict: Mapping of domain names to credibility scores (0-1).
              Empty dict if the file is not found or has errors.
    """
    try:
        if source_cred_file.exists():
            with open(source_cred_file, 'r') as f:
                return json.load(f)
        else:
            logger.warning(f"Source credibility file not found: {source_cred_file}")
            return {}
    except Exception as e:
        logger.error(f"Error loading source credibility file: {e}")
        return {}

# Load source credibility once at module import
SOURCE_CREDIBILITY = load_source_credibility()

# Rate limiting configuration
RATE_LIMITS = {
    # api_name: {"requests": max_requests, "period": period_in_seconds}
    "newsapi": {"requests": 100, "period": 3600},         # 100 requests per hour
    "factcheck": {"requests": 1000, "period": 86400},     # 1000 requests per day
    "semantic_scholar": {"requests": 10, "period": 300},  # 10 requests per 5 minutes
    "wikidata": {"requests": 60, "period": 60},           # 60 requests per minute
    "wikipedia": {"requests": 200, "period": 60},         # 200 requests per minute
    "rss": {"requests": 300, "period": 3600}              # 300 RSS requests per hour
}

# Error backoff settings
ERROR_BACKOFF = {
    "max_retries": 5,
    "initial_backoff": 1,  # seconds
    "backoff_factor": 2,   # exponential backoff
}

# RSS feed settings
RSS_SETTINGS = {
    "max_feeds_per_request": 10,  # Maximum number of feeds to try per request
    "max_age_days": 3,            # Maximum age of RSS items to consider
    "timeout_seconds": 5,         # Timeout for RSS feed requests
    "max_workers": 5              # Number of parallel workers for fetching feeds
}

# Semantic analysis settings
SEMANTIC_ANALYSIS_CONFIG = {
    "similarity_weight": 0.4,      # Weight for semantic similarity
    "entity_overlap_weight": 0.3,  # Weight for entity matching
    "base_weight": 0.3,            # Base relevance weight
    "temporal_boost": 1.2,         # Boost for recent evidence
    "temporal_penalty": 0.7,       # Penalty for outdated evidence
    "authority_boosts": {
        "scientific_consensus": 1.8,
        "fact_check": 1.5,
        "high_authority": 1.3
    }
}
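# --- Illustrative sketches (assumptions for documentation, not application code) ---

# For reference, a minimal sketch of the expected source_credibility.json
# shape: a flat object mapping domain names to 0-1 scores. The entries
# below are illustrative, not shipped data:
#
#   {"reuters.com": 0.9, "example-blog.net": 0.3}
#
# A caller might then look up a domain with a neutral default, e.g.
# SOURCE_CREDIBILITY.get("reuters.com", 0.5).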
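# A minimal sketch of how ERROR_BACKOFF might drive a retry loop around a
# network call. `fetch` is a hypothetical callable standing in for any
# request function; the real consumers of this config live elsewhere.
import time

def retry_with_backoff(fetch, *args, **kwargs):
    """Retry `fetch` with exponential backoff as configured in ERROR_BACKOFF."""
    delay = ERROR_BACKOFF["initial_backoff"]
    last_error = None
    for attempt in range(ERROR_BACKOFF["max_retries"]):
        try:
            return fetch(*args, **kwargs)
        except Exception as e:
            last_error = e
            if attempt < ERROR_BACKOFF["max_retries"] - 1:
                logger.warning(f"Attempt {attempt + 1} failed: {e}; retrying in {delay}s")
                time.sleep(delay)
                delay *= ERROR_BACKOFF["backoff_factor"]
    raise RuntimeError("All retries exhausted") from last_error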
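# A sliding-window limiter sketch built on RATE_LIMITS. The `allow_request`
# helper is hypothetical; it shows how the config is shaped to be read, not
# how the application actually enforces its limits.
from collections import defaultdict, deque

_request_log = defaultdict(deque)

def allow_request(api_name):
    """Return True if another call to `api_name` fits inside its rate window."""
    limit = RATE_LIMITS.get(api_name)
    if limit is None:
        return True  # no limit configured for this API
    now = time.monotonic()
    window = _request_log[api_name]
    # Evict timestamps older than the configured period
    while window and now - window[0] > limit["period"]:
        window.popleft()
    if len(window) >= limit["requests"]:
        return False
    window.append(now)
    return True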
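# A sketch of how RSS_SETTINGS might bound parallel feed fetching.
# `fetch_feed(url, timeout)` is a hypothetical per-feed downloader; only
# max_feeds_per_request, max_workers, and timeout_seconds are exercised here.
from concurrent.futures import ThreadPoolExecutor, as_completed

def fetch_feeds(feed_urls, fetch_feed):
    """Fetch up to max_feeds_per_request feeds in parallel, skipping failures."""
    results = []
    urls = feed_urls[:RSS_SETTINGS["max_feeds_per_request"]]
    with ThreadPoolExecutor(max_workers=RSS_SETTINGS["max_workers"]) as pool:
        futures = {
            pool.submit(fetch_feed, url, RSS_SETTINGS["timeout_seconds"]): url
            for url in urls
        }
        for future in as_completed(futures):
            try:
                results.append(future.result())
            except Exception as e:
                logger.warning(f"RSS feed {futures[future]} failed: {e}")
    return results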
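# A hedged sketch of how the SEMANTIC_ANALYSIS_CONFIG weights might combine
# into an evidence relevance score. `similarity`, `entity_overlap`,
# `is_recent`, and `authority_type` are hypothetical inputs computed
# elsewhere in the pipeline; the real scoring code may differ.
def score_evidence(similarity, entity_overlap, is_recent, authority_type=None):
    """Combine similarity, entity overlap, recency, and authority into one score."""
    cfg = SEMANTIC_ANALYSIS_CONFIG
    score = (
        cfg["base_weight"]
        + cfg["similarity_weight"] * similarity
        + cfg["entity_overlap_weight"] * entity_overlap
    )
    # Recent evidence is boosted; outdated evidence is penalised
    score *= cfg["temporal_boost"] if is_recent else cfg["temporal_penalty"]
    # Apply an extra multiplier for authoritative source types, defaulting to 1.0
    score *= cfg["authority_boosts"].get(authority_type, 1.0)
    return score

# e.g. score_evidence(0.8, 0.5, True, "fact_check")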