"""Streamlit app that previews a Hugging Face dataset and plots a histogram.

The script loads one split of the ``Daniela-C/Yellow_tripdata_2025`` dataset
from the Hugging Face Hub, shows the first rows, and lets the user pick a
numeric column to plot as a 30-bin histogram.
"""

import os

# Set before matplotlib is imported: MPLCONFIGDIR tells matplotlib where to
# keep its config/cache files, and /tmp is used here as that location.
os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib_cache"

import matplotlib.pyplot as plt  # noqa: E402
import pandas as pd  # noqa: E402
import streamlit as st  # noqa: E402
# `Dataset` is not used below; the import is kept from the original script in
# case other code relies on it.
from datasets import Dataset, load_dataset  # noqa: E402,F401

DATASET_ID = "Daniela-C/Yellow_tripdata_2025"


@st.cache_data
def load_data(split: str = "train") -> pd.DataFrame:
    """Load one split of the dataset from the Hub as a pandas DataFrame.

    The result is cached by Streamlit so that script reruns (triggered by
    every widget interaction) do not load the dataset again.

    Accessing the dataset may require logging in first, e.g. with
    ``huggingface-cli login``.

    Args:
        split: Name of the dataset split to load. Only ``"train"`` is
            requested by default; other splits ("validation", "test") are
            not assumed to exist for this dataset.

    Returns:
        The requested split converted to a ``pandas.DataFrame``.
    """
    return load_dataset(DATASET_ID, split=split).to_pandas()


st.title("📊 Parquet Data Explorer")

df = load_data()

st.write("Sample of Your Data:")
st.dataframe(df.head())

# Visualize a numeric column if the dataset has any.
numeric_cols = df.select_dtypes(include="number").columns

if not numeric_cols.empty:
    selected_col = st.selectbox("Choose a numeric column to plot:", numeric_cols)
    st.write(f"### Histogram for `{selected_col}`")

    fig, ax = plt.subplots()
    df[selected_col].hist(bins=30, ax=ax)
    st.pyplot(fig)
else:
    st.warning("No numeric columns found in your dataset.")