nyc_taxi_app / nyc_taxi_app.py
Daniela-C's picture
Update nyc_taxi_app.py
c8c8b72 verified
# Streamlit app: preview a Hugging Face taxi dataset and plot a histogram
# of one of its numeric columns.
import os

# Must be set before matplotlib is imported so that matplotlib picks up a
# writable config/cache directory.
os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib_cache"

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from datasets import Dataset, load_dataset

# Hugging Face Hub repository that holds the trip data.
DATASET_REPO = "Daniela-C/Yellow_tripdata_2025"


@st.cache_data
def load_data() -> pd.DataFrame:
    """Load the ``train`` split of ``DATASET_REPO`` as a pandas DataFrame.

    The result is cached by Streamlit, so the dataset is not reloaded and
    reconverted every time a widget interaction reruns the script.

    Only the ``train`` split is requested: it is the only split this app
    reads, and asking for splits a dataset does not define raises an error.

    NOTE: accessing the dataset may require logging in first, e.g. with
    ``huggingface-cli login``.
    """
    dataset = load_dataset(DATASET_REPO, split="train")
    return dataset.to_pandas()


st.title("📊 Parquet Data Explorer")

df = load_data()

st.write("Sample of Your Data:")
st.dataframe(df.head())

# Visualize a numeric column if at least one exists.
numeric_cols = df.select_dtypes(include="number").columns.tolist()
if numeric_cols:
    selected_col = st.selectbox("Choose a numeric column to plot:", numeric_cols)
    st.write(f"### Histogram for `{selected_col}`")
    fig, ax = plt.subplots()
    df[selected_col].hist(bins=30, ax=ax)
    st.pyplot(fig)
    # pyplot keeps a global reference to every figure it creates; close this
    # one once rendered so reruns do not accumulate open figures.
    plt.close(fig)
else:
    st.warning("No numeric columns found in your dataset.")