Spaces:

pepperumo
/

MVTec_Website

Running

File size: 7,036 Bytes
import pandas as pd
import plotly.graph_objects as go
import streamlit as st
from PIL import Image
from joypy import joyplot
import seaborn as sns
import matplotlib.pyplot as plt

# Function to load dataset
def load_dataset():
    file_path = "Data/mvtec_meta_features_dataset.csv"
    try:
        complete_df = pd.read_csv(file_path)

        # Show available column names for debugging
        print("Available columns:", complete_df.columns)

        # Verify column presence
        required_columns = ["category", "set_type", "anomaly_status"]
        for col in required_columns:
            if col not in complete_df.columns:
                raise KeyError(f"Missing required column: {col}")
        
        # Define the subclasses for each category
        subclasses = {
            'Texture-Based': ['carpet', 'wood', 'tile', 'leather', 'zipper'],
            'Industrial Components': ['cable', 'transistor', 'screw', 'grid', 'metal_nut'],
            'Consumer Products': ['bottle', 'capsule', 'toothbrush'],
            'Edible': ['hazelnut', 'pill']
        }

        # Add a new column to the DataFrame to store the subclass
        complete_df['subclass'] = complete_df['category'].apply(
            lambda x: next((key for key, value in subclasses.items() if x in value), 'Unknown')
        )

        # Reorder columns to place 'subclass' after 'category'
        cols = list(complete_df.columns)
        cols.insert(cols.index('category') + 1, cols.pop(cols.index('subclass')))
        complete_df = complete_df[cols]

        return complete_df
    except Exception as e:
        st.error(f"Error loading dataset: {e}")
        return None

# Function to generate dataset statistics
def dataset_statistics():
    df = load_dataset()
    if df is not None:
        print("Loaded dataset preview:\n", df.head())  # Debugging step

        # Aggregate counts for each category and condition
        train_normal = df[(df['set_type'] == 'train') & (df['anomaly_status'] == 'normal')].groupby('category').size()
        test_normal = df[(df['set_type'] == 'test') & (df['anomaly_status'] == 'normal')].groupby('category').size()
        test_anomalous = df[(df['set_type'] == 'test') & (df['anomaly_status'] == 'anomalous')].groupby('category').size()

        # Combine into a single DataFrame
        final_summary = pd.DataFrame({
            'Train Normal Images': train_normal,
            'Test Normal Images': test_normal,
            'Test Anomalous Images': test_anomalous
        }).fillna(0).reset_index()

        return final_summary
    return None

# Function to generate the bar chart
def dataset_distribution_chart(df):
    fig = go.Figure()

    fig.add_trace(go.Bar(
        x=df['category'], 
        y=df['Train Normal Images'], 
        name='Train Normal Images',
        marker_color='blue'
    ))
    fig.add_trace(go.Bar(
        x=df['category'], 
        y=df['Test Normal Images'], 
        name='Test Normal Images',
        marker_color='red'
    ))
    fig.add_trace(go.Bar(
        x=df['category'], 
        y=df['Test Anomalous Images'], 
        name='Test Anomalous Images',
        marker_color='green'
    ))

    # Update layout
    fig.update_layout(
        title="Distribution of Normal and Anomalous Images per Category",
        xaxis_title="Categories",
        yaxis_title="Number of Images",
        barmode='stack',
        legend_title="Image Types"
    )

    # Display chart in Streamlit
    st.plotly_chart(fig, use_container_width=True)

# Function to display the complete dataframe with expander
def display_dataframe():
    df = load_dataset()
    if df is not None:
        with st.expander("Show Complete DataFrame"):
            st.dataframe(df)



def plot_bgr_pixel_densities(df, pixel_columns=['num_pixels_b', 'num_pixels_g', 'num_pixels_r']):
    """

    Generate JoyPy density plots for pixel counts of BGR channels for a given category.



    Parameters:

        df (pd.DataFrame): Filtered DataFrame for a single category.

        pixel_columns (list): List of column names for BGR pixel counts.



    Returns:

        None

    """
    if df.empty:
        st.warning("⚠️ No data available for the selected category.")
        return

    # Plot JoyPy density plot
    fig, axes = joyplot(
        data=df,
        by="category",  # Group by category
        column=pixel_columns,
        color=['blue', 'green', 'red'],  # Colors for BGR channels
        alpha=0.5,
        fade=True,
        legend=True,
        linewidth=1.0,
        overlap=3,
        figsize=(8, 6)  # Adjust the figure size here
    )

    # Add title and labels
    plt.title(f'Density Plots for {df["category"].unique()[0]}', fontsize=14)
    plt.xlabel('Number of Pixels Density', fontsize=12)
    plt.ylabel('Categories', fontsize=12)

    # Show the plot in Streamlit
    st.pyplot(fig)

    
def plot_pair_plots(complete_df):
    """

    Generate and display pair plots for each category in the dataset.



    Parameters:

        complete_df (pd.DataFrame): The input DataFrame containing image features and categories.



    Returns:

        None

    """

    # Define the features to be included in the pairplot
    features = ['num_pixels_b', 'num_pixels_g', 'num_pixels_r', 'perceived_brightness']

    # Create a separate pairplot for each category
    for category in complete_df['category'].unique():
        # Filter data for current category
        category_df = complete_df[complete_df['category'] == category]
        
        # Check if the filtered DataFrame is not empty
        if not category_df.empty:
            # Create PairGrid with hue and palette
            g = sns.PairGrid(category_df, vars=features, hue='anomaly_status', palette={'normal': 'blue', 'anomalous': 'red'})
            
            # Map the plots to the grid
            g.map_upper(sns.scatterplot, alpha=0.6)
            g.map_diag(sns.histplot, kde=True)
            g.map_lower(sns.scatterplot, alpha=0.6)  
            
            # Add legend
            g.add_legend()
            
            # Customize the plot
            g.figure.suptitle(f'Feature Relationships for {category.title()}', y=1.02, fontsize=14)
            
            # Improve label readability
            for i in range(len(g.axes)):
                for j in range(len(g.axes)):
                    if g.axes[i][j] is not None:
                        g.axes[i][j].set_xlabel(g.axes[i][j].get_xlabel().replace('_', ' ').title())
                        g.axes[i][j].set_ylabel(g.axes[i][j].get_ylabel().replace('_', ' ').title())
            
            # Adjust legend position to the right without overlapping the plots
            g._legend.set_bbox_to_anchor((1.05, 0.5))
            g._legend.set_loc('center left')
            
            plt.tight_layout()
            st.pyplot(g.figure)