SakibRumu committed: Update app.py

app.py CHANGED
@@ -1,50 +1,72 @@
 import torch
-import timm
 import torch.nn as nn
 import gradio as gr
+from torchvision import models, transforms
 from PIL import Image
-from …
+from transformers import ViTModel
 
-# Define …
+# Define HybridCNNTransformer Model
 class HybridCNNTransformer(nn.Module):
     def __init__(self, num_classes=7):
         super(HybridCNNTransformer, self).__init__()
-
-        # …
-        self.…
-        … (remaining old __init__ lines truncated in the source view)
+
+        # CNN Feature Extractor (ResNet50)
+        self.cnn = models.resnet50(pretrained=True)
+        self.cnn = nn.Sequential(*list(self.cnn.children())[:-2])  # Remove FC layers
+
+        # Reduce channels (2048 → 64)
+        self.channel_reduction = nn.Conv2d(in_channels=2048, out_channels=64, kernel_size=1)
+
+        # Convert to 3 channels for ViT
+        self.to_rgb = nn.Conv2d(in_channels=64, out_channels=3, kernel_size=1)
+
+        # Vision Transformer
+        self.transformer = ViTModel.from_pretrained("google/vit-base-patch16-224")
+
+        # Fully Connected Layers (Classifier Head)
+        self.fc = nn.Sequential(
+            nn.Linear(768, 512),
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(512, num_classes)
+        )
 
     def forward(self, x):
-        … (old forward body truncated in the source view)
-        output = self.fc(transformer_features)
+        cnn_features = self.cnn(x)
+        reduced_features = self.channel_reduction(cnn_features)
+        rgb_features = self.to_rgb(reduced_features)
+        resized_features = nn.functional.interpolate(rgb_features, size=(224, 224), mode="bilinear", align_corners=False)
+
+        transformer_output = self.transformer(pixel_values=resized_features).last_hidden_state[:, 0, :]
+        output = self.fc(transformer_output)
         return output
 
-# Load …
+# Load Model
 model = HybridCNNTransformer(num_classes=7)
+state_dict = torch.load("transformer_emotion_recognition_model.pth", map_location=torch.device('cpu'))
+model.load_state_dict(state_dict, strict=False)
+model.eval()
 
-# …
-… (old device-setup lines truncated in the source view)
-model.to(device)
+# Define Preprocessing Transform
+transform = transforms.Compose([
+    transforms.Resize((224, 224)),
+    transforms.ToTensor(),
+    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+])
 
-# …
-…
+# Define Prediction Function
+def predict_emotion(image):
+    image = transform(image).unsqueeze(0)  # Add batch dimension
+    with torch.no_grad():
+        output = model(image)
+        probabilities = torch.nn.functional.softmax(output, dim=1)
+        confidence, predicted_class = torch.max(probabilities, 1)
+
+    class_labels = ["Angry", "Disgust", "Fear", "Happy", "Neutral", "Sad", "Surprise"]
+    predicted_emotion = class_labels[predicted_class.item()]
+    return predicted_emotion, f"{confidence.item() * 100:.2f}%"
 
-# Custom CSS for …
+# Custom CSS for UI Styling
 css = """
 body {
     background-color: #1e1e1e;
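The interesting change here is the new forward path: ResNet50 feature maps are squeezed through two 1×1 convolutions and bilinearly resized back to 224×224 so they can be fed to the ViT as pseudo-images. A minimal shape sanity check, assuming a 224×224 RGB input and the class exactly as defined above (the dummy tensor and the printed shapes are illustrative only, not part of the commit):

```python
import torch

model = HybridCNNTransformer(num_classes=7)
model.eval()

x = torch.randn(1, 3, 224, 224)        # dummy batch of one RGB image
with torch.no_grad():
    f = model.cnn(x)                    # ResNet50 trunk   -> (1, 2048, 7, 7)
    f = model.channel_reduction(f)      # 1x1 conv         -> (1, 64, 7, 7)
    f = model.to_rgb(f)                 # 1x1 conv         -> (1, 3, 7, 7)
    f = torch.nn.functional.interpolate(f, size=(224, 224), mode="bilinear", align_corners=False)
    cls = model.transformer(pixel_values=f).last_hidden_state[:, 0, :]  # CLS token -> (1, 768)
    logits = model.fc(cls)              # classifier head  -> (1, 7)
print(logits.shape)                     # torch.Size([1, 7])
```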
@@ -76,29 +98,6 @@ body {
 }
 """
 
-# Image Preprocessing for the model (assuming the model was trained with resized and normalized images)
-preprocess = transforms.Compose([
-    transforms.Resize((224, 224)),  # Adjust according to your model's input size
-    transforms.ToTensor(),
-    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Standard ImageNet normalization
-])
-
-# Prediction function
-def predict_emotion(image):
-    # Preprocess the image
-    image_tensor = preprocess(image).unsqueeze(0).to(device)  # Add batch dimension and move to device
-
-    # Make prediction
-    with torch.no_grad():
-        output = model(image_tensor)
-        _, predicted = torch.max(output, 1)  # Get the predicted class
-        confidence = torch.nn.functional.softmax(output, dim=1).max().item()  # Confidence score
-
-    # Return the predicted emotion label and confidence score
-    emotions = ["Anger", "Disgust", "Fear", "Happiness", "Sadness", "Surprise", "Neutral"]  # Modify labels as per your model
-    predicted_emotion = emotions[predicted.item()]
-    return predicted_emotion, confidence
-
 # Gradio Interface
 iface = gr.Interface(
     fn=predict_emotion,
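The rewritten predict_emotion (moved above the CSS block) derives both outputs from one softmax and returns the confidence as a formatted percentage string instead of a raw float; the label list also changed order (Angry … Surprise), which must match the order used at training time. A minimal local smoke test, assuming the app module is importable (sample_face.jpg is a hypothetical test image):

```python
from PIL import Image

# Hypothetical test image; convert to RGB so the 3-channel Normalize applies.
img = Image.open("sample_face.jpg").convert("RGB")
emotion, confidence = predict_emotion(img)
print(emotion, confidence)  # e.g. "Happy 93.41%"
```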
@@ -106,10 +105,10 @@ iface = gr.Interface(
     outputs=[gr.Textbox(label="Predicted Emotion"), gr.Textbox(label="Confidence")],
     live=True,
     title="Emotion Classification",
-    description="Upload an image to predict the emotion expressed in the image using a fine-tuned …
+    description="Upload an image to predict the emotion expressed in the image using a fine-tuned ResNet50 + Vision Transformer model.",
     css=css
 )
 
 # Launch the app
 if __name__ == "__main__":
-    iface.launch()
+    iface.launch()
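One behavioural consequence of the rewrite: the old model.to(device) call is gone, so inference now runs on CPU, consistent with map_location=torch.device('cpu') in the loader. If GPU execution were wanted back, a hedged sketch (predict_emotion_on_device is a hypothetical helper, not part of the commit):

```python
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def predict_emotion_on_device(image):
    # Same logic as predict_emotion, but with input and model on a common device.
    x = transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        probabilities = torch.nn.functional.softmax(model(x), dim=1)
    confidence, predicted_class = torch.max(probabilities, 1)
    class_labels = ["Angry", "Disgust", "Fear", "Happy", "Neutral", "Sad", "Surprise"]
    return class_labels[predicted_class.item()], f"{confidence.item() * 100:.2f}%"
```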