# NOTE(review): the lines that were here ("Spaces: ... File size ... commit
# hashes ... line-number gutter") were non-code page metadata picked up by a
# web scrape of the Hugging Face Space, not part of the program. Commented
# out / removed so the file parses as Python.
import gradio as gr
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
import torch
import torchaudio
# Load the voiceGUARD classifier and its audio processor from the HF Hub.
model_name = "Mrkomiljon/voiceGUARD"
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
processor = Wav2Vec2Processor.from_pretrained(model_name)
model.eval()  # inference only: disables dropout/batch-norm updates
# Run on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Class-index -> label mapping for the model's 7 output logits:
# one "Real" (human) class plus six AI vocoder/generator families.
id2label = {
    0: "diffwave",
    1: "melgan",
    2: "parallel_wave_gan",
    3: "Real",
    4: "wavegrad",
    5: "wavnet",
    6: "wavernn"
}
# Prediction funksiyasi
def predict_audio(file_path):
    """Classify an uploaded audio file as real or AI-generated.

    Args:
        file_path: Path to the audio file (Gradio passes a temp-file path).

    Returns:
        ``(class_name, confidence)`` on success, where ``confidence`` is the
        softmax probability of the predicted class, or ``("Error", message)``
        if loading/inference fails (shown in the UI instead of crashing).
    """
    target_sample_rate = 16000
    max_length = target_sample_rate * 10  # model input is capped at 10 s

    try:
        waveform, sample_rate = torchaudio.load(file_path)

        # Down-mix to a single channel (keep channel 0) BEFORE resampling and
        # padding. The original code resampled/padded every channel and only
        # then discarded all but channel 0 — same result, wasted work, since
        # Resample and pad operate independently per channel.
        if waveform.ndim > 1:
            waveform = waveform[0]

        # Resample if the file's rate differs from what the model expects.
        if sample_rate != target_sample_rate:
            resampler = torchaudio.transforms.Resample(
                orig_freq=sample_rate, new_freq=target_sample_rate
            )
            waveform = resampler(waveform)

        # Truncate or zero-pad to exactly 10 s.
        if waveform.size(-1) > max_length:
            waveform = waveform[..., :max_length]
        elif waveform.size(-1) < max_length:
            waveform = torch.nn.functional.pad(
                waveform, (0, max_length - waveform.size(-1))
            )

        # Preprocess into model-ready input values.
        inputs = processor(
            waveform.numpy(),
            sampling_rate=target_sample_rate,
            return_tensors="pt",
            padding=True,
        )
        input_values = inputs["input_values"].to(device)

        # Inference.
        with torch.no_grad():
            logits = model(input_values).logits
        probabilities = torch.nn.functional.softmax(logits, dim=-1)
        predicted_label = torch.argmax(probabilities, dim=-1).item()
        confidence = probabilities[0, predicted_label].item()
        class_name = id2label.get(predicted_label, "Unknown Class")

        return class_name, float(confidence)
    except Exception as e:
        # UI boundary: surface the failure message rather than raising.
        return "Error", str(e)
# Build the Gradio UI: one audio upload in, predicted class and
# confidence score out.
output_components = [
    gr.Label(label="Predicted Class"),
    gr.Label(label="Confidence"),
]
iface = gr.Interface(
    fn=predict_audio,
    # "filepath" hands the callback a temp-file path rather than raw samples.
    inputs=gr.Audio(type="filepath"),
    outputs=output_components,
    title="Human or AI-generated voice classification",
    description="Upload an audio file to classify it into one of the predefined categories.",
)

if __name__ == "__main__":
    iface.launch()