# Example rows, each a list of six strings. Every row shares the same first,
# second, third and fifth fields; only the fourth field and the wav file name
# (sixth field) differ between rows.
# NOTE(review): the fourth field appears to be the expected speaker count (it
# matches the count spelled out in each wav file name) -- confirm against the
# code that consumes this list. The meaning of the constant fifth field ("0")
# is not visible from this file.
examples = [
    [
        "3D-Speaker",
        "3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx|37.8MB",
        "pyannote/segmentation-3.0",
        speaker_count,
        "0",
        wav_name,
    ]
    # One (fourth field, wav file) pair per example row, in display order.
    for speaker_count, wav_name in (
        ("4", "0-four-speakers-zh.wav"),
        ("2", "1-two-speakers-en.wav"),
        ("2", "2-two-speakers-en.wav"),
        ("2", "3-two-speakers-en.wav"),
    )
]