Spaces:

navidved
/

tts_labeling

Running

App Files Files Community

Navid Arabi committed on May 23

Commit

d86a872

1 Parent(s): d271760

add seed func

Browse files

Files changed (4) hide show

app.py +5 -3
config.py +4 -1
import_db.py +0 -71
seed_db.py +52 -0

app.py CHANGED Viewed

@@ -1,7 +1,9 @@
 import gradio as gr
-def greet(name):  # Echo handler: returns its argument unchanged.
-    return name
-demo = gr.Interface(fn=greet, outputs="text")  # Wraps greet with a text output; no `inputs` argument is passed.
 demo.launch()

 import gradio as gr
+from seed_db import seed
+def run():
+    return seed()
+demo = gr.Interface(fn=run, outputs="text")
 demo.launch()

config.py CHANGED Viewed

@@ -6,4 +6,7 @@ db_config = {
     'password': os.environ.get('DB_PASSWORD'),
     'host': os.environ.get('DB_HOST'),
     'database': os.environ.get('DB_NAME')
-}

     'password': os.environ.get('DB_PASSWORD'),
     'host': os.environ.get('DB_HOST'),
     'database': os.environ.get('DB_NAME')
+}
+hf_token = os.environ.get('HF_TOKEN')
+hf_tts_ds_repo = os.environ.get('navidved/channelb-raw-data')

import_db.py DELETED Viewed

@@ -1,71 +0,0 @@
-from datasets import load_dataset
-from huggingface_hub import login
-import os
-token = os.environ.get('HF_TOKEN')
-login(token=token)
-dataset = load_dataset("navidved/channelb-raw-data", split="train", trust_remote_code=True)
-print(dataset.column_names)
-print(dataset[0])
-import mysql.connector
-# Connect to the database (set the values as needed)
-conn = mysql.connector.connect(
-    host="",
-    user="",
-    password="",
-    database="",
-    port=32107,
-    charset="utf8mb4",
-    use_unicode=True
-)
-cursor = conn.cursor()
-# Create the table if it does not exist
-cursor.execute("""
-CREATE TABLE IF NOT EXISTS tts_data (
-    id INT AUTO_INCREMENT PRIMARY KEY,
-    filename VARCHAR(255),
-    sentence TEXT
-)
-""")
-# Number of records per batch
-batch_size = 1000
-batch = []
-for i, item in enumerate(dataset):
-    filename = f"sample_{i}.wav"
-    sentence = item["sentence"]
-    batch.append((filename, sentence))
-    # When the batch is full
-    if len(batch) == batch_size:
-        cursor.executemany(
-            "INSERT INTO tts_data (filename, sentence) VALUES (%s, %s)", batch
-        )
-        conn.commit()
-        print(f"✅ {i + 1} رکورد تا الان ثبت شد")
-        batch = []
-# Insert the remaining records (fewer than batch_size)
-if batch:
-    cursor.executemany(
-        "INSERT INTO tts_data (filename, sentence) VALUES (%s, %s)", batch
-    )
-    conn.commit()
-    print(f"✅ آخرین {len(batch)} رکورد هم ثبت شد")
-# Close the connection
-cursor.close()
-conn.close()

seed_db.py ADDED Viewed

	@@ -0,0 +1,52 @@

+import mysql.connector
+from datasets import load_dataset
+from huggingface_hub import login
+import config
+def seed():
+    login(token=config.hf_token)
+    dataset = load_dataset(config.hf_tts_ds_repo, split="train", trust_remote_code=True)
+    print(dataset.column_names)
+    print(dataset[0])
+    conn = mysql.connector.connect(config.db_config)
+    cursor = conn.cursor()
+    cursor.execute(
+        """
+    CREATE TABLE IF NOT EXISTS tts_data (
+        id INT AUTO_INCREMENT PRIMARY KEY,
+        filename VARCHAR(255),
+        sentence TEXT
+    )
+    """
+    )
+    batch_size = 1000
+    batch = []
+    for i, item in enumerate(dataset):
+        filename = f"sample_{i}.wav"
+        sentence = item["sentence"]
+        batch.append((filename, sentence))
+        if len(batch) == batch_size:
+            cursor.executemany(
+                "INSERT INTO tts_data (filename, sentence) VALUES (%s, %s)", batch
+            )
+            conn.commit()
+            print(f"✅ {i + 1} records saved!")
+            batch = []
+    if batch:
+        cursor.executemany(
+            "INSERT INTO tts_data (filename, sentence) VALUES (%s, %s)", batch
+        )
+        conn.commit()
+        print(f"✅ last {len(batch)} records saved.")
+    cursor.close()
+    conn.close()
+    return "done!"