vukosi committed on
Commit
cf370ec
·
verified ·
1 Parent(s): 99700a7

Updated with a Claude Driven Modernization

Browse files
Files changed (1) hide show
  1. app.py +614 -51
app.py CHANGED
@@ -1,64 +1,627 @@
1
  import gradio as gr
 
 
 
 
 
2
  from transformers import pipeline
 
 
 
 
3
 
4
- translater_en_ss = pipeline("translation", model="dsfsi/en-ss-m2m100-combo", src_lang="en", tgt_lang="ss")
5
- translater_ss_en = pipeline("translation", model="dsfsi/ss-en-m2m100-combo", src_lang="ss", tgt_lang="en")
 
6
 
7
- def translate(inp, direction):
8
- if direction == 'en->ss':
9
- res = translater_en_ss(inp, max_length=512, early_stopping=True)[0]['translation_text']
10
- else:
11
- res = translater_ss_en(inp, max_length=512, early_stopping=True)[0]['translation_text']
12
- return res
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
- with gr.Blocks() as demo:
15
- with gr.Row():
16
- with gr.Column(scale=1):
17
- pass
18
- with gr.Column(scale=4, min_width=1000):
19
- gr.Image("logo_transparent_small.png", elem_id="logo", show_label=False, width=500)
20
- gr.Markdown(
21
- """
22
- <h1 style='text-align: center;'>Siswati-English Translation</h1>
23
- <p style='text-align: center;'>This space provides a bidirectional translation service from Siswati to English.</p>
24
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  )
26
- with gr.Column(scale=1):
27
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
 
29
  with gr.Row():
30
- with gr.Column(scale=1):
31
- pass
32
- with gr.Column(scale=4, min_width=1000):
33
- inp_text = gr.Textbox(lines=5, placeholder="Enter text (maximum 5 lines)", label="Input")
34
- direction = gr.Radio(choices=['en->ss', 'ss->en'], label='Direction')
35
- translate_button = gr.Button("Translate")
36
- output_text = gr.Textbox(label="Output")
37
- translate_button.click(translate, inputs=[inp_text, direction], outputs=output_text)
38
- with gr.Column(scale=1):
39
- pass
40
-
 
 
 
 
41
  with gr.Row():
42
- with gr.Column(scale=1):
43
- pass
44
- with gr.Column(scale=4, min_width=1000):
45
- gr.Markdown(
46
- """
47
- <div style='text-align: center;'>
48
- <a href='https://github.com/dsfsi/en-ss-m2m100-combo' target='_blank'>En-Ss GitHub</a> |
49
- <a href='https://github.com/dsfsi/ss-en-m2m100-combo' target='_blank'>Ss-En GitHub</a> |
50
- <a href='https://docs.google.com/forms/d/e/1FAIpQLSf7S36dyAUPx2egmXbFpnTBuzoRulhL5Elu-N1eoMhaO7v10w/viewform' target='_blank'>Feedback Form</a>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  </div>
52
  """
53
- )
54
- with gr.Column(scale=1):
55
- pass
 
 
 
 
 
 
 
 
 
 
 
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
- with gr.Accordion("More Information", open=False):
59
- gr.Markdown("""
60
- <h4 style="text-align: center;">Authors</h4>
61
- <div style='text-align: center;'>Vukosi Marivate, Richard Lastrucci</div>
62
- """)
63
-
64
- demo.launch()
 
1
  import gradio as gr
2
+ import logging
3
+ import time
4
+ import json
5
+ import csv
6
+ import io
7
  from transformers import pipeline
8
+ from typing import Tuple, Optional, List, Dict
9
+ import traceback
10
+ from datetime import datetime
11
+ import re
12
 
13
+ # Configure logging for debugging
14
+ logging.basicConfig(level=logging.INFO)
15
+ logger = logging.getLogger(__name__)
16
 
17
class LinguisticTranslationApp:
    """Siswati/English translator that also records simple text statistics.

    Holds one translation pipeline per direction and an in-memory list of
    past translations that can be exported as CSV.
    """

    def __init__(self):
        # Maps 'en_to_ss' / 'ss_to_en' to the loaded translation pipeline.
        self.translators = {}
        # One dict per successful translation made with save_to_history=True.
        self.translation_history = []
        self.load_models()

    def load_models(self):
        """Load both translation pipelines into ``self.translators``.

        Raises:
            Exception: whatever ``pipeline`` raised; it is logged and
                re-raised unchanged.
        """
        try:
            logger.info("Loading translation models...")
            self.translators['en_to_ss'] = pipeline(
                "translation",
                model="dsfsi/en-ss-m2m100-combo",
                src_lang="en",
                tgt_lang="ss"
            )
            self.translators['ss_to_en'] = pipeline(
                "translation",
                model="dsfsi/ss-en-m2m100-combo",
                src_lang="ss",
                tgt_lang="en"
            )
            logger.info("Models loaded successfully!")
        except Exception as e:
            logger.error(f"Error loading models: {str(e)}")
            # Bare raise keeps the original traceback intact.
            raise

    def analyze_text_complexity(self, text: str, lang: str) -> Dict:
        """Compute surface-level statistics for ``text``.

        Args:
            text: The text to measure.
            lang: Language code; ``'ss'`` adds the Siswati-specific counts.

        Returns:
            Dict with character/word/sentence counts, average lengths,
            unique word count and lexical diversity. For ``lang == 'ss'`` it
            also holds ``potential_agglutination``, ``click_consonants`` and
            ``tone_markers``.
        """
        import unicodedata

        words = text.split()
        # Sentences are whatever remains between runs of '.', '!' or '?'.
        sentences = [s.strip() for s in re.split(r'[.!?]+', text) if s.strip()]
        unique_words = len({word.lower() for word in words})

        # Basic linguistic metrics; ratios fall back to 0 on empty input.
        analysis = {
            'character_count': len(text),
            'word_count': len(words),
            'sentence_count': len(sentences),
            'avg_word_length': sum(len(word) for word in words) / len(words) if words else 0,
            'avg_sentence_length': len(words) / len(sentences) if sentences else 0,
            'unique_words': unique_words,
            'lexical_diversity': unique_words / len(words) if words else 0
        }

        # Language-specific features
        if lang == 'ss':  # Siswati
            # Words longer than 10 characters are used as a rough proxy.
            analysis['potential_agglutination'] = sum(1 for word in words if len(word) > 10)
            # Count the letters c/q/x case-insensitively so that
            # sentence-initial capitals are not missed.
            lowered = text.lower()
            analysis['click_consonants'] = sum(lowered.count(click) for click in ('c', 'q', 'x'))
            # Decompose first so precomposed letters (e.g. U+00E1) expose
            # their combining acute (U+0301) / grave (U+0300) marks.
            decomposed = unicodedata.normalize('NFD', text)
            analysis['tone_markers'] = decomposed.count('\u0301') + decomposed.count('\u0300')

        return analysis

    def translate_text(self, text: str, direction: str, save_to_history: bool = True) -> Tuple[str, str, bool, Dict]:
        """Translate ``text`` and compare statistics of source and output.

        Args:
            text: Source text; must be non-blank and at most 2000 characters.
            direction: UI direction label. Any value other than the
                English-to-Siswati label is treated as Siswati to English.
            save_to_history: When true, append the result to
                ``self.translation_history``.

        Returns:
            Tuple[str, str, bool, Dict]: (translated_text, status_message,
            success, analysis). On validation failure or any exception the
            translation is ``""``, success is ``False`` and analysis is ``{}``.
        """
        if not text or not text.strip():
            return "", "⚠️ Please enter some text to translate", False, {}

        if not direction:
            return "", "⚠️ Please select a translation direction", False, {}

        # Input validation
        if len(text) > 2000:  # Increased limit for linguistic work
            return "", "⚠️ Text is too long. Please limit to 2000 characters.", False, {}

        try:
            start_time = time.time()

            # Determine source and target languages
            if direction == 'English β†’ Siswati':
                translator = self.translators['en_to_ss']
                source_lang = "English"
                target_lang = "Siswati"
                source_code = "en"
                target_code = "ss"
            else:
                translator = self.translators['ss_to_en']
                source_lang = "Siswati"
                target_lang = "English"
                source_code = "ss"
                target_code = "en"

            logger.info(f"Translating from {source_lang} to {target_lang}")

            # Analyze source text
            source_analysis = self.analyze_text_complexity(text, source_code)

            # Perform translation
            result = translator(
                text,
                max_length=512,
                early_stopping=True,
                do_sample=False,
                num_beams=4  # Better quality for linguistic analysis
            )

            translation = result[0]['translation_text']

            # Analyze translated text
            target_analysis = self.analyze_text_complexity(translation, target_code)

            # Elapsed time covers both analyses and the model call.
            processing_time = time.time() - start_time
            timestamp = datetime.now().isoformat()

            # Linguistic comparison
            analysis = {
                'source': source_analysis,
                'target': target_analysis,
                'translation_ratio': len(translation) / len(text) if text else 0,
                'word_ratio': target_analysis['word_count'] / source_analysis['word_count'] if source_analysis['word_count'] else 0,
                'processing_time': processing_time,
                'timestamp': timestamp
            }

            # Save to history for linguistic research
            if save_to_history:
                self.translation_history.append({
                    'source_text': text,
                    'translated_text': translation,
                    'direction': direction,
                    'source_lang': source_lang,
                    'target_lang': target_lang,
                    'analysis': analysis,
                    'timestamp': timestamp
                })

            # Success message with linguistic metadata
            status_msg = f"βœ… Translation completed in {processing_time:.2f}s | Word ratio: {analysis['word_ratio']:.2f} | Character ratio: {analysis['translation_ratio']:.2f}"

            logger.info(f"Translation completed: {processing_time:.2f}s")

            return translation, status_msg, True, analysis

        except Exception as e:
            # Boundary handler: report the failure to the UI instead of raising.
            error_msg = f"❌ Translation failed: {str(e)}"
            logger.exception("Translation error: %s", e)
            return "", error_msg, False, {}

    def batch_translate(self, text_list: List[str], direction: str) -> List[Dict]:
        """Translate each non-blank item of ``text_list`` without saving history.

        Returns:
            One dict per translated item with keys ``index`` (1-based position
            in ``text_list``), ``source``, ``translation``, ``success`` and
            ``analysis``.
        """
        results = []
        for i, text in enumerate(text_list):
            if text.strip():
                translation, _status, success, analysis = self.translate_text(
                    text, direction, save_to_history=False
                )
                results.append({
                    'index': i + 1,
                    'source': text,
                    'translation': translation,
                    'success': success,
                    'analysis': analysis
                })
        return results

    def export_history_csv(self) -> Optional[str]:
        """Serialise the translation history as CSV text.

        Returns:
            The CSV document as a string, or ``None`` when the history is empty.
        """
        if not self.translation_history:
            return None

        output = io.StringIO()
        writer = csv.writer(output)

        # Headers
        writer.writerow([
            'Timestamp', 'Source Language', 'Target Language', 'Source Text',
            'Translation', 'Source Words', 'Target Words', 'Word Ratio',
            'Source Characters', 'Target Characters', 'Character Ratio',
            'Lexical Diversity (Source)', 'Lexical Diversity (Target)',
            'Processing Time (s)'
        ])

        # Data rows, in the same column order as the header above.
        for entry in self.translation_history:
            analysis = entry['analysis']
            writer.writerow([
                entry['timestamp'],
                entry['source_lang'],
                entry['target_lang'],
                entry['source_text'],
                entry['translated_text'],
                analysis['source']['word_count'],
                analysis['target']['word_count'],
                analysis['word_ratio'],
                analysis['source']['character_count'],
                analysis['target']['character_count'],
                analysis['translation_ratio'],
                analysis['source']['lexical_diversity'],
                analysis['target']['lexical_diversity'],
                analysis['processing_time']
            ])

        return output.getvalue()
215
+
216
+ # Initialize the app
217
+ app = LinguisticTranslationApp()
218
+
219
+ # Custom CSS for linguistic interface
220
+ custom_css = """
221
+ #logo {
222
+ display: block;
223
+ margin: 0 auto 20px auto;
224
+ }
225
+
226
+ .linguistic-panel {
227
+ background: linear-gradient(135deg, #f0f9ff 0%, #e0f2fe 100%);
228
+ border: 1px solid #0891b2;
229
+ border-radius: 12px;
230
+ padding: 20px;
231
+ margin: 10px 0;
232
+ }
233
+
234
+ .analysis-metric {
235
+ background: white;
236
+ padding: 10px;
237
+ border-radius: 8px;
238
+ margin: 5px;
239
+ border-left: 4px solid #0891b2;
240
+ }
241
+
242
+ .status-success {
243
+ color: #059669 !important;
244
+ font-weight: 500;
245
+ }
246
+
247
+ .status-error {
248
+ color: #DC2626 !important;
249
+ font-weight: 500;
250
+ }
251
+
252
+ .gradient-text {
253
+ background: linear-gradient(45deg, #059669, #0891b2);
254
+ -webkit-background-clip: text;
255
+ -webkit-text-fill-color: transparent;
256
+ background-clip: text;
257
+ }
258
+
259
+ .linguistic-header {
260
+ text-align: center;
261
+ margin-bottom: 30px;
262
+ padding: 20px;
263
+ background: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%);
264
+ border-radius: 16px;
265
+ border: 1px solid #cbd5e1;
266
+ }
267
+
268
+ .comparison-grid {
269
+ display: grid;
270
+ grid-template-columns: 1fr 1fr;
271
+ gap: 15px;
272
+ margin: 15px 0;
273
+ }
274
+
275
+ .metric-card {
276
+ background: white;
277
+ padding: 15px;
278
+ border-radius: 8px;
279
+ border: 1px solid #e2e8f0;
280
+ text-align: center;
281
+ }
282
+ """
283
+
284
+ # Create the Gradio interface
285
+ with gr.Blocks(css=custom_css, title="Linguistic Translation Analysis Tool", theme=gr.themes.Soft()) as demo:
286
 
287
+ # Header section
288
  with gr.Row():
289
+ with gr.Column():
290
+ gr.HTML("""
291
+ <div class='linguistic-header'>
292
+ <h1 class='gradient-text' style='font-size: 2.5em; margin-bottom: 10px;'>
293
+ πŸ”¬ Siswati ⇄ English Linguistic Analysis Tool
294
+ </h1>
295
+ <p style='font-size: 1.1em; color: #475569; max-width: 800px; margin: 0 auto;'>
296
+ Advanced translation system with comprehensive linguistic analysis for researchers,
297
+ linguists, and language documentation projects. Includes morphological insights,
298
+ statistical analysis, and corpus management features.
299
+ </p>
300
+ </div>
301
+ """)
302
+
303
+ # Main translation interface
304
  with gr.Row():
305
+ with gr.Column(scale=2):
306
+ # Input section
307
+ with gr.Group():
308
+ gr.HTML("<h3>πŸ“ Translation Input</h3>")
309
+ direction = gr.Radio(
310
+ choices=['English β†’ Siswati', 'Siswati β†’ English'],
311
+ label="Translation Direction",
312
+ value='English β†’ Siswati',
313
+ interactive=True
314
+ )
315
+
316
+ input_text = gr.Textbox(
317
+ lines=6,
318
+ placeholder="Enter your text here for linguistic analysis... (maximum 2000 characters)",
319
+ label="Source Text",
320
+ max_lines=12,
321
+ show_copy_button=True
322
+ )
323
+
324
+ char_count = gr.HTML("Character count: 0/2000")
325
+
326
+ with gr.Row():
327
+ translate_btn = gr.Button("πŸ”„ Translate & Analyze", variant="primary", size="lg")
328
+ clear_btn = gr.Button("πŸ—‘οΈ Clear", variant="secondary")
329
+
330
+ # Output section
331
+ with gr.Group():
332
+ gr.HTML("<h3>✨ Translation Output</h3>")
333
+ output_text = gr.Textbox(
334
+ label="Translation",
335
+ lines=6,
336
+ max_lines=12,
337
+ show_copy_button=True,
338
+ interactive=False
339
+ )
340
+ status_display = gr.HTML()
341
+
342
+ # Linguistic analysis panel
343
+ with gr.Column(scale=1):
344
+ with gr.Group():
345
+ gr.HTML("<h3>πŸ“Š Linguistic Analysis</h3>")
346
+
347
+ # Real-time metrics
348
+ with gr.Accordion("πŸ“ˆ Text Metrics", open=True):
349
+ metrics_display = gr.HTML("""
350
+ <div style='text-align: center; color: #64748b; padding: 20px;'>
351
+ <em>Translate text to see linguistic analysis</em>
352
+ </div>
353
+ """)
354
+
355
+ # Language-specific features
356
+ with gr.Accordion("πŸ” Language Features", open=False):
357
+ features_display = gr.HTML("")
358
+
359
+ # Translation quality indicators
360
+ with gr.Accordion("βš–οΈ Translation Ratios", open=False):
361
+ ratios_display = gr.HTML("")
362
+
363
+ # Batch processing section
364
+ with gr.Accordion("πŸ“š Batch Translation & Corpus Analysis", open=False):
365
+ with gr.Row():
366
+ with gr.Column():
367
+ gr.HTML("<h4>Upload text file or enter multiple lines:</h4>")
368
+ batch_input = gr.File(
369
+ label="Upload .txt file",
370
+ file_types=[".txt"],
371
+ type="filepath"
372
+ )
373
+ batch_text = gr.Textbox(
374
+ lines=8,
375
+ placeholder="Or paste multiple lines here (one per line)...",
376
+ label="Batch Text Input",
377
+ show_copy_button=True
378
+ )
379
+ batch_direction = gr.Radio(
380
+ choices=['English β†’ Siswati', 'Siswati β†’ English'],
381
+ label="Batch Translation Direction",
382
+ value='English β†’ Siswati'
383
+ )
384
+ batch_btn = gr.Button("πŸ”„ Process Batch", variant="primary")
385
+
386
+ with gr.Column():
387
+ batch_results = gr.Dataframe(
388
+ headers=["Index", "Source", "Translation", "Words (S→T)", "Chars (S→T)"],
389
+ label="Batch Results",
390
+ interactive=False
391
+ )
392
+
393
+ # Research tools section
394
+ with gr.Accordion("πŸ”¬ Research & Export Tools", open=False):
395
+ with gr.Row():
396
+ with gr.Column():
397
+ gr.HTML("<h4>Translation History & Export</h4>")
398
+ history_display = gr.Dataframe(
399
+ headers=["Timestamp", "Direction", "Source", "Translation"],
400
+ label="Translation History",
401
+ interactive=False
402
+ )
403
+
404
+ with gr.Row():
405
+ refresh_history_btn = gr.Button("πŸ”„ Refresh History")
406
+ export_csv_btn = gr.Button("πŸ“Š Export CSV", variant="secondary")
407
+ clear_history_btn = gr.Button("πŸ—‘οΈ Clear History", variant="stop")
408
+
409
+ csv_download = gr.File(label="Download CSV", visible=False)
410
+
411
+ with gr.Column():
412
+ gr.HTML("<h4>Linguistic Resources</h4>")
413
+ gr.HTML("""
414
+ <div style='background: #f8fafc; padding: 20px; border-radius: 8px; border: 1px solid #e2e8f0;'>
415
+ <h5>πŸ“– Siswati Language Notes:</h5>
416
+ <ul style='text-align: left; margin: 10px 0;'>
417
+ <li><strong>Script:</strong> Latin alphabet</li>
418
+ <li><strong>Family:</strong> Niger-Congo, Bantu</li>
419
+ <li><strong>Features:</strong> Agglutinative, click consonants</li>
420
+ <li><strong>Speakers:</strong> ~2.3 million (Eswatini, South Africa)</li>
421
+ </ul>
422
+ <h5>πŸ”§ Research Features:</h5>
423
+ <ul style='text-align: left; margin: 10px 0;'>
424
+ <li>Morphological complexity analysis</li>
425
+ <li>Translation ratio tracking</li>
426
+ <li>Lexical diversity measurement</li>
427
+ <li>Batch processing for corpora</li>
428
+ <li>Export capabilities for further analysis</li>
429
+ </ul>
430
+ </div>
431
+ """)
432
+
433
+ # Examples for linguists
434
+ with gr.Accordion("πŸ’‘ Linguistic Examples", open=False):
435
+ examples = gr.Examples(
436
+ examples=[
437
+ ["The child is playing with traditional toys.", "English β†’ Siswati"],
438
+ ["Umntfwana udlala ngetinsisimane tesintu.", "Siswati β†’ English"],
439
+ ["Agglutination demonstrates morphological complexity in Bantu languages.", "English β†’ Siswati"],
440
+ ["Lolimi lune-morphology leyinkimbinkimbi.", "Siswati β†’ English"],
441
+ ["What are the phonological features of this language?", "English β†’ Siswati"],
442
+ ["Yini tinchubo te-phonology talolimi?", "Siswati β†’ English"],
443
+ ],
444
+ inputs=[input_text, direction],
445
+ label="Click examples to analyze linguistic features:"
446
+ )
447
+
448
+ # Footer
449
+ with gr.Row():
450
+ with gr.Column():
451
+ gr.HTML("""
452
+ <div style='text-align: center; margin-top: 40px; padding: 30px; border-top: 1px solid #E5E7EB; background: #f8fafc;'>
453
+ <div style='margin-bottom: 20px;'>
454
+ <a href='https://github.com/dsfsi/en-ss-m2m100-combo' target='_blank' style='margin: 0 15px; color: #0891b2; text-decoration: none;'>πŸ“ Enβ†’Ss Model Repository</a>
455
+ <a href='https://github.com/dsfsi/ss-en-m2m100-combo' target='_blank' style='margin: 0 15px; color: #0891b2; text-decoration: none;'>πŸ“ Ssβ†’En Model Repository</a>
456
+ <a href='https://docs.google.com/forms/d/e/1FAIpQLSf7S36dyAUPx2egmXbFpnTBuzoRulhL5Elu-N1eoMhaO7v10w/viewform' target='_blank' style='margin: 0 15px; color: #0891b2; text-decoration: none;'>πŸ’¬ Research Feedback</a>
457
+ </div>
458
+ <div style='color: #475569; font-size: 0.95em;'>
459
+ <strong>Research Team:</strong> Vukosi Marivate, Richard Lastrucci<br>
460
+ <em>Supporting African language documentation and computational linguistics research</em><br>
461
+ <small style='color: #64748b; margin-top: 10px; display: block;'>
462
+ For academic use: Please cite the original models in your publications
463
+ </small>
464
+ </div>
465
+ </div>
466
+ """)
467
+
468
+ # Event handlers
469
def update_char_count(text):
    """Return the HTML character counter for the current input text.

    The counter is red above 2000 characters, green above 1600 and grey
    otherwise; a missing/empty value counts as 0.
    """
    count = 0
    if text:
        count = len(text)

    if count > 2000:
        color = "#DC2626"
    elif count > 1600:
        color = "#059669"
    else:
        color = "#64748b"

    return f"<span style='color: {color}; font-weight: 500;'>Character count: {count}/2000</span>"
473
+
474
def clear_all():
    """Return reset values for the seven outputs wired to the Clear button.

    Order: input text, output text, character counter, status, metrics,
    features, ratios.
    """
    blank = ""
    counter_reset = "Character count: 0/2000"
    return (blank, blank, counter_reset, blank, blank, blank, blank)
476
+
477
def translate_with_analysis(text, direction):
    """Translate ``text`` and build the HTML fragments for the analysis panel.

    Args:
        text: Source text from the input box.
        direction: Selected translation direction label.

    Returns:
        A 5-tuple ``(translation, status_html, metrics_html, features_html,
        ratios_html)``. The last three are empty strings when the translation
        failed or produced no analysis.
    """
    import html

    translation, status, success, analysis = app.translate_text(text, direction)
    # The status may embed raw exception text, so escape it before putting it
    # into markup.
    status_class = 'status-success' if success else 'status-error'
    status_html = f"<div class='{status_class}'>{html.escape(status, quote=False)}</div>"

    if not (success and analysis):
        return translation, status_html, "", "", ""

    # Create metrics display
    source_metrics = analysis['source']
    target_metrics = analysis['target']

    metrics_html = f"""
    <div class='comparison-grid'>
    <div class='metric-card'>
    <h5>πŸ“Š Source Text</h5>
    <p><strong>Words:</strong> {source_metrics['word_count']}</p>
    <p><strong>Characters:</strong> {source_metrics['character_count']}</p>
    <p><strong>Sentences:</strong> {source_metrics['sentence_count']}</p>
    <p><strong>Lexical Diversity:</strong> {source_metrics['lexical_diversity']:.3f}</p>
    </div>
    <div class='metric-card' style='border-left: 4px solid #059669;'>
    <h5>πŸ“Š Translation</h5>
    <p><strong>Words:</strong> {target_metrics['word_count']}</p>
    <p><strong>Characters:</strong> {target_metrics['character_count']}</p>
    <p><strong>Sentences:</strong> {target_metrics['sentence_count']}</p>
    <p><strong>Lexical Diversity:</strong> {target_metrics['lexical_diversity']:.3f}</p>
    </div>
    </div>
    """

    # Language features: the Siswati-specific counts live on whichever side
    # was analysed as Siswati, so fall back to the translation when the
    # source is English.
    siswati_metrics = None
    for side in (source_metrics, target_metrics):
        if 'potential_agglutination' in side:
            siswati_metrics = side
            break

    features_html = ""
    if siswati_metrics is not None:
        features_html = f"""
        <div class='analysis-metric'>
        <h5>πŸ” Siswati Features Detected:</h5>
        <p><strong>Potential agglutinated words:</strong> {siswati_metrics['potential_agglutination']}</p>
        <p><strong>Click consonants (c,q,x):</strong> {siswati_metrics['click_consonants']}</p>
        <p><strong>Tone markers:</strong> {siswati_metrics['tone_markers']}</p>
        </div>
        """

    # Translation ratios
    ratios_html = f"""
    <div class='analysis-metric'>
    <h5>βš–οΈ Translation Ratios:</h5>
    <p><strong>Word ratio:</strong> {analysis['word_ratio']:.3f}</p>
    <p><strong>Character ratio:</strong> {analysis['translation_ratio']:.3f}</p>
    <p><strong>Processing time:</strong> {analysis['processing_time']:.3f}s</p>
    </div>
    """

    return translation, status_html, metrics_html, features_html, ratios_html
530
 
531
def process_batch(file_path, batch_text, direction):
    """Translate a batch of lines and format them as rows for the results table.

    Lines come from the uploaded file when ``file_path`` is set, otherwise
    from ``batch_text``; blank lines are dropped. Returns a list of 5-column
    rows (index, source, translation, word counts, character counts). A
    single message row is returned when the file cannot be read or no text
    was supplied.
    """
    def _preview(value):
        # Long cells are cut to 50 characters plus an ellipsis.
        if len(value) > 50:
            return value[:50] + "..."
        return value

    lines = []
    if file_path:
        try:
            with open(file_path, 'r', encoding='utf-8') as handle:
                for raw in handle:
                    cleaned = raw.strip()
                    if cleaned:
                        lines.append(cleaned)
        except Exception as e:
            return [[f"Error reading file: {str(e)}", "", "", "", ""]]
    elif batch_text:
        for raw in batch_text.split('\n'):
            cleaned = raw.strip()
            if cleaned:
                lines.append(cleaned)

    if not lines:
        return [["No text provided", "", "", "", ""]]

    # Format for display
    rows = []
    for item in app.batch_translate(lines, direction):
        if item['success']:
            src = item['analysis']['source']
            tgt = item['analysis']['target']
            words_cell = f"{src['word_count']}β†’{tgt['word_count']}"
            chars_cell = f"{src['character_count']}β†’{tgt['character_count']}"
        else:
            words_cell = "Error"
            chars_cell = "Error"

        rows.append([
            item['index'],
            _preview(item['source']),
            _preview(item['translation']),
            words_cell,
            chars_cell,
        ])

    return rows
567
+
568
def get_history():
    """Return the 20 most recent history entries as rows for the history table.

    Each row is [timestamp, direction, source preview, translation preview];
    previews are cut to 50 characters plus an ellipsis. An empty history
    yields an empty list.
    """
    if not app.translation_history:
        return []

    rows = []
    for entry in app.translation_history[-20:]:  # Show last 20
        source = entry['source_text']
        translated = entry['translated_text']
        if len(source) > 50:
            source = source[:50] + "..."
        if len(translated) > 50:
            translated = translated[:50] + "..."
        rows.append([
            entry['timestamp'][:19],  # Remove microseconds
            entry['direction'],
            source,
            translated,
        ])
    return rows
578
+
579
def export_csv():
    """Write the translation history to a CSV file and expose it for download.

    Returns:
        A Gradio update for the download component: visible and pointing at
        the written file when there is history, hidden otherwise.
    """
    import os
    import tempfile

    csv_content = app.export_history_csv()
    if not csv_content:
        return gr.update(visible=False)

    filename = f"translation_history_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
    # The File component takes a path, not file contents, so write the CSV to
    # disk first. A fresh temp directory lets the download keep its name.
    # NOTE: the directory is left for the OS/container to clean up.
    export_path = os.path.join(tempfile.mkdtemp(prefix="translation_export_"), filename)
    # newline='' prevents doubled line endings: csv.writer already emitted them.
    with open(export_path, 'w', encoding='utf-8', newline='') as handle:
        handle.write(csv_content)

    # gr.update is the component-agnostic form of the per-component update helper.
    return gr.update(value=export_path, visible=True, label=f"πŸ“Š {filename}")
585
+
586
def clear_history():
    """Drop all stored translations and return an empty table for the display."""
    emptied_table = []
    # Rebind to a fresh list, separate from the one handed back to the UI.
    app.translation_history = []
    return emptied_table
589
+
590
+ # Wire up events
591
+ input_text.change(fn=update_char_count, inputs=input_text, outputs=char_count)
592
+
593
+ translate_btn.click(
594
+ fn=translate_with_analysis,
595
+ inputs=[input_text, direction],
596
+ outputs=[output_text, status_display, metrics_display, features_display, ratios_display]
597
+ )
598
+
599
+ clear_btn.click(
600
+ fn=clear_all,
601
+ outputs=[input_text, output_text, char_count, status_display, metrics_display, features_display, ratios_display]
602
+ )
603
+
604
+ batch_btn.click(
605
+ fn=process_batch,
606
+ inputs=[batch_input, batch_text, batch_direction],
607
+ outputs=batch_results
608
+ )
609
+
610
+ refresh_history_btn.click(fn=get_history, outputs=history_display)
611
+ export_csv_btn.click(fn=export_csv, outputs=csv_download)
612
+ clear_history_btn.click(fn=clear_history, outputs=history_display)
613
+
614
+ # Auto-translate on Enter
615
+ input_text.submit(
616
+ fn=translate_with_analysis,
617
+ inputs=[input_text, direction],
618
+ outputs=[output_text, status_display, metrics_display, features_display, ratios_display]
619
+ )
620
 
621
+ if __name__ == "__main__":
622
+ demo.launch(
623
+ server_name="0.0.0.0",
624
+ server_port=7860,
625
+ share=False,
626
+ debug=True
627
+ )