PromptMeister committed on
Commit
c2f1156
·
verified ·
1 Parent(s): cd53598

Update app.py


Added the voice function back; it was left out when the AI Snipper colors were added.
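For reference, the voice path this commit restores amounts to a Gradio Audio component wired to a Whisper-backed transcription handler that fills the keyword textbox. Below is a minimal, self-contained sketch of that wiring (component and function names mirror the diff below; the one-line ASR pipeline is a simplification of the WhisperProcessor/WhisperForConditionalGeneration loading that app.py does inside load_models):

import gradio as gr
from transformers import pipeline

# Illustrative shortcut: app.py lazy-loads Whisper inside load_models() instead.
asr = pipeline("automatic-speech-recognition", model="openai/whisper-small.en")

def handle_voice_input(audio_path):
    # Transcribe the recorded audio, or ask the user to retry.
    if audio_path is None:
        return "No audio detected. Please try again."
    return asr(audio_path)["text"]

with gr.Blocks() as demo:
    audio_input = gr.Audio(type="filepath", label="Speak your keyword")
    input_text = gr.Textbox(label="Keyword")
    voice_submit_btn = gr.Button("Transcribe")
    voice_submit_btn.click(handle_voice_input, inputs=[audio_input], outputs=[input_text])

if __name__ == "__main__":
    demo.launch()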

Files changed (1)
  1. app.py +970 -241
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # Your existing imports remain the same
2
  import gradio as gr
3
  import numpy as np
4
  import pandas as pd
@@ -143,53 +142,6 @@ ai_snipper_css = """
143
  color: var(--text-primary) !important;
144
  }
145
 
146
- /* File upload areas */
147
- .gr-file-upload {
148
- background: var(--bg-card) !important;
149
- border: 2px dashed var(--border-accent) !important;
150
- border-radius: 16px !important;
151
- color: var(--text-secondary) !important;
152
- transition: all 0.3s ease !important;
153
- }
154
-
155
- .gr-file-upload:hover {
156
- border-color: var(--ai-cyan) !important;
157
- background: var(--bg-card-hover) !important;
158
- }
159
-
160
- /* Audio input */
161
- .gr-audio {
162
- background: var(--gradient-card) !important;
163
- border: 1px solid var(--border-primary) !important;
164
- border-radius: 12px !important;
165
- }
166
-
167
- /* Sliders */
168
- .gr-slider input[type="range"] {
169
- background: var(--bg-secondary) !important;
170
- }
171
-
172
- .gr-slider input[type="range"]::-webkit-slider-track {
173
- background: var(--bg-secondary) !important;
174
- border-radius: 6px !important;
175
- }
176
-
177
- .gr-slider input[type="range"]::-webkit-slider-thumb {
178
- background: var(--gradient-button) !important;
179
- border: none !important;
180
- border-radius: 50% !important;
181
- box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2) !important;
182
- }
183
-
184
- /* Radio buttons and checkboxes */
185
- .gr-radio input[type="radio"] {
186
- accent-color: var(--ai-cyan) !important;
187
- }
188
-
189
- .gr-checkbox input[type="checkbox"] {
190
- accent-color: var(--ai-cyan) !important;
191
- }
192
-
193
  /* Tabs */
194
  .gr-tab-nav {
195
  background: var(--gradient-card) !important;
@@ -214,215 +166,995 @@ ai_snipper_css = """
214
  box-shadow: 0 2px 4px rgba(6, 182, 212, 0.3) !important;
215
  }
216
 
217
- .gr-tab-nav button:hover:not(.selected) {
218
- background: var(--bg-card-hover) !important;
219
- color: var(--text-primary) !important;
220
- }
221
-
222
- /* Tab content */
223
- .gr-tabitem {
224
  background: var(--gradient-card) !important;
225
  border: 1px solid var(--border-primary) !important;
226
  border-radius: 12px !important;
227
- padding: 1.5rem !important;
228
- margin-top: 1rem !important;
229
  }
230
 
231
- /* Progress bars */
232
- .gr-progress {
233
- background: var(--bg-secondary) !important;
234
- border-radius: 6px !important;
235
- }
236
-
237
- .gr-progress-bar {
238
  background: var(--gradient-button) !important;
239
- border-radius: 6px !important;
240
- }
241
-
242
- /* Accordion */
243
- .gr-accordion {
244
- background: var(--gradient-card) !important;
245
- border: 1px solid var(--border-primary) !important;
246
- border-radius: 12px !important;
247
- }
248
-
249
- .gr-accordion summary {
250
- background: var(--bg-card) !important;
251
- color: var(--text-primary) !important;
252
- padding: 1rem !important;
253
- border-radius: 12px !important;
254
- cursor: pointer !important;
255
- font-weight: 600 !important;
256
- }
257
-
258
- .gr-accordion[open] summary {
259
- border-bottom: 1px solid var(--border-primary) !important;
260
- border-radius: 12px 12px 0 0 !important;
261
- }
262
-
263
- /* JSON output */
264
- .gr-json {
265
- background: var(--bg-secondary) !important;
266
- border: 1px solid var(--border-primary) !important;
267
- border-radius: 12px !important;
268
- color: var(--text-primary) !important;
269
- }
270
-
271
- /* HTML output areas */
272
- .gr-html {
273
- background: var(--gradient-card) !important;
274
- border: 1px solid var(--border-primary) !important;
275
- border-radius: 12px !important;
276
- padding: 1rem !important;
277
  }
278
 
279
- /* Plot containers */
280
- .gr-plot {
281
- background: var(--gradient-card) !important;
282
- border: 1px solid var(--border-primary) !important;
283
- border-radius: 12px !important;
284
- padding: 1rem !important;
285
  }
286
 
287
- /* Rows and columns */
288
- .gr-row {
289
- gap: 1.5rem !important;
290
  }
 
291
 
292
- .gr-column {
293
- gap: 1rem !important;
294
- }
295
 
296
- /* Scrollbars */
297
- ::-webkit-scrollbar {
298
- width: 8px;
299
- height: 8px;
300
- }
301
 
302
- ::-webkit-scrollbar-track {
303
- background: var(--bg-secondary);
304
- border-radius: 4px;
305
- }
306
 
307
- ::-webkit-scrollbar-thumb {
308
- background: var(--gradient-button);
309
- border-radius: 4px;
310
- }
311
 
312
- ::-webkit-scrollbar-thumb:hover {
313
- background: var(--ai-cyan);
314
- }
315
 
316
- /* Custom DNA-themed elements */
317
- .dna-header {
318
- position: relative;
319
- text-align: center;
320
- padding: 2rem 0;
321
- margin-bottom: 2rem;
322
- }
323
 
324
- .dna-header::before {
325
- content: '';
326
- position: absolute;
327
- top: 0;
328
- left: 50%;
329
- transform: translateX(-50%);
330
- width: 100px;
331
- height: 4px;
332
- background: var(--gradient-primary);
333
- border-radius: 2px;
334
- }
335
 
336
- .dna-subtitle {
337
- color: var(--text-muted) !important;
338
- font-size: 1.2rem !important;
339
- margin-top: 1rem !important;
340
- font-weight: 400 !important;
341
- }
342
 
343
- /* Example button styling */
344
- .example-buttons .gr-button {
345
- background: var(--bg-card) !important;
346
- color: var(--text-accent) !important;
347
- border: 1px solid var(--border-accent) !important;
348
- font-size: 0.875rem !important;
349
- padding: 0.5rem 1rem !important;
350
- }
 
 
351
 
352
- .example-buttons .gr-button:hover {
353
- background: var(--gradient-button) !important;
354
- color: var(--text-primary) !important;
355
- border-color: transparent !important;
356
- }
357
 
358
- /* Status messages */
359
- .status-message {
360
- text-align: center !important;
361
- padding: 1rem !important;
362
- border-radius: 8px !important;
363
- margin: 1rem 0 !important;
364
- font-weight: 500 !important;
365
- }
366
 
367
- .status-loading {
368
- background: rgba(6, 182, 212, 0.1) !important;
369
- border: 1px solid var(--border-accent) !important;
370
- color: var(--text-accent) !important;
371
- }
372
 
373
- .status-success {
374
- background: rgba(20, 184, 166, 0.1) !important;
375
- border: 1px solid var(--ai-teal) !important;
376
- color: var(--ai-teal) !important;
377
- }
378
 
379
- .status-error {
380
- background: rgba(239, 68, 68, 0.1) !important;
381
- border: 1px solid #ef4444 !important;
382
- color: #ef4444 !important;
383
- }
384
 
385
- /* Footer hiding */
386
- footer {
387
- visibility: hidden !important;
388
- }
389
 
390
- /* Mobile responsiveness */
391
- @media (max-width: 768px) {
392
- .gradio-container h1 {
393
- font-size: 2rem !important;
394
- }
395
 
396
- .gr-button {
397
- width: 100% !important;
398
- justify-content: center !important;
399
- }
400
 
401
- .gr-row {
402
- flex-direction: column !important;
403
- }
404
- }
405
- """
406
-
407
- # Keep all your existing function code exactly the same
408
- # [Your existing global variables and all functions remain unchanged]
409
 
410
- # Global variables to store models
411
- tokenizer = None
412
- ner_pipeline = None
413
- pos_pipeline = None
414
- intent_classifier = None
415
- semantic_model = None
416
- stt_model = None # Speech-to-text model
417
- models_loaded = False
418
 
419
- # Database to store keyword ranking history (in-memory database for this example)
420
- ranking_history = {}
421
 
422
- # [Keep all your existing functions - load_models, speech_to_text, etc.]
423
- # I'm not repeating them here to save space, but they should remain exactly the same
424
 
425
- # Updated Gradio interface with AI Snipper styling
426
  with gr.Blocks(
427
  css=ai_snipper_css,
428
  title="🧬 AI Snipper Keyword DNA Analyzer",
@@ -436,9 +1168,11 @@ with gr.Blocks(
436
 
437
  # Custom header with DNA theme
438
  gr.HTML("""
439
- <div class="dna-header">
440
- <h1>🧬 Keyword DNA Analyzer</h1>
441
- <p class="dna-subtitle">
 
 
442
  Decode the genetic structure of your keywords with AI-powered analysis
443
  </p>
444
  </div>
@@ -489,19 +1223,18 @@ with gr.Blocks(
489
 
490
  # Status indicator with custom styling
491
  status_html = gr.HTML(
492
- '<div class="status-message">🚀 Enter a keyword and click "Analyze DNA" to begin</div>'
493
  )
494
 
495
  # Main analyze button
496
  analyze_btn = gr.Button(
497
  "🧬 Analyze DNA",
498
- variant="primary",
499
- size="lg"
500
  )
501
 
502
  # Example buttons with custom styling
503
  gr.Markdown("### 💡 Try These Examples")
504
- with gr.Row(elem_classes="example-buttons"):
505
  example_btns = []
506
  examples = [
507
  "preprocessing",
@@ -533,7 +1266,7 @@ with gr.Blocks(
533
  with gr.Tab("💾 Raw Data"):
534
  json_output = gr.JSON()
535
 
536
- # Event handlers remain the same but with updated status messages
537
  voice_submit_btn.click(
538
  handle_voice_input,
539
  inputs=[audio_input],
@@ -542,14 +1275,14 @@ with gr.Blocks(
542
 
543
  # Updated status messages with custom styling
544
  analyze_btn.click(
545
- lambda: '<div class="status-message status-loading">🔄 Loading models and analyzing... This may take a moment.</div>',
546
  outputs=status_html
547
  ).then(
548
  analyze_keyword,
549
  inputs=[input_text, forecast_months, growth_scenario, include_serp],
550
  outputs=[token_viz_html, analysis_html, json_output, evolution_chart, serp_html, ranking_chart, input_text]
551
  ).then(
552
- lambda: '<div class="status-message status-success">✅ Analysis complete! Check the results above.</div>',
553
  outputs=status_html
554
  )
555
 
@@ -564,21 +1297,17 @@ with gr.Blocks(
564
  inputs=[btn],
565
  outputs=[input_text]
566
  ).then(
567
- lambda: '<div class="status-message status-loading">🔄 Loading models and analyzing... This may take a moment.</div>',
568
  outputs=status_html
569
  ).then(
570
  analyze_keyword,
571
  inputs=[input_text, forecast_months, growth_scenario, include_serp],
572
  outputs=[token_viz_html, analysis_html, json_output, evolution_chart, serp_html, ranking_chart, input_text]
573
  ).then(
574
- lambda: '<div class="status-message status-success">✅ Analysis complete! Check the results above.</div>',
575
  outputs=status_html
576
  )
577
 
578
  # Launch configuration
579
  if __name__ == "__main__":
580
- demo.launch(
581
- share=True,
582
- show_error=True,
583
- debug=True
584
- )
 
 
1
  import gradio as gr
2
  import numpy as np
3
  import pandas as pd
 
142
  color: var(--text-primary) !important;
143
  }
144
 
145
  /* Tabs */
146
  .gr-tab-nav {
147
  background: var(--gradient-card) !important;
 
166
  box-shadow: 0 2px 4px rgba(6, 182, 212, 0.3) !important;
167
  }
168
 
169
+ /* Other elements */
170
+ .gr-audio, .gr-file-upload {
171
  background: var(--gradient-card) !important;
172
  border: 1px solid var(--border-primary) !important;
173
  border-radius: 12px !important;
 
 
174
  }
175
 
176
+ .gr-slider input[type="range"]::-webkit-slider-thumb {
177
  background: var(--gradient-button) !important;
178
+ border: none !important;
179
+ border-radius: 50% !important;
180
  }
181
 
182
+ .gr-radio input[type="radio"], .gr-checkbox input[type="checkbox"] {
183
+ accent-color: var(--ai-cyan) !important;
184
  }
185
 
186
+ /* Footer hiding */
187
+ footer {
188
+ visibility: hidden !important;
189
  }
190
+ """
191
 
192
+ # Global variables to store models
193
+ tokenizer = None
194
+ ner_pipeline = None
195
+ pos_pipeline = None
196
+ intent_classifier = None
197
+ semantic_model = None
198
+ stt_model = None # Speech-to-text model
199
+ models_loaded = False
200
 
201
+ # Database to store keyword ranking history (in-memory database for this example)
202
+ # In a real app, you would use a proper database
203
+ ranking_history = {}
 
 
204
 
205
+ def load_models(progress=gr.Progress()):
206
+ """Lazy-load models only when needed"""
207
+ global tokenizer, ner_pipeline, pos_pipeline, intent_classifier, semantic_model, stt_model, models_loaded
208
+
209
+ if models_loaded:
210
+ return True
211
+
212
+ try:
213
+ progress(0.1, desc="Loading models...")
214
+
215
+ # Use smaller models and load them sequentially to reduce memory pressure
216
+ from transformers import AutoTokenizer, pipeline
217
+
218
+ progress(0.2, desc="Loading tokenizer...")
219
+ tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
220
+
221
+ progress(0.3, desc="Loading NER model...")
222
+ ner_pipeline = pipeline("ner", model="dslim/bert-base-NER")
223
+
224
+ progress(0.4, desc="Loading POS model...")
225
+ # Use smaller POS model
226
+ from transformers import AutoModelForTokenClassification, BertTokenizerFast
227
+ pos_model = AutoModelForTokenClassification.from_pretrained("vblagoje/bert-english-uncased-finetuned-pos")
228
+ pos_tokenizer = BertTokenizerFast.from_pretrained("vblagoje/bert-english-uncased-finetuned-pos")
229
+ pos_pipeline = pipeline("token-classification", model=pos_model, tokenizer=pos_tokenizer)
230
+
231
+ progress(0.6, desc="Loading intent classifier...")
232
+ # Use a smaller model for zero-shot classification
233
+ intent_classifier = pipeline(
234
+ "zero-shot-classification",
235
+ model="typeform/distilbert-base-uncased-mnli", # Smaller than BART
236
+ device=0 if torch.cuda.is_available() else -1 # Use GPU if available
237
+ )
238
+
239
+ progress(0.7, desc="Loading speech-to-text model...")
240
+ try:
241
+ # Load automatic speech recognition model
242
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration
243
+ processor = WhisperProcessor.from_pretrained("openai/whisper-small.en")
244
+ stt_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small.en")
245
+ stt_model = (processor, stt_model)
246
+ except Exception as e:
247
+ print(f"Warning: Could not load speech-to-text model: {str(e)}")
248
+ stt_model = None # Set to None so we can check if it's available
249
+
250
+ progress(0.8, desc="Loading semantic model...")
251
+ try:
252
+ from sentence_transformers import SentenceTransformer
253
+ semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
254
+ except Exception as e:
255
+ print(f"Warning: Could not load semantic model: {str(e)}")
256
+ semantic_model = None # Set to None so we can check if it's available
257
+
258
+ progress(1.0, desc="Models loaded successfully!")
259
+ models_loaded = True
260
+ return True
261
+
262
+ except Exception as e:
263
+ print(f"Error loading models: {str(e)}")
264
+ return f"Error: {str(e)}"
265
 
266
+ def speech_to_text(audio_path):
267
+ """Convert speech to text using the loaded speech-to-text model"""
268
+ if stt_model is None:
269
+ return "Speech-to-text model not loaded. Please try text input instead."
270
+
271
+ try:
272
+ import librosa
273
+ import numpy as np
274
+
275
+ # Load audio file
276
+ audio, sr = librosa.load(audio_path, sr=16000)
277
+
278
+ # Process audio with Whisper
279
+ processor, model = stt_model
280
+ input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features
281
+
282
+ # Generate token ids
283
+ predicted_ids = model.generate(input_features)
284
+
285
+ # Decode token ids to text
286
+ transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
287
+
288
+ return transcription
289
+ except Exception as e:
290
+ print(f"Error in speech_to_text: {str(e)}")
291
+ return f"Error processing speech: {str(e)}"
292
 
293
+ def handle_voice_input(audio):
294
+ """Handle voice input and convert to text"""
295
+ if audio is None:
296
+ return "No audio detected. Please try again."
297
+
298
+ try:
299
+ # Convert speech to text
300
+ text = speech_to_text(audio)
301
+ return text
302
+ except Exception as e:
303
+ print(f"Error in handle_voice_input: {str(e)}")
304
+ return f"Error: {str(e)}"
305
 
306
+ def simulate_google_serp(keyword, num_results=10):
307
+ """Simulate Google SERP results for a keyword"""
308
+ try:
309
+ # In a real implementation, this would call the Google API
310
+ # For now, we'll generate fake SERP data
311
+
312
+ # Deterministic seed for consistent results by keyword
313
+ np.random.seed(sum(ord(c) for c in keyword))
314
+
315
+ serp_results = []
316
+ domains = [
317
+ "example.com", "wikipedia.org", "medium.com", "github.com",
318
+ "stackoverflow.com", "amazon.com", "youtube.com", "reddit.com",
319
+ "linkedin.com", "twitter.com", "facebook.com", "instagram.com"
320
+ ]
321
+
322
+ for i in range(1, num_results + 1):
323
+ domain = domains[i % len(domains)]
324
+ title = f"{keyword.title()} - {domain.split('.')[0].title()} Resource #{i}"
325
+ snippet = f"This is a simulated SERP result for '{keyword}'. Result #{i} would provide relevant information about this topic."
326
+ url = f"https://www.{domain}/{keyword.replace(' ', '-')}-resource-{i}"
327
+
328
+ position = i
329
+ ctr = round(0.3 * (0.85 ** (i - 1)), 4) # Simulate click-through rate decay
330
+
331
+ serp_results.append({
332
+ "position": position,
333
+ "title": title,
334
+ "url": url,
335
+ "domain": domain,
336
+ "snippet": snippet,
337
+ "ctr_estimate": ctr,
338
+ "impressions_estimate": np.random.randint(1000, 10000)
339
+ })
340
+
341
+ return serp_results
342
+ except Exception as e:
343
+ print(f"Error in simulate_google_serp: {str(e)}")
344
+ return []
345
 
346
+ def update_ranking_history(keyword, serp_results):
347
+ """Update the ranking history for a keyword"""
348
+ try:
349
+ # Get current timestamp
350
+ timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
351
+
352
+ # Initialize if keyword not in history
353
+ if keyword not in ranking_history:
354
+ ranking_history[keyword] = []
355
+
356
+ # Add new entry
357
+ ranking_history[keyword].append({
358
+ "timestamp": timestamp,
359
+ "results": serp_results[:5] # Store top 5 results for history
360
+ })
361
+
362
+ # Keep only last 10 entries for each keyword
363
+ if len(ranking_history[keyword]) > 10:
364
+ ranking_history[keyword] = ranking_history[keyword][-10:]
365
+
366
+ return True
367
+ except Exception as e:
368
+ print(f"Error in update_ranking_history: {str(e)}")
369
+ return False
370
 
371
+ def get_semantic_similarity(token, comparison_terms):
372
+ """Calculate semantic similarity between a token and comparison terms"""
373
+ try:
374
+ from sklearn.metrics.pairwise import cosine_similarity
375
+
376
+ token_embedding = semantic_model.encode([token])[0]
377
+ comparison_embeddings = semantic_model.encode(comparison_terms)
378
+
379
+ similarities = []
380
+ for i, emb in enumerate(comparison_embeddings):
381
+ similarity = cosine_similarity([token_embedding], [emb])[0][0]
382
+ similarities.append((comparison_terms[i], float(similarity)))
383
+
384
+ return sorted(similarities, key=lambda x: x[1], reverse=True)
385
+ except Exception as e:
386
+ print(f"Error in semantic similarity: {str(e)}")
387
+ # Return dummy data on error
388
+ return [(term, 0.5) for term in comparison_terms]
389
 
390
+ def get_token_colors(token_type):
391
+ colors = {
392
+ "prefix": "#D8BFD8", # Light purple
393
+ "suffix": "#AEDAA4", # Light green
394
+ "stem": "#A4C2F4", # Light blue
395
+ "compound_first": "#FFCC80", # Light orange
396
+ "compound_second": "#FFCC80", # Light orange
397
+ "word": "#E5E5E5" # Light gray
398
+ }
399
+ return colors.get(token_type, "#E5E5E5")
400
 
401
+ def simulate_historical_data(token):
402
+ """Generate simulated historical usage data for a token"""
403
+ eras = ["1900s", "1950s", "1980s", "2000s", "2010s", "Present"]
404
+
405
+ # Different patterns based on token characteristics
406
+ if len(token) > 8:
407
+ # Possibly a technical term - recent growth
408
+ values = [10, 20, 30, 60, 85, 95]
409
+ elif token.startswith(("un", "re", "de", "pre")):
410
+ # Prefix words tend to be older
411
+ values = [45, 50, 60, 70, 75, 80]
412
+ else:
413
+ # Standard pattern for common words
414
+ # Use token hash value modulo instead of hash() directly to avoid different results across runs
415
+ base = 50 + (sum(ord(c) for c in token) % 30)
416
+ # Use a fixed seed for reproducibility
417
+ np.random.seed(sum(ord(c) for c in token))
418
+ noise = np.random.normal(0, 5, 6)
419
+ values = [max(5, min(95, base + i*5 + n)) for i, n in enumerate(noise)]
420
+
421
+ return list(zip(eras, values))
422
 
423
+ def generate_origin_data(token):
424
+ """Generate simulated origin/etymology data for a token"""
425
+ origins = [
426
+ {"era": "Ancient", "language": "Latin"},
427
+ {"era": "Ancient", "language": "Greek"},
428
+ {"era": "Medieval", "language": "Old English"},
429
+ {"era": "16th century", "language": "French"},
430
+ {"era": "18th century", "language": "Germanic"},
431
+ {"era": "19th century", "language": "Anglo-Saxon"},
432
+ {"era": "20th century", "language": "Modern English"}
433
+ ]
434
+
435
+ # Deterministic selection based on the token
436
+ index = sum(ord(c) for c in token) % len(origins)
437
+ origin = origins[index]
438
+
439
+ note = f"First appeared in {origin['era']} texts derived from {origin['language']}."
440
+ origin["note"] = note
441
+
442
+ return origin
443
 
444
+ def analyze_token_types(tokens):
445
+ """Identify token types (prefix, suffix, compound, etc.)"""
446
+ processed_tokens = []
447
+
448
+ prefixes = ["un", "re", "de", "pre", "post", "anti", "pro", "inter", "sub", "super"]
449
+ suffixes = ["ing", "ed", "ly", "ment", "tion", "able", "ible", "ness", "ful", "less"]
450
+
451
+ for token in tokens:
452
+ token_text = token.lower()
453
+ token_type = "word"
454
+
455
+ # Check for prefixes
456
+ for prefix in prefixes:
457
+ if token_text.startswith(prefix) and len(token_text) > len(prefix) + 2:
458
+ if token_text != prefix: # Make sure the word isn't just the prefix
459
+ token_type = "prefix"
460
+ break
461
+
462
+ # Check for suffixes
463
+ if token_type == "word":
464
+ for suffix in suffixes:
465
+ if token_text.endswith(suffix) and len(token_text) > len(suffix) + 2:
466
+ token_type = "suffix"
467
+ break
468
+
469
+ # Check for compound words (simplified)
470
+ if token_type == "word" and len(token_text) > 8:
471
+ token_type = "compound_first" # Simplified - in reality would need more analysis
472
+
473
+ processed_tokens.append({
474
+ "text": token_text,
475
+ "type": token_type
476
+ })
477
+
478
+ return processed_tokens
479
 
480
+ def plot_historical_data(historical_data):
481
+ """Create a plot of historical usage data, with error handling"""
482
+ try:
483
+ eras = [item[0] for item in historical_data]
484
+ values = [item[1] for item in historical_data]
485
+
486
+ plt.figure(figsize=(8, 3))
487
+ plt.bar(eras, values, color='skyblue')
488
+ plt.title('Historical Usage')
489
+ plt.xlabel('Era')
490
+ plt.ylabel('Usage Level')
491
+ plt.ylim(0, 100)
492
+ plt.xticks(rotation=45)
493
+ plt.tight_layout()
494
+
495
+ return plt
496
+ except Exception as e:
497
+ print(f"Error in plot_historical_data: {str(e)}")
498
+ # Return a simple error plot
499
+ plt.figure(figsize=(8, 3))
500
+ plt.text(0.5, 0.5, f"Error creating plot: {str(e)}",
501
+ horizontalalignment='center', verticalalignment='center')
502
+ plt.axis('off')
503
+ return plt
504
 
505
+ def create_evolution_chart(data, forecast_months=6, growth_scenario="Moderate"):
506
+ """Create a simpler chart that's more compatible with Gradio"""
507
+ try:
508
+ import plotly.graph_objects as go
509
+
510
+ # Create a basic figure without subplots
511
+ fig = go.Figure()
512
+
513
+ # Add main trace for search volume
514
+ fig.add_trace(
515
+ go.Scatter(
516
+ x=[item["month"] for item in data],
517
+ y=[item["searchVolume"] for item in data],
518
+ name="Search Volume",
519
+ line=dict(color="#8884d8", width=3),
520
+ mode="lines+markers"
521
+ )
522
+ )
523
+
524
+ # Scale the other metrics to be visible on the same chart
525
+ max_volume = max([item["searchVolume"] for item in data])
526
+ scale_factor = max_volume / 100
527
+
528
+ # Add competition score (scaled)
529
+ fig.add_trace(
530
+ go.Scatter(
531
+ x=[item["month"] for item in data],
532
+ y=[item["competitionScore"] * scale_factor for item in data],
533
+ name="Competition Score",
534
+ line=dict(color="#82ca9d", width=2, dash="dot"),
535
+ mode="lines+markers"
536
+ )
537
+ )
538
+
539
+ # Add intent clarity (scaled)
540
+ fig.add_trace(
541
+ go.Scatter(
542
+ x=[item["month"] for item in data],
543
+ y=[item["intentClarity"] * scale_factor for item in data],
544
+ name="Intent Clarity",
545
+ line=dict(color="#ffc658", width=2, dash="dash"),
546
+ mode="lines+markers"
547
+ )
548
+ )
549
+
550
+ # Simple layout
551
+ fig.update_layout(
552
+ title=f"Keyword Evolution Forecast ({growth_scenario} Growth)",
553
+ xaxis_title="Month",
554
+ yaxis_title="Value",
555
+ legend=dict(orientation="h", y=1.1),
556
+ height=500
557
+ )
558
+
559
+ return fig
560
+
561
+ except Exception as e:
562
+ print(f"Error in chart creation: {str(e)}")
563
+ # Fallback to an even simpler chart
564
+ fig = go.Figure(data=go.Scatter(x=[1, 2, 3], y=[4, 1, 2]))
565
+ fig.update_layout(title="Fallback Chart (Error occurred)")
566
+ return fig
567
 
568
+ def create_ranking_history_chart(keyword_history):
569
+ """Create a chart showing keyword ranking history over time"""
570
+ try:
571
+ if not keyword_history or len(keyword_history) < 2:
572
+ # Not enough data for a meaningful chart
573
+ fig = go.Figure()
574
+ fig.update_layout(
575
+ title="Insufficient Ranking Data",
576
+ annotations=[{
577
+ "text": "Need at least 2 data points for ranking history",
578
+ "showarrow": False,
579
+ "font": {"size": 16},
580
+ "xref": "paper",
581
+ "yref": "paper",
582
+ "x": 0.5,
583
+ "y": 0.5
584
+ }]
585
+ )
586
+ return fig
587
+
588
+ # Create a figure
589
+ fig = go.Figure()
590
+
591
+ # Extract timestamps and convert to datetime objects
592
+ timestamps = [entry["timestamp"] for entry in keyword_history]
593
+ dates = [datetime.datetime.strptime(ts, "%Y-%m-%d %H:%M:%S") for ts in timestamps]
594
+
595
+ # Get unique domains from all results
596
+ all_domains = set()
597
+ for entry in keyword_history:
598
+ for result in entry["results"]:
599
+ all_domains.add(result["domain"])
600
+
601
+ # Colors for different domains
602
+ domain_colors = {}
603
+ color_palette = [
604
+ "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
605
+ "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"
606
+ ]
607
+ for i, domain in enumerate(all_domains):
608
+ domain_colors[domain] = color_palette[i % len(color_palette)]
609
+
610
+ # Track domains and their positions over time
611
+ domain_tracking = {domain: {"x": [], "y": [], "text": []} for domain in all_domains}
612
+
613
+ for i, entry in enumerate(keyword_history):
614
+ for result in entry["results"]:
615
+ domain = result["domain"]
616
+ position = result["position"]
617
+ title = result["title"]
618
+
619
+ domain_tracking[domain]["x"].append(dates[i])
620
+ domain_tracking[domain]["y"].append(position)
621
+ domain_tracking[domain]["text"].append(title)
622
+
623
+ # Add traces for each domain
624
+ for domain, data in domain_tracking.items():
625
+ if len(data["x"]) > 0: # Only add domains that have data
626
+ fig.add_trace(
627
+ go.Scatter(
628
+ x=data["x"],
629
+ y=data["y"],
630
+ mode="lines+markers",
631
+ name=domain,
632
+ line=dict(color=domain_colors[domain]),
633
+ hovertemplate="%{text}<br>Position: %{y}<br>Date: %{x}<extra></extra>",
634
+ text=data["text"],
635
+ marker=dict(size=8)
636
+ )
637
+ )
638
+
639
+ # Update layout
640
+ fig.update_layout(
641
+ title="Keyword Ranking History",
642
+ xaxis_title="Date",
643
+ yaxis_title="Position",
644
+ yaxis=dict(autorange="reversed"), # Invert y-axis so position 1 is on top
645
+ hovermode="closest",
646
+ height=500
647
+ )
648
+
649
+ return fig
650
+
651
+ except Exception as e:
652
+ print(f"Error in create_ranking_history_chart: {str(e)}")
653
+ # Return fallback chart
654
+ fig = go.Figure()
655
+ fig.update_layout(
656
+ title="Error Creating Ranking Chart",
657
+ annotations=[{
658
+ "text": f"Error: {str(e)}",
659
+ "showarrow": False,
660
+ "font": {"size": 14},
661
+ "xref": "paper",
662
+ "yref": "paper",
663
+ "x": 0.5,
664
+ "y": 0.5
665
+ }]
666
+ )
667
+ return fig
668
 
669
+ def generate_serp_html(keyword, serp_results):
670
+ """Generate HTML for SERP results"""
671
+ if not serp_results:
672
+ return "<div>No SERP results available</div>"
 
673
 
674
+ html = f"""
675
+ <div style="font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
676
+ <h2 style="margin-top: 0;">SERP Results for "{keyword}"</h2>
677
+
678
+ <div style="background-color: #f5f5f5; padding: 10px; border-radius: 4px; margin-bottom: 20px;">
679
+ <div style="color: #666; font-size: 12px;">This is a simulated SERP. In a real application, this would use the Google API.</div>
680
+ </div>
681
+
682
+ <div class="serp-results" style="display: flex; flex-direction: column; gap: 16px;">
683
+ """
684
 
685
+ for result in serp_results:
686
+ position = result["position"]
687
+ title = result["title"]
688
+ url = result["url"]
689
+ snippet = result["snippet"]
690
+ domain = result["domain"]
691
+ ctr = result["ctr_estimate"]
692
+ impressions = result["impressions_estimate"]
693
+
694
+ html += f"""
695
+ <div class="serp-result" style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px; position: relative;">
696
+ <div style="position: absolute; top: -10px; left: -10px; background-color: #4299e1; color: white; width: 24px; height: 24px; border-radius: 50%; display: flex; align-items: center; justify-content: center; font-size: 12px;">
697
+ {position}
698
+ </div>
699
+ <div style="margin-bottom: 5px;">
700
+ <a href="#" style="font-size: 18px; color: #1a73e8; text-decoration: none; font-weight: 500;">{title}</a>
701
+ </div>
702
+ <div style="margin-bottom: 8px; color: #006621; font-size: 14px;">{url}</div>
703
+ <div style="color: #4d5156; font-size: 14px;">{snippet}</div>
704
+
705
+ <div style="display: flex; margin-top: 10px; font-size: 12px; color: #666;">
706
+ <div style="margin-right: 15px;"><span style="font-weight: 500;">CTR:</span> {ctr:.2%}</div>
707
+ <div><span style="font-weight: 500;">Est. Impressions:</span> {impressions:,}</div>
708
+ </div>
709
+ </div>
710
+ """
711
+
712
+ html += """
713
+ </div>
714
+ </div>
715
+ """
716
+
717
+ return html
718
 
719
+ def generate_token_visualization_html(token_analysis, full_analysis):
720
+ """Generate HTML for token visualization"""
721
+ html = """
722
+ <div style="font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
723
+ <h2 style="margin-top: 0;">Token Visualization</h2>
724
+
725
+ <div style="margin-bottom: 20px; padding: 15px; background-color: #f8f9fa; border-radius: 6px;">
726
+ <div style="margin-bottom: 8px; font-weight: bold; color: #4a5568;">Human View:</div>
727
+ <div style="display: flex; flex-wrap: wrap; gap: 8px;">
728
+ """
729
+
730
+ # Add human view tokens
731
+ for token in token_analysis:
732
+ html += f"""
733
+ <div style="padding: 6px 12px; background-color: white; border: 1px solid #cbd5e0; border-radius: 4px;">
734
+ {token['text']}
735
+ </div>
736
+ """
737
+
738
+ html += """
739
+ </div>
740
+ </div>
741
+
742
+ <div style="text-align: center; margin: 15px 0;">
743
+ <span style="font-size: 20px;">↓</span>
744
+ </div>
745
+
746
+ <div style="padding: 15px; background-color: #f0fff4; border-radius: 6px;">
747
+ <div style="margin-bottom: 8px; font-weight: bold; color: #2f855a;">Machine View:</div>
748
+ <div style="display: flex; flex-wrap: wrap; gap: 8px;">
749
+ """
750
+
751
+ # Add machine view tokens
752
+ for token in full_analysis:
753
+ bg_color = get_token_colors(token["type"])
754
+ html += f"""
755
+ <div style="padding: 6px 12px; background-color: {bg_color}; border: 1px solid #a0aec0; border-radius: 4px; font-family: monospace;">
756
+ {token['token']}
757
+ <span style="font-size: 10px; opacity: 0.7; display: block;">{token['type']}</span>
758
+ </div>
759
+ """
760
+
761
+ html += """
762
+ </div>
763
+ </div>
764
+
765
+ <div style="margin-top: 20px; display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px; text-align: center;">
766
+ """
767
+
768
+ # Add stats
769
+ word_count = len(token_analysis)
770
+ token_count = len(full_analysis)
771
+ ratio = round(token_count / max(1, word_count), 2)
772
+
773
+ html += f"""
774
+ <div style="background-color: #ebf8ff; padding: 10px; border-radius: 6px;">
775
+ <div style="font-size: 24px; font-weight: bold; color: #3182ce;">{word_count}</div>
776
+ <div style="font-size: 14px; color: #4299e1;">Words</div>
777
+ </div>
778
+
779
+ <div style="background-color: #f0fff4; padding: 10px; border-radius: 6px;">
780
+ <div style="font-size: 24px; font-weight: bold; color: #38a169;">{token_count}</div>
781
+ <div style="font-size: 14px; color: #48bb78;">Tokens</div>
782
+ </div>
783
+
784
+ <div style="background-color: #faf5ff; padding: 10px; border-radius: 6px;">
785
+ <div style="font-size: 24px; font-weight: bold; color: #805ad5;">{ratio}</div>
786
+ <div style="font-size: 14px; color: #9f7aea;">Tokens per Word</div>
787
+ </div>
788
+ """
789
+
790
+ html += """
791
+ </div>
792
+ </div>
793
+ """
794
+
795
+ return html
796
 
797
+ def generate_full_analysis_html(keyword, token_analysis, intent_analysis, evolution_potential, trends):
798
+ """Generate HTML for full keyword analysis"""
799
+ html = f"""
800
+ <div style="font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
801
+ <h2 style="margin-top: 0;">Keyword DNA Analysis for: {keyword}</h2>
802
+
803
+ <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 15px; margin-bottom: 20px;">
804
+ <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px;">
805
+ <h3 style="margin-top: 0; font-size: 16px;">Intent Gene</h3>
806
+ <div style="display: flex; justify-content: space-between; margin-bottom: 10px;">
807
+ <span>Type:</span>
808
+ <span>{intent_analysis['type']}</span>
809
+ </div>
810
+ <div style="display: flex; justify-content: space-between; align-items: center;">
811
+ <span>Strength:</span>
812
+ <div style="width: 120px; height: 8px; background-color: #edf2f7; border-radius: 4px; overflow: hidden;">
813
+ <div style="height: 100%; background-color: #48bb78; width: {intent_analysis['strength']}%;"></div>
814
+ </div>
815
+ </div>
816
+ </div>
817
+
818
+ <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px;">
819
+ <h3 style="margin-top: 0; font-size: 16px;">Evolution Potential</h3>
820
+ <div style="display: flex; justify-content: center; align-items: center; height: 100px;">
821
+ <div style="position: relative; width: 100px; height: 100px;">
822
+ <div style="position: absolute; inset: 0; display: flex; align-items: center; justify-content: center;">
823
+ <span style="font-size: 24px; font-weight: bold;">{evolution_potential}</span>
824
+ </div>
825
+ <svg width="100" height="100" viewBox="0 0 36 36">
826
+ <path
827
+ d="M18 2.0845 a 15.9155 15.9155 0 0 1 0 31.831 a 15.9155 15.9155 0 0 1 0 -31.831"
828
+ fill="none"
829
+ stroke="#4CAF50"
830
+ stroke-width="3"
831
+ stroke-dasharray="{evolution_potential}, 100"
832
+ />
833
+ </svg>
834
+ </div>
835
+ </div>
836
+ </div>
837
+ </div>
838
+
839
+ <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px; margin-bottom: 20px;">
840
+ <h3 style="margin-top: 0; font-size: 16px;">Future Mutations</h3>
841
+ <div style="display: flex; flex-direction: column; gap: 8px;">
842
+ """
843
+
844
+ # Add trends
845
+ for trend in trends:
846
+ html += f"""
847
+ <div style="display: flex; align-items: center; gap: 8px;">
848
+ <span style="color: #48bb78;">β†—</span>
849
+ <span>{trend}</span>
850
+ </div>
851
+ """
852
+
853
+ html += """
854
+ </div>
855
+ </div>
856
+
857
+ <h3 style="margin-bottom: 10px;">Token Details & Historical Analysis</h3>
858
+ """
859
+
860
+ # Add token details
861
+ for token in token_analysis:
862
+ html += f"""
863
+ <div style="padding: 15px; border: 1px solid #e2e8f0; border-radius: 6px; margin-bottom: 15px;">
864
+ <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px;">
865
+ <div style="display: flex; align-items: center; gap: 8px;">
866
+ <span style="font-size: 18px; font-weight: medium;">{token['token']}</span>
867
+ <span style="padding: 2px 8px; background-color: #edf2f7; border-radius: 4px; font-size: 12px;">{token['posTag']}</span>
868
+ """
869
+
870
+ if token['entityType']:
871
+ html += f"""
872
+ <span style="padding: 2px 8px; background-color: #ebf8ff; color: #3182ce; border-radius: 4px; font-size: 12px; display: flex; align-items: center;">
873
+ ⓘ {token['entityType']}
874
+ </span>
875
+ """
876
+
877
+ html += f"""
878
+ </div>
879
+ <div style="display: flex; align-items: center; gap: 4px;">
880
+ <span style="font-size: 12px; color: #718096;">Importance:</span>
881
+ <div style="width: 64px; height: 8px; background-color: #edf2f7; border-radius: 4px; overflow: hidden;">
882
+ <div style="height: 100%; background-color: #4299e1; width: {token['importance']}%;"></div>
883
+ </div>
884
+ </div>
885
+ </div>
886
+
887
+ <div style="margin-top: 15px;">
888
+ <div style="font-size: 12px; color: #718096; margin-bottom: 4px;">Historical Relevance:</div>
889
+ <div style="border: 1px solid #e2e8f0; border-radius: 4px; padding: 10px; background-color: #f7fafc;">
890
+ <div style="font-size: 12px; margin-bottom: 8px;">
891
+ <span style="font-weight: 500;">Origin: </span>
892
+ <span>{token['origin']['era']}, </span>
893
+ <span style="font-style: italic;">{token['origin']['language']}</span>
894
+ </div>
895
+ <div style="font-size: 12px; margin-bottom: 12px;">{token['origin']['note']}</div>
896
+
897
+ <div style="display: flex; align-items: flex-end; height: 50px; gap: 4px; margin-top: 8px;">
898
+ """
899
+
900
+ # Add historical data bars
901
+ for period, value in token['historicalData']:
902
+ opacity = 0.3 + (token['historicalData'].index((period, value)) * 0.1)
903
+ html += f"""
904
+ <div style="display: flex; flex-direction: column; align-items: center; flex: 1;">
905
+ <div style="width: 100%; background-color: rgba(66, 153, 225, {opacity}); border-radius: 2px 2px 0 0; height: {max(4, value)}%;"></div>
906
+ <div style="font-size: 9px; margin-top: 4px; color: #718096; transform: rotate(45deg); transform-origin: top left; white-space: nowrap;">
907
+ {period}
908
+ </div>
909
+ </div>
910
+ """
911
+
912
+ html += """
913
+ </div>
914
+ </div>
915
+ </div>
916
+ </div>
917
+ """
918
+
919
+ html += """
920
+ </div>
921
+ """
922
+
923
+ return html
924
 
925
+ def analyze_keyword(keyword, forecast_months=6, growth_scenario="Moderate", get_serp=False, progress=gr.Progress()):
926
+ """Main function to analyze a keyword"""
927
+ if not keyword or not keyword.strip():
928
+ return (
929
+ "<div>Please enter a keyword to analyze</div>",
930
+ "<div>Please enter a keyword to analyze</div>",
931
+ None,
932
+ None,
933
+ None,
934
+ None,
935
+ None
936
+ )
937
+
938
+ progress(0.1, desc="Starting analysis...")
939
+
940
+ # Load models if not already loaded
941
+ model_status = load_models(progress)
942
+ if isinstance(model_status, str) and model_status.startswith("Error"):
943
+ return (
944
+ f"<div style='color:red;'>{model_status}</div>",
945
+ f"<div style='color:red;'>{model_status}</div>",
946
+ None,
947
+ None,
948
+ None,
949
+ None,
950
+ None
951
+ )
952
+
953
+ try:
954
+ # Basic tokenization - just split on spaces for simplicity
955
+ words = keyword.strip().lower().split()
956
+ progress(0.2, desc="Analyzing tokens...")
957
+
958
+ # Get token types
959
+ token_analysis = analyze_token_types(words)
960
+
961
+ progress(0.3, desc="Running NER...")
962
+ # Get NER tags - handle potential errors
963
+ try:
964
+ ner_results = ner_pipeline(keyword)
965
+ except Exception as e:
966
+ print(f"NER error: {str(e)}")
967
+ ner_results = []
968
+
969
+ progress(0.4, desc="Running POS tagging...")
970
+ # Get POS tags - handle potential errors
971
+ try:
972
+ pos_results = pos_pipeline(keyword)
973
+ except Exception as e:
974
+ print(f"POS error: {str(e)}")
975
+ pos_results = []
976
+
977
+ # Process and organize results
978
+ full_token_analysis = []
979
+ for token in token_analysis:
980
+ # Find POS tag for this token
981
+ pos_tag = "NOUN" # Default
982
+ for pos_result in pos_results:
983
+ if pos_result["word"].lower() == token["text"]:
984
+ pos_tag = pos_result["entity"]
985
+ break
986
+
987
+ # Find entity type if any
988
+ entity_type = None
989
+ for ner_result in ner_results:
990
+ if ner_result["word"].lower() == token["text"]:
991
+ entity_type = ner_result["entity"]
992
+ break
993
+
994
+ # Generate historical data
995
+ historical_data = simulate_historical_data(token["text"])
996
+
997
+ # Generate origin data
998
+ origin = generate_origin_data(token["text"])
999
+
1000
+ # Calculate importance (simplified algorithm)
1001
+ importance = 60 + (len(token["text"]) * 2)
1002
+ importance = min(95, importance)
1003
+
1004
+ # Generate more meaningful related terms using semantic similarity
1005
+ if semantic_model is not None:
1006
+ try:
1007
+ # Generate some potential related terms
1008
+ prefix_related = [f"about {token['text']}", f"what is {token['text']}", f"how to {token['text']}"]
1009
+ synonym_candidates = ["similar", "equivalent", "comparable", "like", "related", "alternative"]
1010
+ domain_terms = ["software", "marketing", "business", "science", "education", "technology"]
1011
+ comparison_terms = prefix_related + synonym_candidates + domain_terms
1012
+
1013
+ # Get similarities
1014
+ similarities = get_semantic_similarity(token['text'], comparison_terms)
1015
+
1016
+ # Use top 3 most similar terms
1017
+ related_terms = [term for term, score in similarities[:3]]
1018
+ except Exception as e:
1019
+ print(f"Error generating semantic related terms: {str(e)}")
1020
+ related_terms = [f"{token['text']}-related-1", f"{token['text']}-related-2"]
1021
+ else:
1022
+ # Fallback if semantic model isn't loaded
1023
+ related_terms = [f"{token['text']}-related-1", f"{token['text']}-related-2"]
1024
+
1025
+ full_token_analysis.append({
1026
+ "token": token["text"],
1027
+ "type": token["type"],
1028
+ "posTag": pos_tag,
1029
+ "entityType": entity_type,
1030
+ "importance": importance,
1031
+ "historicalData": historical_data,
1032
+ "origin": origin,
1033
+ "relatedTerms": related_terms
1034
+ })
1035
+
1036
+ progress(0.5, desc="Analyzing intent...")
1037
+ # Intent analysis - handle potential errors
1038
+ try:
1039
+ intent_result = intent_classifier(
1040
+ keyword,
1041
+ candidate_labels=["informational", "navigational", "transactional"]
1042
+ )
1043
+
1044
+ intent_analysis = {
1045
+ "type": intent_result["labels"][0].capitalize(),
1046
+ "strength": round(intent_result["scores"][0] * 100),
1047
+ "mutations": [
1048
+ f"{intent_result['labels'][0]}-variation-1",
1049
+ f"{intent_result['labels'][0]}-variation-2"
1050
+ ]
1051
+ }
1052
+ except Exception as e:
1053
+ print(f"Intent classification error: {str(e)}")
1054
+ intent_analysis = {
1055
+ "type": "Informational", # Default fallback
1056
+ "strength": 70,
1057
+ "mutations": ["fallback-variation-1", "fallback-variation-2"]
1058
+ }
1059
+
1060
+ # Evolution potential (simplified calculation)
1061
+ evolution_potential = min(95, 65 + (len(keyword) % 30))
1062
+
1063
+ # Predicted trends (simplified)
1064
+ trends = [
1065
+ "Voice search adaptation",
1066
+ "Visual search integration"
1067
+ ]
1068
+
1069
+ # Generate more realistic and keyword-specific evolution data
1070
+ base_volume = 1000 + (len(keyword) * 100)
1071
+
1072
+ # Adjust growth factor based on scenario
1073
+ if growth_scenario == "Conservative":
1074
+ growth_factor = 1.05 + (0.02 * (sum(ord(c) for c in keyword) % 5))
1075
+ elif growth_scenario == "Aggressive":
1076
+ growth_factor = 1.15 + (0.05 * (sum(ord(c) for c in keyword) % 5))
1077
+ else: # Moderate
1078
+ growth_factor = 1.1 + (0.03 * (sum(ord(c) for c in keyword) % 5))
1079
+
1080
+ evolution_data = []
1081
+ months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][:int(forecast_months)]
1082
+ current_volume = base_volume
1083
+
1084
+ for month in months:
1085
+ # Add some randomness to make it look more realistic
1086
+ np.random.seed(sum(ord(c) for c in month + keyword))
1087
+ random_factor = 0.9 + (0.2 * np.random.random())
1088
+ current_volume *= growth_factor * random_factor
1089
+
1090
+ evolution_data.append({
1091
+ "month": month,
1092
+ "searchVolume": int(current_volume),
1093
+ "competitionScore": min(95, 45 + (months.index(month) * 3) + (sum(ord(c) for c in keyword) % 10)),
1094
+ "intentClarity": min(95, 80 + (months.index(month) * 2) + (sum(ord(c) for c in keyword) % 5))
1095
+ })
1096
+
1097
+ progress(0.6, desc="Creating visualizations...")
1098
+ # Create interactive evolution chart
1099
+ evolution_chart = create_evolution_chart(evolution_data, forecast_months, growth_scenario)
1100
+
1101
+ # SERP results and ranking history (new feature)
1102
+ serp_results = None
1103
+ ranking_chart = None
1104
+ serp_html = None
1105
+
1106
+ if get_serp:
1107
+ progress(0.7, desc="Fetching SERP data...")
1108
+ # Get SERP results
1109
+ serp_results = simulate_google_serp(keyword)
1110
+
1111
+ # Update ranking history
1112
+ update_ranking_history(keyword, serp_results)
1113
+
1114
+ progress(0.8, desc="Creating ranking charts...")
1115
+ # Create ranking history chart
1116
+ if keyword in ranking_history and len(ranking_history[keyword]) > 0:
1117
+ ranking_chart = create_ranking_history_chart(ranking_history[keyword])
1118
+
1119
+ # Generate SERP HTML
1120
+ serp_html = generate_serp_html(keyword, serp_results)
1121
+
1122
+ # Generate HTML for token visualization
1123
+ token_viz_html = generate_token_visualization_html(token_analysis, full_token_analysis)
1124
+
1125
+ # Generate HTML for full analysis
1126
+ analysis_html = generate_full_analysis_html(
1127
+ keyword,
1128
+ full_token_analysis,
1129
+ intent_analysis,
1130
+ evolution_potential,
1131
+ trends
1132
+ )
1133
+
1134
+ # Generate JSON results
1135
+ json_results = {
1136
+ "keyword": keyword,
1137
+ "tokenAnalysis": full_token_analysis,
1138
+ "intentAnalysis": intent_analysis,
1139
+ "evolutionPotential": evolution_potential,
1140
+ "predictedTrends": trends,
1141
+ "forecast": {
1142
+ "months": forecast_months,
1143
+ "scenario": growth_scenario,
1144
+ "data": evolution_data
1145
+ },
1146
+ "serpResults": serp_results
1147
+ }
1148
+
1149
+ progress(1.0, desc="Analysis complete!")
1150
+ return token_viz_html, analysis_html, json_results, evolution_chart, serp_html, ranking_chart, keyword
1151
+
1152
+ except Exception as e:
1153
+ error_message = f"<div style='color:red;padding:20px;'>Error analyzing keyword: {str(e)}</div>"
1154
+ print(f"Error in analyze_keyword: {str(e)}")
1155
+ return error_message, error_message, None, None, None, None, None
1156
 
1157
+ # Create the Gradio interface with AI Snipper styling
1158
  with gr.Blocks(
1159
  css=ai_snipper_css,
1160
  title="🧬 AI Snipper Keyword DNA Analyzer",
 
1168
 
1169
  # Custom header with DNA theme
1170
  gr.HTML("""
1171
+ <div style="text-align: center; padding: 2rem 0; margin-bottom: 2rem;">
1172
+ <h1 style="font-size: 3rem; font-weight: 800; margin-bottom: 1rem; background: linear-gradient(135deg, #06b6d4, #3b82f6, #8b5cf6); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text;">
1173
+ 🧬 Keyword DNA Analyzer
1174
+ </h1>
1175
+ <p style="font-size: 1.2rem; color: #94a3b8; margin-top: 1rem; font-weight: 400;">
1176
  Decode the genetic structure of your keywords with AI-powered analysis
1177
  </p>
1178
  </div>
 
1223
 
1224
  # Status indicator with custom styling
1225
  status_html = gr.HTML(
1226
+ '<div style="text-align: center; padding: 1rem; border-radius: 8px; margin: 1rem 0; font-weight: 500; background: rgba(6, 182, 212, 0.1); border: 1px solid #06b6d4; color: #06b6d4;">🚀 Enter a keyword and click "Analyze DNA" to begin</div>'
1227
  )
1228
 
1229
  # Main analyze button
1230
  analyze_btn = gr.Button(
1231
  "🧬 Analyze DNA",
1232
+ variant="primary"
 
1233
  )
1234
 
1235
  # Example buttons with custom styling
1236
  gr.Markdown("### 💡 Try These Examples")
1237
+ with gr.Row():
1238
  example_btns = []
1239
  examples = [
1240
  "preprocessing",
 
1266
  with gr.Tab("πŸ’Ύ Raw Data"):
1267
  json_output = gr.JSON()
1268
 
1269
+ # Event handlers
1270
  voice_submit_btn.click(
1271
  handle_voice_input,
1272
  inputs=[audio_input],
 
1275
 
1276
  # Updated status messages with custom styling
1277
  analyze_btn.click(
1278
+ lambda: '<div style="text-align: center; padding: 1rem; border-radius: 8px; margin: 1rem 0; font-weight: 500; background: rgba(6, 182, 212, 0.1); border: 1px solid #06b6d4; color: #06b6d4;">🔄 Loading models and analyzing... This may take a moment.</div>',
1279
  outputs=status_html
1280
  ).then(
1281
  analyze_keyword,
1282
  inputs=[input_text, forecast_months, growth_scenario, include_serp],
1283
  outputs=[token_viz_html, analysis_html, json_output, evolution_chart, serp_html, ranking_chart, input_text]
1284
  ).then(
1285
+ lambda: '<div style="text-align: center; padding: 1rem; border-radius: 8px; margin: 1rem 0; font-weight: 500; background: rgba(20, 184, 166, 0.1); border: 1px solid #14b8a6; color: #14b8a6;">✅ Analysis complete! Check the results above.</div>',
1286
  outputs=status_html
1287
  )
1288
 
 
1297
  inputs=[btn],
1298
  outputs=[input_text]
1299
  ).then(
1300
+ lambda: '<div style="text-align: center; padding: 1rem; border-radius: 8px; margin: 1rem 0; font-weight: 500; background: rgba(6, 182, 212, 0.1); border: 1px solid #06b6d4; color: #06b6d4;">🔄 Loading models and analyzing... This may take a moment.</div>',
1301
  outputs=status_html
1302
  ).then(
1303
  analyze_keyword,
1304
  inputs=[input_text, forecast_months, growth_scenario, include_serp],
1305
  outputs=[token_viz_html, analysis_html, json_output, evolution_chart, serp_html, ranking_chart, input_text]
1306
  ).then(
1307
+ lambda: '<div style="text-align: center; padding: 1rem; border-radius: 8px; margin: 1rem 0; font-weight: 500; background: rgba(20, 184, 166, 0.1); border: 1px solid #14b8a6; color: #14b8a6;">✅ Analysis complete! Check the results above.</div>',
1308
  outputs=status_html
1309
  )
1310
 
1311
  # Launch configuration
1312
  if __name__ == "__main__":
1313
+ demo.launch()