from transformers import AutoTokenizer
from flask import Flask, request, render_template_string, jsonify
import hashlib
import sys
import math
import os
import time

app = Flask(__name__)

# Set maximum content length to 50MB to handle larger files
app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024

# Create upload folder if it doesn't exist
UPLOAD_FOLDER = '/tmp/tokenizer_uploads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# Predefined tokenizer models with aliases
TOKENIZER_MODELS = {
    'llama4': {
        # Same tokenizer as meta-llama/Llama-4-Maverick-17B-128E-Instruct,
        # meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8, meta-llama/Llama-4-Scout-17B-16E, etc.
        'name': 'meta-llama/Llama-4-Scout-17B-16E-Instruct',
        'alias': 'Llama 4'
    },
    'mistral-small': {
        'name': 'mistralai/Mistral-Small-3.1-24B-Instruct-2503',
        'alias': 'Mistral Small 3.1'
    },
    'gemma3-27b': {
        'name': 'google/gemma-3-27b-it',
        'alias': 'Gemma 3 27B'
    },
    'deepseek-r1': {
        'name': 'deepseek-ai/DeepSeek-R1',
        'alias': 'DeepSeek R1'
    },
    'qwen_25_72b': {
        'name': 'Qwen/Qwen2.5-72B-Instruct',
        'alias': 'Qwen 2.5 72B'
    },
    'llama_33': {
        'name': 'unsloth/Llama-3.3-70B-Instruct-bnb-4bit',
        'alias': 'Llama 3.3 70B'
    },
    'gemma2_2b': {
        'name': 'google/gemma-2-2b-it',
        'alias': 'Gemma 2 2B'
    },
    'bert-large-uncased': {
        'name': 'google-bert/bert-large-uncased',
        'alias': 'BERT Large Uncased'
    },
    'gpt2': {
        'name': 'openai-community/gpt2',
        'alias': 'GPT-2'
    }
}

# Initialize tokenizers dict
tokenizers = {}

# Dictionary to store custom model loading errors
custom_model_errors = {}

# Cache for custom tokenizers with timestamp
custom_tokenizers = {}

# Cache for tokenizer info
tokenizer_info_cache = {}

# Cache expiration time (1 hour)
CACHE_EXPIRATION = 3600  # seconds


def get_tokenizer_info(tokenizer):
    """
    Extract useful information from a tokenizer.
    Returns a dictionary with tokenizer details.
    """
    info = {}
    try:
        # Get vocabulary size (dictionary size)
        if hasattr(tokenizer, 'vocab_size'):
            info['vocab_size'] = tokenizer.vocab_size
        elif hasattr(tokenizer, 'get_vocab'):
            info['vocab_size'] = len(tokenizer.get_vocab())

        # Get model max length if available
        if hasattr(tokenizer, 'model_max_length') and tokenizer.model_max_length < 1000000:  # Sanity check
            info['model_max_length'] = tokenizer.model_max_length

        # Check tokenizer type
        info['tokenizer_type'] = tokenizer.__class__.__name__

        # Get special tokens
        special_tokens = {}
        for token_name in ['pad_token', 'eos_token', 'bos_token', 'sep_token',
                           'cls_token', 'unk_token', 'mask_token']:
            if hasattr(tokenizer, token_name) and getattr(tokenizer, token_name) is not None:
                token_value = getattr(tokenizer, token_name)
                if token_value and str(token_value).strip():
                    special_tokens[token_name] = str(token_value)
        info['special_tokens'] = special_tokens
    except Exception as e:
        info['error'] = f"Error extracting tokenizer info: {str(e)}"
    return info
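# Illustrative example of the structure get_tokenizer_info() returns. Values shown
# are for the GPT-2 tokenizer and are indicative only; they may differ slightly
# across transformers versions:
#
#   tok = AutoTokenizer.from_pretrained('openai-community/gpt2')
#   get_tokenizer_info(tok)
#   # -> {'vocab_size': 50257, 'model_max_length': 1024,
#   #     'tokenizer_type': 'GPT2TokenizerFast',
#   #     'special_tokens': {'bos_token': '<|endoftext|>', 'eos_token': '<|endoftext|>',
#   #                        'unk_token': '<|endoftext|>'}}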
def load_tokenizer(model_id_or_name):
    """
    Load tokenizer if not already loaded.
    Handles both predefined models and custom HF paths.
    Returns a tuple of (tokenizer, tokenizer_info, error_message).
    """
    error_message = None
    tokenizer_info = {}

    # Check if we have cached tokenizer info
    if model_id_or_name in tokenizer_info_cache:
        tokenizer_info = tokenizer_info_cache[model_id_or_name]

    try:
        # Check if it's a predefined model ID
        if model_id_or_name in TOKENIZER_MODELS:
            model_name = TOKENIZER_MODELS[model_id_or_name]['name']
            if model_id_or_name not in tokenizers:
                tokenizers[model_id_or_name] = AutoTokenizer.from_pretrained(model_name)
            tokenizer = tokenizers[model_id_or_name]

            # Get tokenizer info if not already cached
            if model_id_or_name not in tokenizer_info_cache:
                tokenizer_info = get_tokenizer_info(tokenizer)
                tokenizer_info_cache[model_id_or_name] = tokenizer_info
            return tokenizer, tokenizer_info, None

        # It's a custom model path.
        # Check if we have it in the custom cache and it's not expired.
        current_time = time.time()
        if model_id_or_name in custom_tokenizers:
            cached_tokenizer, timestamp = custom_tokenizers[model_id_or_name]
            if current_time - timestamp < CACHE_EXPIRATION:
                # Get tokenizer info if not already cached
                if model_id_or_name not in tokenizer_info_cache:
                    tokenizer_info = get_tokenizer_info(cached_tokenizer)
                    tokenizer_info_cache[model_id_or_name] = tokenizer_info
                return cached_tokenizer, tokenizer_info, None

        # Not in cache or expired, load it
        tokenizer = AutoTokenizer.from_pretrained(model_id_or_name)

        # Store in cache with timestamp
        custom_tokenizers[model_id_or_name] = (tokenizer, current_time)

        # Clear any previous errors for this model
        if model_id_or_name in custom_model_errors:
            del custom_model_errors[model_id_or_name]

        # Get tokenizer info
        tokenizer_info = get_tokenizer_info(tokenizer)
        tokenizer_info_cache[model_id_or_name] = tokenizer_info

        return tokenizer, tokenizer_info, None
    except Exception as e:
        error_message = f"Failed to load tokenizer: {str(e)}"
        # Store error for future reference
        custom_model_errors[model_id_or_name] = error_message
        return None, tokenizer_info, error_message


# Optionally warm the cache for the predefined tokenizers at startup so the first
# request does not pay the download cost. Failures (e.g. gated models without
# credentials) are logged and skipped instead of aborting the process.
for _model_id, _model_info in TOKENIZER_MODELS.items():
    try:
        tokenizers[_model_id] = AutoTokenizer.from_pretrained(_model_info['name'])
    except Exception as _exc:
        print(f"Warning: could not pre-load tokenizer '{_model_info['name']}': {_exc}", file=sys.stderr)


def get_varied_color(token: str) -> dict:
    """Generate vibrant colors with HSL for better visual distinction."""
    token_hash = hashlib.md5(token.encode()).hexdigest()
    hue = int(token_hash[:3], 16) % 360
    saturation = 70 + (int(token_hash[3:5], 16) % 20)
    lightness = 80 + (int(token_hash[5:7], 16) % 10)
    text_lightness = 20 if lightness > 50 else 90
    return {
        'background': f'hsl({hue}, {saturation}%, {lightness}%)',
        'text': f'hsl({hue}, {saturation}%, {text_lightness}%)'
    }


def fix_token(token: str) -> str:
    """Fix token for display with improved space visualization."""
    if token.startswith('Ġ'):
        space_count = token.count('Ġ')
        return '·' * space_count + token[space_count:]
    return token


def get_token_stats(tokens: list, original_text: str) -> dict:
    """Calculate enhanced statistics about the tokens."""
    if not tokens:
        return {}

    total_tokens = len(tokens)
    unique_tokens = len(set(tokens))
    avg_length = sum(len(t) for t in tokens) / total_tokens
    compression_ratio = len(original_text) / total_tokens

    # Token type analysis
    space_tokens = sum(1 for t in tokens if t.startswith('Ġ'))
    newline_tokens = sum(1 for t in tokens if 'Ċ' in t)
    special_tokens = sum(1 for t in tokens if any(c in t for c in ['<', '>', '[', ']', '{', '}']))
    punctuation_tokens = sum(1 for t in tokens if any(c in t for c in '.,!?;:()'))

    # Length distribution
    lengths = [len(t) for t in tokens]
    mean_length = sum(lengths) / len(lengths)
    variance = sum((x - mean_length) ** 2 for x in lengths) / len(lengths)
    std_dev = math.sqrt(variance)

    return {
        'basic_stats': {
            'total_tokens': total_tokens,
            'unique_tokens': unique_tokens,
            'compression_ratio': round(compression_ratio, 2),
            'space_tokens': space_tokens,
            'newline_tokens': newline_tokens,
            'special_tokens': special_tokens,
            'punctuation_tokens': punctuation_tokens,
            'unique_percentage': round(unique_tokens / total_tokens * 100, 1)
        },
        'length_stats': {
            'avg_length': round(avg_length, 2),
            'std_dev': round(std_dev, 2),
            'min_length': min(lengths),
            'max_length': max(lengths),
            'median_length': sorted(lengths)[len(lengths) // 2]
        }
    }
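# Illustrative example of the helpers above (assumes a GPT-2-style BPE tokenizer,
# where 'Ġ' marks a token that begins with a space; numbers are worked by hand):
#
#   tokens = ['Hello', 'Ġworld', '!']
#   fix_token(tokens[1])                          # -> '·world'
#   stats = get_token_stats(tokens, 'Hello world!')
#   stats['basic_stats']['total_tokens']          # -> 3
#   stats['basic_stats']['compression_ratio']     # -> 4.0  (12 characters / 3 tokens)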
def process_text(text: str, model_id_or_name: str, is_full_file: bool = False, file_path: str = None) -> dict:
    """Process text and return tokenization data."""
    tokenizer, tokenizer_info, error = load_tokenizer(model_id_or_name)
    if error:
        raise Exception(error)

    # For file uploads, read only a preview from the file but process the full file for stats
    if file_path and is_full_file:
        # Read the preview for display
        with open(file_path, 'r', errors='replace') as f:
            preview_text = f.read(8096)

        # Tokenize preview for display
        preview_tokens = tokenizer.tokenize(preview_text)
        display_tokens = preview_tokens[:50000]

        # Process full file for stats in chunks to avoid memory issues
        total_tokens = []
        token_set = set()
        total_length = 0
        chunk_size = 1024 * 1024  # 1MB chunks

        with open(file_path, 'r', errors='replace') as f:
            while True:
                chunk = f.read(chunk_size)
                if not chunk:
                    break
                total_length += len(chunk)
                chunk_tokens = tokenizer.tokenize(chunk)
                total_tokens.extend(chunk_tokens)
                token_set.update(chunk_tokens)

        # Calculate stats
        stats = get_token_stats(total_tokens, ' ' * total_length)  # Approximation for original text
    else:
        # Standard processing for normal text input
        all_tokens = tokenizer.tokenize(text)
        total_token_count = len(all_tokens)

        # For display: if it's a preview, only take the first 8096 chars
        preview_text = text[:8096] if is_full_file else text
        preview_tokens = tokenizer.tokenize(preview_text)
        display_tokens = preview_tokens[:50000]

        # Always use full text for stats
        stats = get_token_stats(all_tokens, text)

    # Format tokens for display
    token_data = []
    for idx, token in enumerate(display_tokens):
        colors = get_varied_color(token)
        fixed_token = fix_token(token)
        # Compute the numerical token ID from the tokenizer
        token_id = tokenizer.convert_tokens_to_ids(token)
        token_data.append({
            'original': token,
            'display': fixed_token[:-1] if fixed_token.endswith('Ċ') else fixed_token,
            'colors': colors,
            'newline': fixed_token.endswith('Ċ'),
            'token_id': token_id,
            'token_index': idx
        })

    # Use the appropriate token count based on the processing method
    total_token_count = len(total_tokens) if file_path and is_full_file else len(all_tokens)

    return {
        'tokens': token_data,
        'stats': stats,
        'display_limit_reached': total_token_count > 50000 and not is_full_file,
        'total_tokens': total_token_count,
        'is_full_file': is_full_file,
        'preview_only': is_full_file,
        'tokenizer_info': tokenizer_info  # Include tokenizer info
    }


# HTML template with enhanced modern styling
HTML_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
<title>Token Visualizer</title>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="icon" href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 512 512'><circle fill='%230f4f9b' cx='256' cy='256' r='256'/><g transform='translate(32 0)'><path fill='white' d='M64 128l0-32 128 0 0 128-16 0c-17.7 0-32 14.3-32 32s14.3 32 32 32l96 0c17.7 0 32-14.3 32-32s-14.3-32-32-32l-16 0 0-128 128 0 0 32c0 17.7 14.3 32 32 32s32-14.3 32-32l0-48c0-26.5-21.5-48-48-48L224 32 48 32C21.5 32 0 53.5 0 
80l0 48c0 17.7 14.3 32 32 32s32-14.3 32-32zM9.4 361.4c-12.5 12.5-12.5 32.8 0 45.3l64 64c9.2 9.2 22.9 11.9 34.9 6.9s19.8-16.6 19.8-29.6l0-32 192 0 0 32c0 12.9 7.8 24.6 19.8 29.6s25.7 2.2 34.9-6.9l64-64c12.5-12.5 12.5-32.8 0-45.3l-64-64c-9.2-9.2-22.9-11.9-34.9-6.9s-19.8 16.6-19.8 29.6l0 32-192 0 0-32c0-12.9-7.8-24.6-19.8-29.6s-25.7-2.2-34.9 6.9l-64 64z'/></g></svg>"> <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script> <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css"> <style> :root { --primary-color: #0f4f9b; /* Blue accent */ --primary-hover: #0c3e7a; /* Darker blue accent */ --bg-color: #121212; /* Dark background */ --card-bg: #1e1e1e; /* Dark card background */ --card-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.7), 0 2px 4px -1px rgba(0, 0, 0, 0.6); --transition: all 0.3s ease; --text-color: #E0E0E0; /* Main text color */ --secondary-text: #A0A0A0;/* Secondary text color */ --input-bg: #2a2a2a; /* Input/textarea background */ --input-border: #444444; /* Input/textarea border */ --input-focus: #0f4f9b; /* Focus border color */ } * { margin: 0; padding: 0; box-sizing: border-box; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; scrollbar-width: thin; scrollbar-color: #0f4f9b #121212 } /* Width and height of the scrollbar */ ::-webkit-scrollbar { width: 12px; height: 12px; } @keyframes spin { from { transform: rotate(0deg); } to { transform: rotate(360deg); } } /* Track (background) */ ::-webkit-scrollbar-track { background: #121212; border-radius: 10px; } /* Handle (draggable part) */ ::-webkit-scrollbar-thumb { background: #0f4f9b; border-radius: 10px; border: 2px solid #121212; } /* Handle on hover */ ::-webkit-scrollbar-thumb:hover { background: #0c3e7a; } body { background-color: var(--bg-color); padding: 2rem; min-height: 100vh; background-image: radial-gradient(circle at 20% 20%, rgba(15, 79, 155, 0.1) 0%, transparent 50%), radial-gradient(circle at 80% 80%, rgba(15, 79, 155, 0.1) 0%, transparent 50%); color: var(--text-color); } .container { max-width: 1200px; margin: 0 auto; } .header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 2rem; position: relative; } .title-section { flex-grow: 1; } .title { font-size: 2.5rem; font-weight: 800; color: var(--primary-color); margin-bottom: 0.5rem; } .subtitle { color: var(--secondary-text); font-size: 1.1rem; } .model-selector { position: relative; min-width: 200px; } .model-selector-header { display: flex; gap: 0.5rem; margin-bottom: 0.5rem; } .model-type-toggle { display: flex; background-color: var(--card-bg); border-radius: 0.5rem; padding: 0.25rem; overflow: hidden; } .toggle-option { padding: 0.5rem 0.75rem; font-size: 0.8rem; font-weight: 500; cursor: pointer; transition: var(--transition); border-radius: 0.375rem; color: var(--secondary-text); } .toggle-option.active { background-color: var(--primary-color); color: white; } select { width: 100%; padding: 0.75rem 1rem; border: 2px solid var(--input-border); border-radius: 0.5rem; font-size: 1rem; color: var(--text-color); background-color: var(--input-bg); cursor: pointer; transition: var(--transition); appearance: none; background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='%230f4f9b'%3E%3Cpath d='M7 10l5 5 5-5H7z'/%3E%3C/svg%3E"); background-repeat: no-repeat; background-position: right 1rem center; background-size: 1.5rem; } select:hover, .custom-model-input:hover { 
border-color: var(--primary-color); } select:focus, .custom-model-input:focus { outline: none; border-color: var(--primary-color); box-shadow: 0 0 0 3px rgba(15, 79, 155, 0.1); } .custom-model-input { width: 100%; padding: 0.75rem 1rem; border: 2px solid var(--input-border); border-radius: 0.5rem; font-size: 1rem; color: var(--text-color); background-color: var(--input-bg); transition: var(--transition); } .input-section { margin-bottom: 2rem; } textarea { width: 100%; height: 150px; padding: 1.25rem; border: 2px solid var(--input-border); border-radius: 0.75rem; resize: vertical; font-size: 1rem; margin-bottom: 1rem; transition: var(--transition); background-color: var(--input-bg); color: var(--text-color); } textarea:focus { outline: none; border-color: var(--input-focus); box-shadow: 0 0 0 3px rgba(15, 79, 155, 0.1); } .button-container { display: flex; justify-content: center; width: 100%; gap: 1rem; } button { padding: 0.875rem 2.5rem; background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%); color: #fff; border: none; border-radius: 0.75rem; font-size: 1.1rem; font-weight: 600; cursor: pointer; transition: var(--transition); box-shadow: 0 4px 6px -1px rgba(15, 79, 155, 0.2); } button:hover { transform: translateY(-2px); box-shadow: 0 6px 8px -1px rgba(15, 79, 155, 0.3); } button:active { transform: translateY(0); } button:disabled { opacity: 0.7; cursor: not-allowed; } .card { background-color: var(--card-bg); border-radius: 1rem; box-shadow: var(--card-shadow); padding: 1.5rem; margin-bottom: 2rem; transition: var(--transition); } .card:hover { transform: translateY(-2px); box-shadow: 0 6px 12px -2px rgba(0, 0, 0, 0.1); } .card-title { font-size: 1.25rem; font-weight: 700; color: var(--text-color); margin-bottom: 1.25rem; display: flex; align-items: center; gap: 0.5rem; cursor: pointer; } .card-title::before { content: ''; display: block; width: 4px; height: 1.25rem; background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%); border-radius: 2px; } .token-container { display: flex; flex-wrap: wrap; gap: 0.375rem; margin-bottom: 1rem; padding: 1rem; background-color: #2a2a2a; border-radius: 0.5rem; max-height: 200px; overflow-y: auto; transition: max-height 0.3s ease; } .token-container.expanded { max-height: none; } .token { padding: 0.375rem 0.75rem; border-radius: 0.375rem; background-color: var(--input-bg); font-family: 'SF Mono', 'Monaco', 'Inconsolata', 'Fira Mono', 'Droid Sans Mono', 'Source Code Pro', monospace; font-size: 0.875rem; color: var(--text-color); cursor: default; transition: var(--transition); box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05); } .token:hover { transform: translateY(-1px); box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); } .stats-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1.5rem; margin-bottom: 2rem; } .stat-card { background-color: var(--card-bg); padding: 1.5rem; border-radius: 1rem; box-shadow: var(--card-shadow); transition: var(--transition); } .stat-card:hover { transform: translateY(-2px); box-shadow: 0 6px 12px -2px rgba(0, 0, 0, 0.1); } .stat-title { color: var(--secondary-text); font-size: 0.875rem; font-weight: 500; margin-bottom: 0.5rem; text-transform: uppercase; letter-spacing: 0.05em; } .stat-value { color: var(--text-color); font-size: 2rem; font-weight: 700; line-height: 1.2; margin-bottom: 0.25rem; } .stat-description { color: var(--secondary-text); font-size: 0.875rem; } .expand-button { background: none; border: none; color: 
var(--primary-color); font-size: 0.875rem; padding: 0.5rem; cursor: pointer; display: block; margin: 0 auto; box-shadow: none; } .expand-button:hover { text-decoration: underline; transform: none; box-shadow: none; } .error-message { color: #EF4444; background-color: #3a1f1f; border: 1px solid #562626; padding: 1rem; border-radius: 0.5rem; margin-bottom: 1rem; display: none; } .display-limit-notice { background-color: #4b2b07; border: 1px solid #7c4a02; color: #FFD591; padding: 0.75rem; border-radius: 0.5rem; margin-top: 1rem; font-size: 0.875rem; display: none; } /* File drop zone styles */ .file-drop-zone { position: fixed; top: 0; left: 0; width: 100%; height: 100%; background-color: rgba(15, 79, 155, 0.15); z-index: 1000; display: flex; justify-content: center; align-items: center; opacity: 0; pointer-events: none; transition: opacity 0.3s ease; } .file-drop-zone.active { opacity: 1; pointer-events: all; } .drop-indicator { background-color: var(--card-bg); border: 2px dashed var(--primary-color); border-radius: 1rem; padding: 2rem; text-align: center; width: 60%; max-width: 400px; box-shadow: 0 8px 32px rgba(0, 0, 0, 0.25); animation: pulse 2s infinite; } @keyframes pulse { 0% { transform: scale(1); } 50% { transform: scale(1.05); } 100% { transform: scale(1); } } .drop-indicator p { margin-bottom: 0.5rem; color: var(--text-color); font-size: 1.2rem; } .file-icon { font-size: 3rem; margin-bottom: 1rem; color: var(--primary-color); } .file-upload-icon { position: fixed; bottom: 20px; left: 20px; width: 45px; height: 45px; background-color: var(--card-bg); border-radius: 50%; display: flex; justify-content: center; align-items: center; cursor: pointer; z-index: 100; box-shadow: 0 2px 10px rgba(0, 0, 0, 0.2); transition: transform 0.2s ease, box-shadow 0.2s ease; } .file-upload-icon:hover { transform: translateY(-2px); box-shadow: 0 4px 15px rgba(0, 0, 0, 0.3); } .file-upload-icon span { font-size: 1.5rem; color: var(--primary-color); } .file-info { position: fixed; bottom: 20px; left: 75px; background-color: var(--card-bg); color: var(--primary-color); font-weight: 500; padding: 0.5rem 1rem; border-radius: 1rem; box-shadow: 0 2px 10px rgba(0, 0, 0, 0.2); max-width: 270px; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; z-index: 100; display: none; } .file-detach { margin-left: 8px; display: inline-block; width: 18px; height: 18px; background-color: rgba(255, 255, 255, 0.1); color: var(--text-color); border-radius: 50%; text-align: center; line-height: 16px; font-size: 12px; cursor: pointer; transition: all 0.2s ease; } .file-detach:hover { background-color: rgba(255, 0, 0, 0.2); color: #ff6b6b; transform: scale(1.1); } .preview-notice { background-color: #273c56; border: 1px solid #365a82; color: #89b4e8; padding: 0.75rem; border-radius: 0.5rem; margin-top: 1rem; font-size: 0.875rem; display: none; } .custom-model-wrapper { position: relative; } .model-badge { position: absolute; top: -10px; right: -5px; background: linear-gradient(135deg, #22c55e 0%, #15803d 100%); color: white; font-size: 0.7rem; font-weight: 700; padding: 0.25rem 0.5rem; border-radius: 999px; transform: scale(0); transition: transform 0.3s cubic-bezier(0.175, 0.885, 0.32, 1.275); box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2); z-index: 10; } .model-badge.show { transform: scale(1); } .custom-model-help { display: inline-block; width: 16px; height: 16px; line-height: 16px; font-size: 11px; font-weight: bold; text-align: center; background-color: var(--secondary-text); color: var(--card-bg); border-radius: 
50%; margin-left: 5px; cursor: help; vertical-align: middle; } .tooltip { position: absolute; top: 100%; left: 0; width: 280px; background-color: #333; color: #fff; padding: 0.75rem; border-radius: 0.5rem; font-size: 0.8rem; margin-top: 0.5rem; z-index: 100; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); opacity: 0; visibility: hidden; transition: opacity 0.2s, visibility 0.2s; } .custom-model-help:hover + .tooltip { opacity: 1; visibility: visible; } /* Tokenizer info icon and tooltip styles */ .tokenizer-info-icon { display: inline-flex; align-items: center; justify-content: center; width: 24px; height: 24px; background: linear-gradient(135deg, var(--primary-color) 0%, var(--primary-hover) 100%); color: white; border-radius: 50%; position: absolute; left: -32px; /* Position to the left of the selector */ top: 50%; transform: translateY(-50%); cursor: pointer; font-size: 12px; font-weight: bold; transition: all 0.2s ease; z-index: 10; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2); } .tokenizer-info-icon:hover { transform: translateY(-50%) scale(1.1); box-shadow: 0 3px 8px rgba(0, 0, 0, 0.3); } /* Watermark styles */ .watermark { position: fixed; bottom: 20px; right: 20px; color: var(--primary-color); font-size: 1.4rem; font-weight: 700; opacity: 0.25; /* Semi-transparent */ z-index: 100; transition: opacity 0.3s ease; text-decoration: none; pointer-events: auto; /* Ensure it remains clickable */ } .watermark:hover { opacity: 0.6; /* Increase opacity on hover */ } .tokenizer-info-tooltip { position: absolute; top: calc(100% + 8px); left: -30px; /* Adjust position to align with the icon */ width: 300px; background-color: var(--card-bg); color: var(--text-color); border: 1px solid var(--primary-color); border-radius: 0.75rem; box-shadow: 0 5px 15px rgba(0, 0, 0, 0.3); padding: 1rem; z-index: 1000; /* Increase z-index to ensure visibility */ opacity: 0; visibility: hidden; transition: opacity 0.3s, visibility 0.3s; pointer-events: none; /* Initially disable pointer events */ } .tokenizer-info-icon:not(.tooltip-disabled):hover + .tokenizer-info-tooltip { opacity: 1; visibility: visible; pointer-events: auto; } .tokenizer-info-tooltip:hover { opacity: 1; visibility: visible; pointer-events: auto; } .tokenizer-info-header { font-size: 1.1rem; font-weight: 600; margin-bottom: 0.5rem; padding-bottom: 0.5rem; border-bottom: 1px solid rgba(255, 255, 255, 0.1); color: var(--primary-color); } .tokenizer-info-grid { display: grid; grid-template-columns: repeat(2, 1fr); gap: 0.75rem; margin: 0.75rem 0; } .tokenizer-info-item { display: flex; flex-direction: column; } .tokenizer-info-label { font-size: 0.75rem; color: var(--secondary-text); margin-bottom: 0.25rem; } .tokenizer-info-value { font-size: 0.95rem; font-weight: 500; } .special-tokens-container { margin-top: 0.75rem; background-color: rgba(15, 79, 155, 0.1); border-radius: 0.5rem; padding: 0.5rem; max-height: 100px; overflow-y: auto; } .special-token-item { display: flex; justify-content: space-between; margin-bottom: 0.25rem; font-size: 0.8rem; } .token-name { color: var(--secondary-text); } .token-value { background-color: rgba(255, 255, 255, 0.1); padding: 1px 4px; border-radius: 2px; font-family: monospace; } .tokenizer-info-loading { display: flex; justify-content: center; align-items: center; height: 100px; } .tokenizer-info-spinner { width: 30px; height: 30px; border: 3px solid var(--primary-color); border-radius: 50%; border-top-color: transparent; animation: spin 1s linear infinite; } .tokenizer-info-error { color: #f87171; font-size: 0.9rem; 
text-align: center; padding: 1rem; }
@media (max-width: 768px) {
  .header { flex-direction: column; align-items: stretch; gap: 1rem; }
  .model-selector { width: 100%; }
  .stats-grid { grid-template-columns: 1fr; }
  .tokenizer-info-tooltip { width: 250px; }
}
</style>
</head>
<body>
<!-- Hidden File Drop Zone that appears when dragging files -->
<div id="fileDropZone" class="file-drop-zone">
  <div class="drop-indicator">
    <div class="file-icon">📄</div>
    <p>Drop your file here</p>
  </div>
</div>
<!-- File upload icon in bottom left corner -->
<div id="fileUploadIcon" class="file-upload-icon">
  <span>📎</span>
</div>
<p class="file-info" id="fileInfo"></p>
<div class="container">
  <div class="header">
    <div class="title-section">
      <h1 class="title">Token Visualizer</h1>
      <p class="subtitle">Advanced tokenization analysis and visualization</p>
    </div>
    <div class="model-selector">
      <div class="model-selector-header">
        <div class="model-type-toggle">
          <div class="toggle-option predefined-toggle active" data-type="predefined">Predefined</div>
          <div class="toggle-option custom-toggle" data-type="custom">Custom</div>
        </div>
      </div>
      <div id="predefinedModelSelector">
        <div style="position: relative;">
          <div class="tokenizer-info-icon" id="modelInfoIcon" title="View tokenizer information">ℹ</div>
          <!-- TOOLTIP MOVED HERE -->
          <div class="tokenizer-info-tooltip" id="modelInfoTooltip">
            <div id="tokenizerInfoContent">
              <div class="tokenizer-info-loading">
                <div class="tokenizer-info-spinner"></div>
              </div>
            </div>
          </div>
          <!-- SELECT NOW COMES AFTER ICON AND TOOLTIP -->
          <select id="modelSelect" name="model">
            {% for model_id, info in models.items() %}
            <option value="{{ model_id }}" {% if selected_model == model_id %}selected{% endif %}>
              {{ info.alias }}
            </option>
            {% endfor %}
          </select>
        </div>
      </div>
      <div id="customModelSelector" style="display: none;" class="custom-model-wrapper">
        <div style="position: relative;">
          <div class="tokenizer-info-icon" id="customModelInfoIcon" title="View tokenizer information">ℹ</div>
          <div class="tokenizer-info-tooltip" id="customModelInfoTooltip">
            <div id="customTokenizerInfoContent">
              <div class="tokenizer-info-loading">
                <div class="tokenizer-info-spinner"></div>
              </div>
            </div>
          </div>
          <input type="text" id="customModelInput" class="custom-model-input" placeholder="Enter HuggingFace model path" value="{{ custom_model if custom_model and custom_model|length > 0 else '' }}">
        </div>
        <span class="custom-model-help">?</span>
        <div class="tooltip">
          Enter a valid HuggingFace model ID (e.g., "mistralai/Mistral-7B-Instruct-v0.3"). The model must have a tokenizer available and must not be gated or otherwise restricted. For many restricted models you can load a mirrored copy instead, e.g. "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit" in place of the original path.
</div> <div class="model-badge" id="modelSuccessBadge">Loaded</div> </div> </div> </div> <div class="error-message" id="errorMessage">{{ error }}</div> <div class="input-section"> <form id="analyzeForm" method="POST" enctype="multipart/form-data"> <textarea name="text" id="textInput" placeholder="Enter text to analyze or upload a file in bottom left corner...">{{ text }}</textarea> <input type="hidden" name="model" id="modelInput" value="{{ selected_model }}"> <input type="hidden" name="custom_model" id="customModelInputHidden" value="{{ custom_model if custom_model else '' }}"> <input type="hidden" name="model_type" id="modelTypeInput" value="{{ model_type if model_type else 'predefined' }}"> <input type="file" name="file" id="fileInput" style="display: none;"> <div class="button-container"> <button type="submit" id="analyzeButton">Analyze Text</button> </div> </form> </div> <div id="results" class="results" {% if not token_data %}style="display: none;"{% endif %}> <div class="card"> <h2 class="card-title">Token Visualization</h2> <div class="preview-notice" id="previewNotice"> Note: Showing preview of first 8096 characters. Stats are calculated on the full file. </div> <div class="token-container" id="tokenContainer"> {% if token_data %} {% for token in token_data.tokens %} <span class="token" style="background-color: {{ token.colors.background }}; color: {{ token.colors.text }};" title="Original token: {{ token.original }} | Token ID: {{ token.token_id }}"> {{ token.display }} </span> {% if token.newline %}<br>{% endif %} {% endfor %} {% endif %} </div> <button class="expand-button" id="expandButton">Show More</button> <div class="display-limit-notice" id="displayLimitNotice"> Note: Only showing first 50,000 tokens. Total token count: <span id="totalTokenCount">0</span> </div> </div> <div class="stats-grid"> <div class="stat-card"> <div class="stat-title">Total Tokens</div> <div class="stat-value" id="totalTokens">{{ token_data.stats.basic_stats.total_tokens if token_data else 0 }}</div> <div class="stat-description"> <span id="uniqueTokens">{{ token_data.stats.basic_stats.unique_tokens if token_data else 0 }} unique</span> (<span id="uniquePercentage">{{ token_data.stats.basic_stats.unique_percentage if token_data else 0 }}</span>%) </div> </div> <div class="stat-card"> <div class="stat-title">Token Types</div> <div class="stat-value" id="specialTokens">{{ token_data.stats.basic_stats.special_tokens if token_data else 0 }}</div> <div class="stat-description">special tokens</div> </div> <div class="stat-card"> <div class="stat-title">Whitespace</div> <div class="stat-value" id="spaceTokens">{{ token_data.stats.basic_stats.space_tokens if token_data else 0 }}</div> <div class="stat-description"> spaces: <span id="spaceCount">{{ token_data.stats.basic_stats.space_tokens if token_data else 0 }}</span>, newlines: <span id="newlineCount">{{ token_data.stats.basic_stats.newline_tokens if token_data else 0 }}</span> </div> </div> <div class="stat-card"> <div class="stat-title">Token Length</div> <div class="stat-value" id="avgLength">{{ token_data.stats.length_stats.avg_length if token_data else 0 }}</div> <div class="stat-description"> median: <span id="medianLength">{{ token_data.stats.length_stats.median_length if token_data else 0 }}</span>, ±<span id="stdDev">{{ token_data.stats.length_stats.std_dev if token_data else 0 }}</span> std </div> </div> <div class="stat-card"> <div class="stat-title">Compression</div> <div class="stat-value" id="compressionRatio">{{ 
token_data.stats.basic_stats.compression_ratio if token_data else 0 }}</div> <div class="stat-description">characters per token</div> </div> </div> </div> </div> <a href="https://huggingface.co/spaces/barttee/tokenizers" target="_blank" class="watermark"> @barttee/tokenizers </a> <script> $(document).ready(function() { // File handling variables let currentFile = null; let originalTextContent = null; let lastUploadedFileName = null; let fileJustUploaded = false; // Flag to prevent immediate detachment let currentModelType = "{{ model_type if model_type else 'predefined' }}"; let currentTokenizerInfo = null; // Try to parse tokenizer info if available from server try { currentTokenizerInfo = {{ token_data.tokenizer_info|tojson if token_data and token_data.tokenizer_info else 'null' }}; if (currentTokenizerInfo) { updateTokenizerInfoDisplay(currentTokenizerInfo, currentModelType === 'custom'); } } catch(e) { console.error("Error parsing tokenizer info:", e); } // Show error if exists if ("{{ error }}".length > 0) { showError("{{ error }}"); } // Setup model type based on initial state if (currentModelType === "custom") { $('.toggle-option').removeClass('active'); $('.custom-toggle').addClass('active'); $('#predefinedModelSelector').hide(); $('#customModelSelector').show(); } // Show success badge if custom model loaded successfully if (currentModelType === "custom" && !("{{ error }}".length > 0)) { $('#modelSuccessBadge').addClass('show'); setTimeout(() => { $('#modelSuccessBadge').removeClass('show'); }, 3000); } // Toggle between predefined and custom model inputs $('.toggle-option').click(function() { const modelType = $(this).data('type'); $('.toggle-option').removeClass('active'); $(this).addClass('active'); currentModelType = modelType; if (modelType === 'predefined') { $('#predefinedModelSelector').show(); $('#customModelSelector').hide(); $('#modelTypeInput').val('predefined'); // Set the model input value to the selected predefined model $('#modelInput').val($('#modelSelect').val()); } else { $('#predefinedModelSelector').hide(); $('#customModelSelector').show(); $('#modelTypeInput').val('custom'); } // Clear tokenizer info if switching models if (modelType === 'predefined') { $('#tokenizerInfoContent').html('<div class="tokenizer-info-loading"><div class="tokenizer-info-spinner"></div></div>'); fetchTokenizerInfo($('#modelSelect').val(), false); } else { $('#customTokenizerInfoContent').html('<div class="tokenizer-info-loading"><div class="tokenizer-info-spinner"></div></div>'); // Only fetch if there's a custom model value const customModel = $('#customModelInput').val(); if (customModel) { fetchTokenizerInfo(customModel, true); } } }); // Update hidden input when custom model input changes $('#customModelInput').on('input', function() { $('#customModelInputHidden').val($(this).val()); }); function showError(message) { const errorDiv = $('#errorMessage'); errorDiv.text(message); errorDiv.show(); setTimeout(() => errorDiv.fadeOut(), 5000); } // Function to update tokenizer info display in tooltip function updateTokenizerInfoDisplay(info, isCustom = false) { const targetSelector = isCustom ? 
            '#customTokenizerInfoContent' : '#tokenizerInfoContent';
            let htmlContent = '';

            if (info.error) {
                $(targetSelector).html(`<div class="tokenizer-info-error">${info.error}</div>`);
                return;
            }

            // Start building the tooltip content
            htmlContent = `<div class="tokenizer-info-header">Tokenizer Details</div>
                <div class="tokenizer-info-grid">`;

            // Dictionary size
            if (info.vocab_size) {
                htmlContent += `
                <div class="tokenizer-info-item">
                    <span class="tokenizer-info-label">Dictionary Size</span>
                    <span class="tokenizer-info-value">${info.vocab_size.toLocaleString()}</span>
                </div>`;
            }

            // Tokenizer type
            if (info.tokenizer_type) {
                htmlContent += `
                <div class="tokenizer-info-item">
                    <span class="tokenizer-info-label">Tokenizer Type</span>
                    <span class="tokenizer-info-value">${info.tokenizer_type}</span>
                </div>`;
            }

            // Max length
            if (info.model_max_length) {
                htmlContent += `
                <div class="tokenizer-info-item">
                    <span class="tokenizer-info-label">Max Length</span>
                    <span class="tokenizer-info-value">${info.model_max_length.toLocaleString()}</span>
                </div>`;
            }

            htmlContent += `</div>`; // Close tokenizer-info-grid

            // Special tokens section
            if (info.special_tokens && Object.keys(info.special_tokens).length > 0) {
                htmlContent += `
                <div class="tokenizer-info-item" style="margin-top: 0.75rem;">
                    <span class="tokenizer-info-label">Special Tokens</span>
                    <div class="special-tokens-container">`;

                // Add each special token, escaping HTML special characters
                for (const [tokenName, tokenValue] of Object.entries(info.special_tokens)) {
                    const escapedValue = tokenValue
                        .replace(/&/g, '&amp;')
                        .replace(/</g, '&lt;')
                        .replace(/>/g, '&gt;')
                        .replace(/"/g, '&quot;')
                        .replace(/'/g, '&#39;');

                    htmlContent += `
                    <div class="special-token-item">
                        <span class="token-name">${tokenName}:</span>
                        <span class="token-value">${escapedValue}</span>
                    </div>`;
                }

                htmlContent += `
                    </div>
                </div>`;
            }

            $(targetSelector).html(htmlContent);
        }

        // Function to fetch tokenizer info
        function fetchTokenizerInfo(modelId, isCustom = false) {
            if (!modelId) return;
            const targetSelector = isCustom ?
'#customTokenizerInfoContent' : '#tokenizerInfoContent'; $(targetSelector).html('<div class="tokenizer-info-loading"><div class="tokenizer-info-spinner"></div></div>'); $.ajax({ url: '/tokenizer-info', method: 'GET', data: { model_id: modelId, is_custom: isCustom }, success: function(response) { if (response.error) { $(targetSelector).html(`<div class="tokenizer-info-error">${response.error}</div>`); } else { currentTokenizerInfo = response; updateTokenizerInfoDisplay(response, isCustom); } }, error: function(xhr) { $(targetSelector).html('<div class="tokenizer-info-error">Failed to load tokenizer information</div>'); } }); } function updateResults(data) { $('#results').show(); // Update tokens const tokenContainer = $('#tokenContainer'); tokenContainer.empty(); data.tokens.forEach(token => { const span = $('<span>') .addClass('token') .css({ 'background-color': token.colors.background, 'color': token.colors.text }) // Include token id in the tooltip on hover .attr('title', `Original token: ${token.original} | Token ID: ${token.token_id}`) .text(token.display); tokenContainer.append(span); if (token.newline) { tokenContainer.append('<br>'); } }); // Update display limit notice if (data.display_limit_reached) { $('#displayLimitNotice').show(); $('#totalTokenCount').text(data.total_tokens); } else { $('#displayLimitNotice').hide(); } // Update preview notice if (data.preview_only) { $('#previewNotice').show(); } else { $('#previewNotice').hide(); } // Update basic stats $('#totalTokens').text(data.stats.basic_stats.total_tokens); $('#uniqueTokens').text(`${data.stats.basic_stats.unique_tokens} unique`); $('#uniquePercentage').text(data.stats.basic_stats.unique_percentage); $('#specialTokens').text(data.stats.basic_stats.special_tokens); $('#spaceTokens').text(data.stats.basic_stats.space_tokens); $('#spaceCount').text(data.stats.basic_stats.space_tokens); $('#newlineCount').text(data.stats.basic_stats.newline_tokens); $('#compressionRatio').text(data.stats.basic_stats.compression_ratio); // Update length stats $('#avgLength').text(data.stats.length_stats.avg_length); $('#medianLength').text(data.stats.length_stats.median_length); $('#stdDev').text(data.stats.length_stats.std_dev); // Update tokenizer info if available if (data.tokenizer_info) { currentTokenizerInfo = data.tokenizer_info; updateTokenizerInfoDisplay(data.tokenizer_info, currentModelType === 'custom'); } } // Handle text changes to detach file $('#textInput').on('input', function() { // Skip if file was just uploaded (prevents immediate detachment) if (fileJustUploaded) { fileJustUploaded = false; return; } const currentText = $(this).val(); const fileInput = document.getElementById('fileInput'); // Only detach if a file exists and text has been substantially modified if (fileInput.files.length > 0 && originalTextContent !== null) { // Check if the text is completely different or has been significantly changed // This allows for small edits without detaching const isMajorChange = currentText.length < originalTextContent.length * 0.8 || // Text reduced by at least 20% (currentText.length > 0 && currentText !== originalTextContent.substring(0, currentText.length) && currentText.substring(0, Math.min(20, currentText.length)) !== originalTextContent.substring(0, Math.min(20, currentText.length))); if (isMajorChange) { detachFile(); } } }); // Function to detach file function detachFile() { // Clear the file input $('#fileInput').val(''); // Hide file info $('#fileInfo').fadeOut(300); // Reset the original content tracker 
originalTextContent = $('#textInput').val(); // Reset last uploaded filename lastUploadedFileName = null; } // For model changes $('#modelSelect').change(function() { const selectedModel = $(this).val(); $('#modelInput').val(selectedModel); // Fetch tokenizer info for the selected model fetchTokenizerInfo(selectedModel, false); // If text exists, submit the form if ($('#textInput').val().trim()) { $('#analyzeForm').submit(); } }); // File drop handling const fileDropZone = $('#fileDropZone'); const fileUploadIcon = $('#fileUploadIcon'); // Prevent default drag behaviors ['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => { fileDropZone[0].addEventListener(eventName, preventDefaults, false); document.body.addEventListener(eventName, preventDefaults, false); }); function preventDefaults(e) { e.preventDefault(); e.stopPropagation(); } // Show drop zone when file is dragged over the document document.addEventListener('dragenter', showDropZone, false); document.addEventListener('dragover', showDropZone, false); fileDropZone[0].addEventListener('dragleave', hideDropZone, false); fileDropZone[0].addEventListener('drop', hideDropZone, false); function showDropZone(e) { fileDropZone.addClass('active'); } function hideDropZone() { fileDropZone.removeClass('active'); } // Handle dropped files fileDropZone[0].addEventListener('drop', handleDrop, false); function handleDrop(e) { const dt = e.dataTransfer; const files = dt.files; handleFiles(files); } // Also handle file selection via click on the icon fileUploadIcon.on('click', function() { const input = document.createElement('input'); input.type = 'file'; input.onchange = e => { handleFiles(e.target.files); }; input.click(); }); function handleFiles(files) { if (files.length) { const file = files[0]; currentFile = file; lastUploadedFileName = file.name; fileJustUploaded = true; // Set flag to prevent immediate detachment // Show file info with animation and add detach button $('#fileInfo').html(`${file.name} (${formatFileSize(file.size)}) <span class="file-detach" id="fileDetach"><i class="fas fa-times"></i></span>`).fadeIn(300); // Add click handler for detach button $('#fileDetach').on('click', function(e) { e.stopPropagation(); // Prevent event bubbling detachFile(); return false; }); // Set the file to the file input const dataTransfer = new DataTransfer(); dataTransfer.items.add(file); document.getElementById('fileInput').files = dataTransfer.files; // Preview in textarea (first 8096 chars) const reader = new FileReader(); reader.onload = function(e) { const previewText = e.target.result.slice(0, 8096); $('#textInput').val(previewText); // Store this as the original content AFTER setting the value // to prevent the input event from firing and detaching immediately setTimeout(() => { originalTextContent = previewText; // Automatically submit for analysis $('#analyzeForm').submit(); }, 50); }; reader.readAsText(file); } } function formatFileSize(bytes) { if (bytes < 1024) return bytes + ' bytes'; else if (bytes < 1048576) return (bytes / 1024).toFixed(1) + ' KB'; else return (bytes / 1048576).toFixed(1) + ' MB'; } // Make sure to check if there's still a file when analyzing $('#analyzeForm').on('submit', function(e) { e.preventDefault(); // Skip detachment check if file was just uploaded if (!fileJustUploaded) { // Check if text has been changed but file is still attached const textInput = $('#textInput').val(); const fileInput = document.getElementById('fileInput'); if (fileInput.files.length > 0 && originalTextContent !== null && 
textInput !== originalTextContent && textInput.length < originalTextContent.length * 0.8) { // Text was significantly changed but file is still attached, detach it detachFile(); } } else { // Reset flag after first submission fileJustUploaded = false; } // Update the hidden inputs based on current model type if (currentModelType === 'custom') { $('#customModelInputHidden').val($('#customModelInput').val()); } else { $('#modelInput').val($('#modelSelect').val()); } const formData = new FormData(this); $('#analyzeButton').prop('disabled', true); $.ajax({ url: '/', method: 'POST', data: formData, processData: false, contentType: false, success: function(response) { if (response.error) { showError(response.error); } else { updateResults(response); // Show success badge if custom model if (currentModelType === 'custom') { $('#modelSuccessBadge').addClass('show'); setTimeout(() => { $('#modelSuccessBadge').removeClass('show'); }, 3000); } } }, error: function(xhr) { showError(xhr.responseText || 'An error occurred while processing the text'); }, complete: function() { $('#analyzeButton').prop('disabled', false); } }); }); $('#expandButton').click(function() { const container = $('#tokenContainer'); const isExpanded = container.hasClass('expanded'); container.toggleClass('expanded'); $(this).text(isExpanded ? 'Show More' : 'Show Less'); }); // Initialize tokenizer info for current model if (currentModelType === 'predefined') { fetchTokenizerInfo($('#modelSelect').val(), false); } else if ($('#customModelInput').val()) { fetchTokenizerInfo($('#customModelInput').val(), true); } // Add event listener for custom model input $('#customModelInput').on('change', function() { const modelValue = $(this).val(); if (modelValue) { fetchTokenizerInfo(modelValue, true); } }); }); </script> </body> </html> """ @app.route('/tokenizer-info', methods=['GET']) def tokenizer_info(): """ Endpoint to get tokenizer information without processing text. 
""" model_id = request.args.get('model_id', '') is_custom = request.args.get('is_custom', 'false').lower() == 'true' if not model_id: return jsonify({"error": "No model ID provided"}), 400 try: # For predefined models, use the model name from the dictionary if not is_custom and model_id in TOKENIZER_MODELS: model_id_or_name = model_id else: # For custom models, use the model ID directly model_id_or_name = model_id # Load the tokenizer and get info tokenizer, info, error = load_tokenizer(model_id_or_name) if error: return jsonify({"error": error}), 400 return jsonify(info) except Exception as e: return jsonify({"error": f"Failed to get tokenizer info: {str(e)}"}), 500 @app.route('/', methods=['GET', 'POST']) def index(): text = "" token_data = None error_message = "" selected_model = request.args.get('model', request.form.get('model', 'llama4')) custom_model = request.args.get('custom_model', request.form.get('custom_model', '')) model_type = request.args.get('model_type', request.form.get('model_type', 'predefined')) # Determine which model to use based on model_type model_to_use = selected_model if model_type == 'predefined' else custom_model if request.method == 'POST': # Check if file upload if 'file' in request.files and request.files['file'].filename: uploaded_file = request.files['file'] # Save file to tmp directory file_path = os.path.join(app.config['UPLOAD_FOLDER'], uploaded_file.filename) uploaded_file.save(file_path) # Read a small preview of the file with open(file_path, 'r', errors='replace') as f: text = f.read(8096) try: # Process the file token_data = process_text("", model_to_use, is_full_file=True, file_path=file_path) # Clean up the file after processing if os.path.exists(file_path): os.remove(file_path) # If request is AJAX, return JSON if request.headers.get('X-Requested-With') == 'XMLHttpRequest': return jsonify(token_data) except Exception as e: error_message = str(e) # Clean up the file after processing if os.path.exists(file_path): os.remove(file_path) if request.headers.get('X-Requested-With') == 'XMLHttpRequest': return jsonify({"error": error_message}), 400 return render_template_string( HTML_TEMPLATE, text=text, token_data=None, models=TOKENIZER_MODELS, selected_model=selected_model, custom_model=custom_model, model_type=model_type, error=error_message ) # Regular text processing else: text = request.form.get('text', '') if text: try: token_data = process_text(text, model_to_use) # If request is AJAX, return JSON if request.headers.get('X-Requested-With') == 'XMLHttpRequest': return jsonify(token_data) except Exception as e: error_message = str(e) if request.headers.get('X-Requested-With') == 'XMLHttpRequest': return jsonify({"error": error_message}), 400 return render_template_string( HTML_TEMPLATE, text=text, token_data=None, models=TOKENIZER_MODELS, selected_model=selected_model, custom_model=custom_model, model_type=model_type, error=error_message ) return render_template_string( HTML_TEMPLATE, text=text, token_data=token_data, models=TOKENIZER_MODELS, selected_model=selected_model, custom_model=custom_model, model_type=model_type, error=error_message ) if __name__ == "__main__": app.run(host='0.0.0.0', port=7860)