Spaces:

m7n
/

openalex_mapper

Running on Zero

App Files Files Community

m7n committed 10 days ago

Commit

f895c88

1 Parent(s): cabc445

Many updates, mainly added categorical.

Browse files

Files changed (6) hide show

app.py +392 -72
color_utils.py +2 -2
colormap_chooser.py +933 -0
colormap_chooser_testing_app.py +47 -0
openalex_utils.py +162 -2
ui_utils.py +52 -0

app.py CHANGED Viewed

@@ -128,8 +128,10 @@ from openalex_utils import (
     get_field,
     process_records_to_df,
     openalex_url_to_filename,
-    get_records_from_dois
 )
 from styles import DATAMAP_CUSTOM_CSS
 from data_setup import (
     download_required_files,
@@ -141,7 +143,8 @@ from data_setup import (
 from network_utils import create_citation_graph, draw_citation_graph
 # Configure OpenAlex
@@ -149,6 +152,26 @@ pyalex.config.email = "maximilian.noichl@uni-bamberg.de"
 print(f"Imports completed: {time.strftime('%Y-%m-%d %H:%M:%S')}")
 # Create a static directory to store the dynamic HTML files
@@ -236,21 +259,65 @@ def create_embeddings_299(texts_to_embedd):
     return model.encode(texts_to_embedd, show_progress_bar=True, batch_size=192)
 # else:
 def create_embeddings(texts_to_embedd):
     """Create embeddings for the input texts using the loaded model."""
     return model.encode(texts_to_embedd, show_progress_bar=True, batch_size=192)
 def predict(request: gr.Request, text_input, sample_size_slider, reduce_sample_checkbox,
-           sample_reduction_method, plot_time_checkbox,
            locally_approximate_publication_date_checkbox,
            download_csv_checkbox, download_png_checkbox, citation_graph_checkbox,
-           csv_upload, highlight_color,
            progress=gr.Progress()):
     """
     Main prediction pipeline that processes OpenAlex queries and creates visualizations.
@@ -261,13 +328,14 @@ def predict(request: gr.Request, text_input, sample_size_slider, reduce_sample_c
         sample_size_slider (int): Maximum number of samples to process
         reduce_sample_checkbox (bool): Whether to reduce sample size
         sample_reduction_method (str): Method for sample reduction ("Random" or "Order of Results")
-        plot_time_checkbox (bool): Whether to color points by publication date
         locally_approximate_publication_date_checkbox (bool): Whether to approximate publication date locally before plotting.
         download_csv_checkbox (bool): Whether to download CSV data
         download_png_checkbox (bool): Whether to download PNG data
         citation_graph_checkbox (bool): Whether to add citation graph
         csv_upload (str): Path to uploaded CSV file
         highlight_color (str): Color for highlighting points
         progress (gr.Progress): Gradio progress tracker
     Returns:
@@ -276,6 +344,10 @@ def predict(request: gr.Request, text_input, sample_size_slider, reduce_sample_c
     # Initialize start_time at the beginning of the function
     start_time = time.time()
     # Helper function to generate error responses
     def create_error_response(error_message):
         return [
@@ -358,6 +430,9 @@ def predict(request: gr.Request, text_input, sample_size_slider, reduce_sample_c
             print(f"Successfully loaded {len(records_df)} records from uploaded file")
             progress(0.2, desc="Processing uploaded data...")
         except Exception as e:
             error_message = f"Error processing uploaded file: {str(e)}"
             return create_error_response(error_message)
@@ -374,6 +449,7 @@ def predict(request: gr.Request, text_input, sample_size_slider, reduce_sample_c
         # Split input into multiple URLs if present
         urls = [url.strip() for url in text_input.split(';')]
         records = []
         total_query_length = 0
         # Use first URL for filename
@@ -388,54 +464,154 @@ def predict(request: gr.Request, text_input, sample_size_slider, reduce_sample_c
             total_query_length += query_length
             print(f'Requesting {query_length} entries from query {i+1}/{len(urls)}...')
-            target_size = sample_size_slider if reduce_sample_checkbox and sample_reduction_method == "First n samples" else query_length
-            records_per_query = 0
-            should_break = False
-            for page in query.paginate(per_page=200, n_max=None):
-                # Add retry mechanism for processing each page
-                max_retries = 5
-                base_wait_time = 1  # Starting wait time in seconds
-                exponent = 1.5  # Exponential factor
-                for retry_attempt in range(max_retries):
-                    try:
                         for record in page:
-                            records.append(record)
-                            records_per_query += 1
-                            progress(0.1 + (0.2 * len(records) / (total_query_length)),
-                                    desc=f"Getting data from query {i+1}/{len(urls)}...")
-                            if reduce_sample_checkbox and sample_reduction_method == "First n samples" and records_per_query >= target_size:
-                                should_break = True
-                                break
-                        # If we get here without an exception, break the retry loop
-                        break
-                    except Exception as e:
-                        print(f"Error processing page: {e}")
-                        if retry_attempt < max_retries - 1:
-                            wait_time = base_wait_time * (exponent ** retry_attempt) + random.random()
-                            print(f"Retrying in {wait_time:.2f} seconds (attempt {retry_attempt + 1}/{max_retries})...")
-                            time.sleep(wait_time)
-                        else:
-                            print(f"Maximum retries reached. Continuing with next page.")
-                if should_break:
                     break
-            if should_break:
-                break
         print(f"Query completed in {time.time() - start_time:.2f} seconds")
         # Process records
         processing_start = time.time()
         records_df = process_records_to_df(records)
-        if reduce_sample_checkbox and sample_reduction_method != "All":
-            sample_size = min(sample_size_slider, len(records_df))
-            if sample_reduction_method == "n random samples":
-                records_df = records_df.sample(sample_size)
-            elif sample_reduction_method == "First n samples":
-                records_df = records_df.iloc[:sample_size]
         print(f"Records processed in {time.time() - processing_start:.2f} seconds")
     # Create embeddings - this happens regardless of data source
@@ -468,14 +644,68 @@ def predict(request: gr.Request, text_input, sample_size_slider, reduce_sample_c
     viz_prep_start = time.time()
     progress(0.6, desc="Preparing visualization data...")
     basedata_df['color'] = '#ced4d211'
     highlight_color = rgba_to_hex(highlight_color)
-    if not plot_time_checkbox:
-        records_df['color'] = highlight_color
-    else:
-        cmap = colormaps.haline
         if not locally_approximate_publication_date_checkbox:
             # Create color mapping based on publication years
             years = pd.to_numeric(records_df['publication_year'])
@@ -495,6 +725,9 @@ def predict(request: gr.Request, text_input, sample_size_slider, reduce_sample_c
             ])
             norm = mcolors.Normalize(vmin=local_years.min(), vmax=local_years.max())
             records_df['color'] = [mcolors.to_hex(cmap(norm(year))) for year in local_years]
     stacked_df = pd.concat([basedata_df, records_df], axis=0, ignore_index=True)
     stacked_df = stacked_df.fillna("Unlabelled")
@@ -562,7 +795,13 @@ def predict(request: gr.Request, text_input, sample_size_slider, reduce_sample_c
         export_df = records_df[['title', 'abstract', 'doi', 'publication_year', 'x', 'y','id','primary_topic']]
         export_df['parsed_field'] = [get_field(row) for ix, row in export_df.iterrows()]
         export_df['referenced_works'] = [', '.join(x) for x in records_df['referenced_works']]
-        if locally_approximate_publication_date_checkbox and plot_time_checkbox:
             export_df['approximate_publication_year'] = local_years
         export_df.to_csv(csv_file_path, index=False)
@@ -628,13 +867,23 @@ def predict(request: gr.Request, text_input, sample_size_slider, reduce_sample_c
         # Time-based visualization
         scatter_start = time.time()
-        if plot_time_checkbox:
             if locally_approximate_publication_date_checkbox:
                 scatter = plt.scatter(
                     umap_embeddings[:,0],
                     umap_embeddings[:,1],
                     c=local_years,
-                    cmap=colormaps.haline,
                     alpha=0.8,
                     s=point_size
                 )
@@ -644,7 +893,7 @@ def predict(request: gr.Request, text_input, sample_size_slider, reduce_sample_c
                     umap_embeddings[:,0],
                     umap_embeddings[:,1],
                     c=years,
-                    cmap=colormaps.haline,
                     alpha=0.8,
                     s=point_size
                 )
@@ -713,8 +962,8 @@ function refresh() {
 # Gradio interface setup
-with gr.Blocks(theme=theme, css="""
-    .gradio-container a {
         color: black !important;
         text-decoration: none !important;  /* Force remove default underline */
         font-weight: bold;
@@ -722,11 +971,14 @@ with gr.Blocks(theme=theme, css="""
         display: inline-block;  /* Enable proper spacing for descenders */
         line-height: 1.1;  /* Adjust line height */
         padding-bottom: 2px;  /* Add space for descenders */
-    }
-    .gradio-container a:hover {
         color: #b23310 !important;
         border-bottom: 3px solid #b23310;  /* Wider underline, only on hover */
-    }
 """, js=js_light) as demo:
     gr.Markdown("""
     <div style="max-width: 100%; margin: 0 auto;">
@@ -756,6 +1008,13 @@ with gr.Blocks(theme=theme, css="""
             text_input = gr.Textbox(label="OpenAlex-search URL",
                                     info="Enter the URL to an OpenAlex-search.")
             gr.Markdown("### Sample Settings")
             reduce_sample_checkbox = gr.Checkbox(
                 label="Reduce Sample Size",
@@ -766,7 +1025,8 @@ with gr.Blocks(theme=theme, css="""
                 ["All", "First n samples", "n random samples"],
                 label="Sample Selection Method",
                 value="First n samples",
-                info="How to choose the samples to keep."
             )
             if is_running_in_hf_zero_gpu():
@@ -781,20 +1041,32 @@ with gr.Blocks(theme=theme, css="""
                 step=10,
                 value=1000,
                 info="How many samples to keep.",
-                visible=True
             )
             gr.Markdown("### Plot Settings")
-            plot_time_checkbox = gr.Checkbox(
-                label="Plot Time",
-                value=True,
-                info="Colour points by their publication date."
             )
             locally_approximate_publication_date_checkbox = gr.Checkbox(
                 label="Locally Approximate Publication Date",
                 value=True,
-                info="Colour points by the average publication date in their area."
             )
             gr.Markdown("### Download Options")
             download_csv_checkbox = gr.Checkbox(
@@ -821,14 +1093,24 @@ with gr.Blocks(theme=theme, css="""
                 label="Upload your own CSV file downloaded via pyalex.",
                 file_types=[".csv"],
             )
             # --- Aesthetics Accordion ---
             with gr.Accordion("Aesthetics", open=False):
                 highlight_color_picker = gr.ColorPicker(
                     label="Highlight Color",
                     value="#5e2784",
-                    info="Choose the highlight color for your query points."
                 )
         with gr.Column(scale=2):
             html = gr.HTML(
@@ -877,15 +1159,43 @@ with gr.Blocks(theme=theme, css="""
     </div>
     """)
-    def update_slider_visibility(method):
-        return gr.Slider(visible=(method != "All"))
     sample_reduction_method.change(
-        fn=update_slider_visibility,
-        inputs=[sample_reduction_method],
-        outputs=[sample_size_slider]
     )
     def show_cancel_button():
         return gr.Button(visible=True)
@@ -908,13 +1218,16 @@ with gr.Blocks(theme=theme, css="""
             sample_size_slider,
             reduce_sample_checkbox,
             sample_reduction_method,
-            plot_time_checkbox,
             locally_approximate_publication_date_checkbox,
             download_csv_checkbox,
             download_png_checkbox,
             citation_graph_checkbox,
             csv_upload,
-            highlight_color_picker
         ],
         outputs=[html, html_download, csv_download, png_download, cancel_btn]
     )
@@ -927,6 +1240,13 @@ with gr.Blocks(theme=theme, css="""
         queue=False  # Important to make the button hide immediately
     )
 # demo.static_dirs = {
 #     "static": str(static_dir)

     get_field,
     process_records_to_df,
     openalex_url_to_filename,
+    get_records_from_dois,
+    openalex_url_to_readable_name
 )
+from ui_utils import highlight_queries
 from styles import DATAMAP_CUSTOM_CSS
 from data_setup import (
     download_required_files,
 from network_utils import create_citation_graph, draw_citation_graph
+# Add colormap chooser imports
+from colormap_chooser import ColormapChooser, setup_colormaps
 # Configure OpenAlex
 print(f"Imports completed: {time.strftime('%Y-%m-%d %H:%M:%S')}")
+# Set up colormaps for the chooser
+print("Setting up colormaps...")
+colormap_categories = setup_colormaps(
+    included_collections=['matplotlib', 'cmocean', 'scientific', 'cmasher'],
+    excluded_collections=['colorcet', 'carbonplan', 'sciviz']
+)
+colormap_chooser = ColormapChooser(
+    categories=colormap_categories,
+    smooth_steps=10,
+    strip_width=200,
+    strip_height=50,
+    css_height=200,
+    # show_search=False,
+    # show_category=False,
+    # show_preview=False,
+    # show_selected_name=True,
+    # show_selected_info=False,
+    gallery_kwargs=dict(columns=3, allow_preview=False, height="200px")
+)
 # Create a static directory to store the dynamic HTML files
     return model.encode(texts_to_embedd, show_progress_bar=True, batch_size=192)
 # else:
 def create_embeddings(texts_to_embedd):
     """Create embeddings for the input texts using the loaded model."""
     return model.encode(texts_to_embedd, show_progress_bar=True, batch_size=192)
+def highlight_queries(text: str) -> str:
+    """Render semicolon-separated OpenAlex URLs as colored HTML pills.
+
+    Args:
+        text: Raw textbox content holding one or more OpenAlex URLs separated
+            by semicolons. Empty, blank, or ``None`` input yields a
+            placeholder hint instead of pills.
+
+    Returns:
+        An HTML fragment: an italic placeholder message when there is nothing
+        to show, otherwise a "Query"/"Queries (n):" header followed by one
+        pill per non-empty URL. Pill background colors cycle through a fixed
+        eight-color palette.
+    """
+    # Function-scope import so the block does not depend on the module's import list.
+    import html
+
+    # NOTE(review): `highlight_queries` is also imported from `ui_utils` near the
+    # top of this module; this later definition rebinds the name - confirm which
+    # implementation is meant to be used.
+    palette = [
+        "#e8f4fd", "#fff2e8", "#f0f9e8", "#fdf2f8",
+        "#f3e8ff", "#e8f8f5", "#fef7e8", "#f8f0e8"
+    ]
+    # Handle empty input
+    if not text or not text.strip():
+        return "<div style='padding: 10px; color: #666; font-style: italic;'>Enter OpenAlex URLs separated by semicolons to see query descriptions</div>"
+    # Split URLs on semicolons and strip whitespace
+    urls = [url.strip() for url in text.split(";") if url.strip()]
+    if not urls:
+        return "<div style='padding: 10px; color: #666; font-style: italic;'>No valid URLs found</div>"
+    pills = []
+    for i, url in enumerate(urls):
+        color = palette[i % len(palette)]
+        try:
+            # Get readable name for the URL
+            readable_name = openalex_url_to_readable_name(url)
+        except Exception as e:
+            # Best effort: any failure in the name helper falls back to a generic label.
+            print(f"Error processing URL {url}: {e}")
+            readable_name = f"Query {i+1}"
+        # The name is derived from user-supplied URL text, so escape it before
+        # embedding it in markup; otherwise "<", ">" or "&" in a search term
+        # would be interpreted as HTML.
+        # NOTE(review): assumes openalex_url_to_readable_name returns plain text,
+        # not pre-built HTML - confirm against openalex_utils.
+        safe_name = html.escape(str(readable_name))
+        pills.append(
+            f'<span style="background:{color};'
+            'padding: 8px 12px; margin: 4px; '
+            'border-radius: 12px; font-weight: 500;'
+            'display: inline-block; font-family: \'Roboto Condensed\', sans-serif;'
+            'border: 1px solid rgba(0,0,0,0.1); font-size: 14px;'
+            'box-shadow: 0 1px 3px rgba(0,0,0,0.1);">'
+            f'{safe_name}</span>'
+        )
+    return (
+        "<div style='padding: 8px 0;'>"
+        "<div style='font-size: 12px; color: #666; margin-bottom: 6px; font-weight: 500;'>"
+        f"{'Query' if len(urls) == 1 else 'Queries'} ({len(urls)}):</div>"
+        "<div style='display: flex; flex-wrap: wrap; gap: 4px;'>"
+        + "".join(pills) +
+        "</div></div>"
+    )
 def predict(request: gr.Request, text_input, sample_size_slider, reduce_sample_checkbox,
+           sample_reduction_method, plot_type_dropdown,
            locally_approximate_publication_date_checkbox,
            download_csv_checkbox, download_png_checkbox, citation_graph_checkbox,
+           csv_upload, highlight_color, selected_colormap_name, seed_value,
            progress=gr.Progress()):
     """
     Main prediction pipeline that processes OpenAlex queries and creates visualizations.
         sample_size_slider (int): Maximum number of samples to process
         reduce_sample_checkbox (bool): Whether to reduce sample size
         sample_reduction_method (str): Method for sample reduction ("Random" or "Order of Results")
+        plot_type_dropdown (str): Type of plot coloring ("No special coloring", "Time-based coloring", "Categorical coloring")
         locally_approximate_publication_date_checkbox (bool): Whether to approximate publication date locally before plotting.
         download_csv_checkbox (bool): Whether to download CSV data
         download_png_checkbox (bool): Whether to download PNG data
         citation_graph_checkbox (bool): Whether to add citation graph
         csv_upload (str): Path to uploaded CSV file
         highlight_color (str): Color for highlighting points
+        selected_colormap_name (str): Name of the selected colormap for time-based coloring
         progress (gr.Progress): Gradio progress tracker
     Returns:
     # Initialize start_time at the beginning of the function
     start_time = time.time()
+    # Convert dropdown selection to boolean flags for backward compatibility
+    plot_time_checkbox = plot_type_dropdown == "Time-based coloring"
+    treat_as_categorical_checkbox = plot_type_dropdown == "Categorical coloring"
     # Helper function to generate error responses
     def create_error_response(error_message):
         return [
             print(f"Successfully loaded {len(records_df)} records from uploaded file")
             progress(0.2, desc="Processing uploaded data...")
+            # For uploaded files, set all records to query_index 0
+            records_df['query_index'] = 0
         except Exception as e:
             error_message = f"Error processing uploaded file: {str(e)}"
             return create_error_response(error_message)
         # Split input into multiple URLs if present
         urls = [url.strip() for url in text_input.split(';')]
         records = []
+        query_indices = []  # Track which query each record comes from
         total_query_length = 0
         # Use first URL for filename
             total_query_length += query_length
             print(f'Requesting {query_length} entries from query {i+1}/{len(urls)}...')
+            # Use PyAlex sampling for random samples - much more efficient!
+            if reduce_sample_checkbox and sample_reduction_method == "n random samples":
+                # Use PyAlex's built-in sample method for efficient server-side sampling
+                target_size = min(sample_size_slider, query_length)
+                try:
+                    seed_int = int(seed_value) if seed_value.strip() else 42
+                except ValueError:
+                    seed_int = 42
+                    print(f"Invalid seed value '{seed_value}', using default: 42")
+                print(f'Attempting PyAlex sampling: {target_size} from {query_length} (seed={seed_int})')
+                try:
+                    # Check if PyAlex sample method exists and works
+                    if hasattr(query, 'sample'):
+                        sampled_query = query.sample(target_size, seed=seed_int)
+                        # IMPORTANT: When using sample(), must use method='page' for pagination!
+                        sampled_records = []
+                        records_count = 0
+                        for page in sampled_query.paginate(per_page=200, method='page', n_max=None):
+                            for record in page:
+                                sampled_records.append(record)
+                                records_count += 1
+                                progress(0.1 + (0.15 * records_count / target_size),
+                                        desc=f"Getting sampled data from query {i+1}/{len(urls)}... ({records_count}/{target_size})")
+                        print(f'PyAlex sampling successful: got {len(sampled_records)} records')
+                    else:
+                        raise AttributeError("sample method not available")
+                except Exception as e:
+                    print(f"PyAlex sampling failed ({e}), using fallback method...")
+                    # Fallback: get all records and sample manually
+                    all_records = []
+                    records_count = 0
+                    # Use default cursor pagination for non-sampled queries
+                    for page in query.paginate(per_page=200, n_max=None):
                         for record in page:
+                            all_records.append(record)
+                            records_count += 1
+                            progress(0.1 + (0.15 * records_count / query_length),
+                                    desc=f"Downloading for sampling from query {i+1}/{len(urls)}...")
+                    # Now sample manually
+                    if len(all_records) > target_size:
+                        import random
+                        random.seed(seed_int)
+                        sampled_records = random.sample(all_records, target_size)
+                    else:
+                        sampled_records = all_records
+                    print(f'Fallback sampling: got {len(sampled_records)} from {len(all_records)} total')
+                # Add the sampled records
+                for idx, record in enumerate(sampled_records):
+                    records.append(record)
+                    query_indices.append(i)
+                    progress(0.1 + (0.2 * len(records) / total_query_length),
+                            desc=f"Processing sampled data from query {i+1}/{len(urls)}...")
+            else:
+                # Keep existing logic for "First n samples" and "All"
+                target_size = sample_size_slider if reduce_sample_checkbox and sample_reduction_method == "First n samples" else query_length
+                records_per_query = 0
+                should_break_current_query = False
+                for page in query.paginate(per_page=200, n_max=None):
+                    # Add retry mechanism for processing each page
+                    max_retries = 5
+                    base_wait_time = 1  # Starting wait time in seconds
+                    exponent = 1.5  # Exponential factor
+                    for retry_attempt in range(max_retries):
+                        try:
+                            for record in page:
+                                records.append(record)
+                                query_indices.append(i)  # Track which query this record comes from
+                                records_per_query += 1
+                                progress(0.1 + (0.2 * len(records) / (total_query_length)),
+                                        desc=f"Getting data from query {i+1}/{len(urls)}...")
+                                if reduce_sample_checkbox and sample_reduction_method == "First n samples" and records_per_query >= target_size:
+                                    should_break_current_query = True
+                                    break
+                            # If we get here without an exception, break the retry loop
+                            break
+                        except Exception as e:
+                            print(f"Error processing page: {e}")
+                            if retry_attempt < max_retries - 1:
+                                wait_time = base_wait_time * (exponent ** retry_attempt) + random.random()
+                                print(f"Retrying in {wait_time:.2f} seconds (attempt {retry_attempt + 1}/{max_retries})...")
+                                time.sleep(wait_time)
+                            else:
+                                print(f"Maximum retries reached. Continuing with next page.")
+                if should_break_current_query:
                     break
+            # Continue to next query - don't break out of the main query loop
         print(f"Query completed in {time.time() - start_time:.2f} seconds")
+        print(f"Total records collected: {len(records)}")
+        print(f"Expected from all queries: {total_query_length}")
+        print(f"Sample method used: {sample_reduction_method}")
+        print(f"Reduce sample enabled: {reduce_sample_checkbox}")
+        if sample_reduction_method == "n random samples":
+            print(f"Seed value: {seed_value}")
         # Process records
         processing_start = time.time()
         records_df = process_records_to_df(records)
+        # Add query_index to the dataframe
+        records_df['query_index'] = query_indices[:len(records_df)]
+        if reduce_sample_checkbox and sample_reduction_method != "All" and sample_reduction_method != "n random samples":
+            # Note: We skip "n random samples" here because PyAlex sampling is already done above
+            sample_size = min(sample_size_slider, len(records_df))
+            # Check if we have multiple queries for sampling logic
+            urls = [url.strip() for url in text_input.split(';')] if text_input else ['']
+            has_multiple_queries = len(urls) > 1 and not csv_upload
+            # If using categorical coloring with multiple queries, sample each query independently
+            if treat_as_categorical_checkbox and has_multiple_queries:
+                # Sample the full sample_size from each query independently
+                unique_queries = sorted(records_df['query_index'].unique())
+                sampled_dfs = []
+                for query_idx in unique_queries:
+                    query_records = records_df[records_df['query_index'] == query_idx]
+                    # Apply the full sample size to each query (only for "First n samples")
+                    current_sample_size = min(sample_size_slider, len(query_records))
+                    if sample_reduction_method == "First n samples":
+                        sampled_query = query_records.iloc[:current_sample_size]
+                    sampled_dfs.append(sampled_query)
+                    print(f"Query {query_idx+1}: sampled {len(sampled_query)} records from {len(query_records)} available")
+                records_df = pd.concat(sampled_dfs, ignore_index=True)
+                print(f"Total after independent sampling: {len(records_df)} records")
+                print(f"Query distribution: {records_df['query_index'].value_counts().sort_index()}")
+            else:
+                # Original sampling logic for single query or non-categorical (only "First n samples" now)
+                if sample_reduction_method == "First n samples":
+                    records_df = records_df.iloc[:sample_size]
         print(f"Records processed in {time.time() - processing_start:.2f} seconds")
     # Create embeddings - this happens regardless of data source
     viz_prep_start = time.time()
     progress(0.6, desc="Preparing visualization data...")
+    # Set up colors:
     basedata_df['color'] = '#ced4d211'
+    # Convert highlight_color to hex if it isn't already
+    if not highlight_color.startswith('#'):
+        highlight_color = rgba_to_hex(highlight_color)
     highlight_color = rgba_to_hex(highlight_color)
+    print('Highlight color:', highlight_color)
+    # Check if we have multiple queries and categorical coloring is enabled
+    urls = [url.strip() for url in text_input.split(';')] if text_input else ['']
+    has_multiple_queries = len(urls) > 1 and not csv_upload
+    if treat_as_categorical_checkbox and has_multiple_queries:
+        # Use categorical coloring for multiple queries
+        print("Using categorical coloring for multiple queries")
+        # Define a categorical colormap - using distinct colors
+        categorical_colors = [
+            '#e41a1c',  # Red
+            '#377eb8',  # Blue
+            '#4daf4a',  # Green
+            '#984ea3',  # Purple
+            '#ff7f00',  # Orange
+            '#ffff33',  # Yellow
+            '#a65628',  # Brown
+            '#f781bf',  # Pink
+            '#999999',  # Gray
+            '#66c2a5',  # Teal
+            '#fc8d62',  # Light Orange
+            '#8da0cb',  # Light Blue
+            '#e78ac3',  # Light Pink
+            '#a6d854',  # Light Green
+            '#ffd92f',  # Light Yellow
+            '#e5c494',  # Beige
+            '#b3b3b3',  # Light Gray
+        ]
+        # Assign colors based on query_index
+        unique_queries = sorted(records_df['query_index'].unique())
+        query_color_map = {query_idx: categorical_colors[i % len(categorical_colors)]
+                          for i, query_idx in enumerate(unique_queries)}
+        records_df['color'] = records_df['query_index'].map(query_color_map)
+        # Add query_label for better identification
+        records_df['query_label'] = records_df['query_index'].apply(lambda x: f"Query {x+1}")
+    elif plot_time_checkbox:
+        # Use selected colormap if provided, otherwise default to haline
+        if selected_colormap_name and selected_colormap_name.strip():
+            try:
+                cmap = plt.get_cmap(selected_colormap_name)
+            except Exception as e:
+                print(f"Warning: Could not load colormap '{selected_colormap_name}': {e}")
+                cmap = colormaps.haline
+        else:
+            cmap = colormaps.haline
         if not locally_approximate_publication_date_checkbox:
             # Create color mapping based on publication years
             years = pd.to_numeric(records_df['publication_year'])
             ])
             norm = mcolors.Normalize(vmin=local_years.min(), vmax=local_years.max())
             records_df['color'] = [mcolors.to_hex(cmap(norm(year))) for year in local_years]
+    else:
+        # No special coloring - use highlight color
+        records_df['color'] = highlight_color
     stacked_df = pd.concat([basedata_df, records_df], axis=0, ignore_index=True)
     stacked_df = stacked_df.fillna("Unlabelled")
         export_df = records_df[['title', 'abstract', 'doi', 'publication_year', 'x', 'y','id','primary_topic']]
         export_df['parsed_field'] = [get_field(row) for ix, row in export_df.iterrows()]
         export_df['referenced_works'] = [', '.join(x) for x in records_df['referenced_works']]
+        # Add query information if categorical coloring is used
+        if treat_as_categorical_checkbox and has_multiple_queries:
+            export_df['query_index'] = records_df['query_index']
+            export_df['query_label'] = records_df['query_label']
+        if locally_approximate_publication_date_checkbox and plot_type_dropdown == "Time-based coloring":
             export_df['approximate_publication_year'] = local_years
         export_df.to_csv(csv_file_path, index=False)
         # Time-based visualization
         scatter_start = time.time()
+        if plot_type_dropdown == "Time-based coloring":
+            # Use selected colormap if provided, otherwise default to haline
+            if selected_colormap_name and selected_colormap_name.strip():
+                try:
+                    static_cmap = plt.get_cmap(selected_colormap_name)
+                except Exception as e:
+                    print(f"Warning: Could not load colormap '{selected_colormap_name}': {e}")
+                    static_cmap = colormaps.haline
+            else:
+                static_cmap = colormaps.haline
             if locally_approximate_publication_date_checkbox:
                 scatter = plt.scatter(
                     umap_embeddings[:,0],
                     umap_embeddings[:,1],
                     c=local_years,
+                    cmap=static_cmap,
                     alpha=0.8,
                     s=point_size
                 )
                     umap_embeddings[:,0],
                     umap_embeddings[:,1],
                     c=years,
+                    cmap=static_cmap,
                     alpha=0.8,
                     s=point_size
                 )
 # Gradio interface setup
+with gr.Blocks(theme=theme, css=f"""
+    .gradio-container a {{
         color: black !important;
         text-decoration: none !important;  /* Force remove default underline */
         font-weight: bold;
         display: inline-block;  /* Enable proper spacing for descenders */
         line-height: 1.1;  /* Adjust line height */
         padding-bottom: 2px;  /* Add space for descenders */
+    }}
+    .gradio-container a:hover {{
         color: #b23310 !important;
         border-bottom: 3px solid #b23310;  /* Wider underline, only on hover */
+    }}
+    /* Colormap chooser styles */
+    {colormap_chooser.css()}
 """, js=js_light) as demo:
     gr.Markdown("""
     <div style="max-width: 100%; margin: 0 auto;">
             text_input = gr.Textbox(label="OpenAlex-search URL",
                                     info="Enter the URL to an OpenAlex-search.")
+            # Add the query highlight display
+            query_display = gr.HTML(
+                value="<div style='padding: 10px; color: #666; font-style: italic;'>Enter OpenAlex URLs separated by semicolons to see query descriptions</div>",
+                label="",
+                show_label=False
+            )
             gr.Markdown("### Sample Settings")
             reduce_sample_checkbox = gr.Checkbox(
                 label="Reduce Sample Size",
                 ["All", "First n samples", "n random samples"],
                 label="Sample Selection Method",
                 value="First n samples",
+                info="How to choose the samples to keep.",
+                visible=True  # Will be controlled by reduce_sample_checkbox
             )
             if is_running_in_hf_zero_gpu():
                 step=10,
                 value=1000,
                 info="How many samples to keep.",
+                visible=True  # Will be controlled by reduce_sample_checkbox
+            )
+            # Add this new seed field
+            seed_textbox = gr.Textbox(
+                label="Random Seed",
+                value="42",
+                info="Seed for random sampling reproducibility.",
+                visible=False  # Will be controlled by both reduce_sample_checkbox and sample_reduction_method
             )
             gr.Markdown("### Plot Settings")
+            # Replace plot_time_checkbox with a dropdown
+            plot_type_dropdown = gr.Dropdown(
+                ["No special coloring", "Time-based coloring", "Categorical coloring"],
+                label="Plot Coloring Type",
+                value="Time-based coloring",
+                info="Choose how to color the points on the plot."
             )
             locally_approximate_publication_date_checkbox = gr.Checkbox(
                 label="Locally Approximate Publication Date",
                 value=True,
+                info="Colour points by the average publication date in their area.",
+                visible=True  # Will be controlled by plot_type_dropdown
             )
+            # Remove treat_as_categorical_checkbox since it's now part of the dropdown
             gr.Markdown("### Download Options")
             download_csv_checkbox = gr.Checkbox(
                 label="Upload your own CSV file downloaded via pyalex.",
                 file_types=[".csv"],
             )
             # --- Aesthetics Accordion ---
             with gr.Accordion("Aesthetics", open=False):
+                gr.Markdown("### Color Selection")
+                gr.Markdown("*Choose an individual color to highlight your data.*")
                 highlight_color_picker = gr.ColorPicker(
                     label="Highlight Color",
+                    show_label=False,
                     value="#5e2784",
+                    #info="Choose the highlight color for your query points."
                 )
+                # Add colormap chooser
+                gr.Markdown("### Colormap Selection")
+                gr.Markdown("*Choose a colormap for time-based visualizations (when 'Plot Time' is enabled)*")
+                # Render the colormap chooser (created earlier)
+                colormap_chooser.render_tabs()
         with gr.Column(scale=2):
             html = gr.HTML(
     </div>
     """)
+    # Update the visibility control functions
+    def update_sample_controls_visibility(reduce_sample_enabled, sample_method):
+        """Update visibility of sample reduction controls based on checkbox and method"""
+        method_visible = reduce_sample_enabled
+        slider_visible = reduce_sample_enabled and sample_method != "All"
+        seed_visible = reduce_sample_enabled and sample_method == "n random samples"
+        return (
+            gr.Dropdown(visible=method_visible),
+            gr.Slider(visible=slider_visible),
+            gr.Textbox(visible=seed_visible)
+        )
+    def update_plot_controls_visibility(plot_type):
+        """Update visibility of plot controls based on plot type"""
+        locally_approx_visible = plot_type == "Time-based coloring"
+        return gr.Checkbox(visible=locally_approx_visible)
+    # Update event handlers
+    reduce_sample_checkbox.change(
+        fn=update_sample_controls_visibility,
+        inputs=[reduce_sample_checkbox, sample_reduction_method],
+        outputs=[sample_reduction_method, sample_size_slider, seed_textbox]
+    )
     sample_reduction_method.change(
+        fn=update_sample_controls_visibility,
+        inputs=[reduce_sample_checkbox, sample_reduction_method],
+        outputs=[sample_reduction_method, sample_size_slider, seed_textbox]
     )
+    plot_type_dropdown.change(
+        fn=update_plot_controls_visibility,
+        inputs=[plot_type_dropdown],
+        outputs=[locally_approximate_publication_date_checkbox]
+    )
     def show_cancel_button():
         """Return a Button update with ``visible=True``."""
         cancel_update = gr.Button(visible=True)
         return cancel_update
             sample_size_slider,
             reduce_sample_checkbox,
             sample_reduction_method,
+            plot_type_dropdown,  # Changed from plot_time_checkbox
             locally_approximate_publication_date_checkbox,
+            # Removed treat_as_categorical_checkbox since it's now part of plot_type_dropdown
             download_csv_checkbox,
             download_png_checkbox,
             citation_graph_checkbox,
             csv_upload,
+            highlight_color_picker,
+            colormap_chooser.selected_name,
+            seed_textbox
         ],
         outputs=[html, html_download, csv_download, png_download, cancel_btn]
     )
         queue=False  # Important to make the button hide immediately
     )
+    # Connect text input changes to query display updates
+    text_input.change(
+        fn=highlight_queries,
+        inputs=text_input,
+        outputs=query_display
+    )
 # demo.static_dirs = {
 #     "static": str(static_dir)

color_utils.py CHANGED Viewed

@@ -7,8 +7,8 @@ def rgba_to_hex(color):
         # If already hex
         if color.startswith('#') and (len(color) == 7 or len(color) == 4):
             return color
-        # If rgba or rgb
-        match = re.match(r"rgba?\\(([^)]+)\\)", color)
         if match:
             parts = match.group(1).split(',')
             r = int(float(parts[0]))

         # If already hex
         if color.startswith('#') and (len(color) == 7 or len(color) == 4):
             return color
+        # If rgba or rgb - FIX: Remove extra backslashes
+        match = re.match(r"rgba?\(([^)]+)\)", color)
         if match:
             parts = match.group(1).split(',')
             r = int(float(parts[0]))

colormap_chooser.py ADDED Viewed

	@@ -0,0 +1,933 @@

+"""Colormap Chooser Gradio Component
+===================================
+A reusable, importable Gradio component that provides a **scrollable, wide-strip**
+chooser for Matplotlib (and compatible) colormaps. Designed to drop into an
+existing Gradio Blocks app.
+Features
+--------
+* Long, skinny gradient bars (not square tiles).
+* Smart sampling:
+  - Continuous maps → ~20 sample steps (configurable) interpolated across width.
+  - Categorical / qualitative maps → actual number of colors (`cmap.N`).
+* Scrollable gallery (height-capped w/ CSS).
+* Selection callback returns the **selected colormap name** (string) you can pass
+  directly to Matplotlib (`mpl.colormaps[name]` or `plt.get_cmap(name)`).
+* Optional category + search filtering UI.
+* Minimal dependencies: NumPy, Matplotlib, Gradio.
+Quick Start
+-----------
+```python
+import gradio as gr
+from colormap_chooser import ColormapChooser, setup_colormaps
+# Set up colormaps with custom collections
+categories = setup_colormaps(
+    included_collections=['matplotlib', 'cmocean', 'scientific'],
+    excluded_collections=['colorcet']
+)
+chooser = ColormapChooser(
+    categories=categories,
+    gallery_kwargs=dict(columns=4, allow_preview=True, height="400px")
+)
+with gr.Blocks() as demo:
+    with gr.Row():
+        chooser.render()  # inserts the component cluster
+    # Use chooser.selected_name as an input to your plotting fn
+    import numpy as np, matplotlib.pyplot as plt
+    def show_demo(cmap_name):
+        data = np.random.rand(32, 32)
+        fig, ax = plt.subplots()
+        im = ax.imshow(data, cmap=cmap_name)
+        ax.set_title(cmap_name)
+        fig.colorbar(im, ax=ax)
+        return fig
+    out = gr.Plot()
+    chooser.selected_name.change(show_demo, chooser.selected_name, out)
+demo.launch()
+```
+Installation
+------------
+Drop this file in your project (e.g., `colormap_chooser.py`) and import.
+Customizing
+-----------
+Pass your own category dict, default sampling counts, or CSS overrides at
+construction time; see class docstring below.
+"""
+from __future__ import annotations
+import numpy as np
+import matplotlib as mpl
+import matplotlib.colors as mcolors
+import matplotlib.pyplot as plt
+import gradio as gr
+from typing import Any, Dict, List, Optional, Sequence, Tuple
+# ------------------------------------------------------------------
+# Default category mapping (extend or replace at init)
+# ------------------------------------------------------------------
+DEFAULT_CATEGORIES: Dict[str, List[str]] = {
+    "Perceptually Uniform": ["viridis", "plasma", "inferno", "magma", "cividis"],
+    "Sequential": ["Blues", "Greens", "Oranges", "Purples", "Reds", "Greys"],
+    "Diverging": ["coolwarm", "bwr", "seismic", "PiYG", "PRGn", "RdBu"],
+    "Qualitative": ["tab10", "tab20", "Set1", "Set2", "Accent"],
+}
+# ------------------------------------------------------------------
+# Colormap setup functions
+# ------------------------------------------------------------------
+def load_matplotlib_colormaps():
+    """
+    Load matplotlib's built-in colormaps directly.
+    Returns dict of colormap_name -> colormap_object
+    """
+    matplotlib_cmaps = {}
+    # Get all matplotlib colormaps
+    for name in plt.colormaps():
+        try:
+            cmap = plt.get_cmap(name)
+            matplotlib_cmaps[name] = cmap
+        except Exception:
+            continue
+    return matplotlib_cmaps
+def load_external_colormaps():
+    """
+    Load colormaps from external packages (like colormaps, cmocean, etc.).
+    Returns dict of colormap_name -> colormap_object
+    """
+    external_cmaps = {}
+    # Try to load from colormaps package
+    try:
+        import colormaps
+        for attr_name in dir(colormaps):
+            if not attr_name.startswith('_'):
+                try:
+                    attr_value = getattr(colormaps, attr_name)
+                    # Check if it looks like a colormap
+                    if hasattr(attr_value, '__call__') or hasattr(attr_value, 'colors'):
+                        external_cmaps[attr_name] = attr_value
+                except Exception:
+                    continue
+    except ImportError:
+        pass
+    return external_cmaps
+def categorize_colormaps(
+    colormap_dict: Dict[str, any],
+    included_collections: List[str],
+    excluded_collections: List[str]
+) -> Dict[str, List[str]]:
+    """
+    Categorize colormaps by type with priority ordering.
+    Args:
+        colormap_dict: Dict of colormap_name -> colormap_object
+        included_collections: List of collection names to include
+        excluded_collections: List of collection names to exclude
+    Returns:
+        Dict {"Category": [list_of_names]} with colormaps ordered by collection priority
+    """
+    # Known categorizations based on documentation
+    matplotlib_sequential = {
+        'viridis', 'plasma', 'inferno', 'magma', 'cividis',  # Perceptually uniform
+        'ylorbr', 'ylorrd', 'orrd', 'purd', 'rdpu', 'bupu',  # Multi-hue sequential
+        'gnbu', 'pubu', 'ylgnbu', 'pubugn', 'bugn', 'ylgn',
+        'binary', 'gist_yarg', 'gist_gray', 'gray', 'bone', 'pink',  # Sequential (2)
+        'spring', 'summer', 'autumn', 'winter', 'cool', 'wistia',
+        'hot', 'afmhot', 'gist_heat', 'copper'
+    }
+    # Single-color sequential maps to exclude
+    single_color_sequential = {
+        'blues', 'greens', 'oranges', 'purples', 'reds', 'greys'
+    }
+    matplotlib_diverging = {
+        'piyg', 'prgn', 'brbg', 'puor', 'rdgy', 'rdbu',
+        'rdylbu', 'rdylgn', 'spectral', 'coolwarm', 'bwr', 'seismic',
+        'berlin', 'managua', 'vanimo'
+    }
+    matplotlib_qualitative = {
+        'pastel1', 'pastel2', 'paired', 'accent',
+        'dark2', 'set1', 'set2', 'set3',
+        'tab10', 'tab20', 'tab20b', 'tab20c'
+    }
+    matplotlib_miscellaneous = {
+        'flag', 'prism', 'ocean', 'gist_earth', 'terrain', 'gist_stern',
+        'gnuplot', 'gnuplot2', 'cmrmap', 'cubehelix', 'brg',
+        'gist_rainbow', 'rainbow', 'jet', 'turbo', 'nipy_spectral',
+        'gist_ncar', 'twilight', 'twilight_shifted', 'hsv'
+    }
+    # External colormap collections
+    cmocean_sequential = {
+        'thermal', 'haline', 'solar', 'ice', 'gray', 'oxy', 'deep', 'dense',
+        'algae', 'matter', 'turbid', 'speed', 'amp', 'tempo', 'rain'
+    }
+    cmocean_diverging = {'balance', 'delta', 'curl', 'diff', 'tarn'}
+    cmocean_other = {'phase', 'topo'}
+    scientific_sequential = {
+        'batlow', 'batlowK', 'batlowW', 'acton', 'bamako', 'bilbao', 'buda', 'davos',
+        'devon', 'grayC', 'hawaii', 'imola', 'lajolla', 'lapaz', 'nuuk', 'oslo',
+        'tokyo', 'turku', 'actonS', 'bamO', 'brocO', 'corko', 'corkO', 'davosS',
+        'grayCS', 'hawaiiS', 'imolaS', 'lajollaS', 'lapazS', 'nuukS', 'osloS',
+        'tokyoS', 'turkuS'
+    }
+    scientific_diverging = {
+        'bam', 'bamo', 'berlin', 'broc', 'brocO', 'cork', 'corko', 'lisbon',
+        'managua', 'roma', 'romao', 'tofino', 'vanimo', 'vik', 'viko'
+    }
+    cmasher_sequential = {
+        'amber', 'amethyst', 'apple', 'arctic', 'autumn', 'bubblegum', 'chroma',
+        'cosmic', 'dusk', 'ember', 'emerald', 'flamingo', 'freeze', 'gem', 'gothic',
+        'heat', 'jungle', 'lavender', 'neon', 'neutral', 'nuclear', 'ocean',
+        'pepper', 'plasma_r', 'rainforest', 'savanna', 'sunburst', 'swamp', 'torch',
+        'toxic', 'tree', 'voltage', 'voltage_r'
+    }
+    cmasher_diverging = {
+        'copper', 'emergency', 'fusion', 'guppy', 'holly', 'iceburn', 'infinity',
+        'pride', 'prinsenvlag', 'redshift', 'seasons', 'seaweed', 'viola',
+        'waterlily', 'watermelon', 'wildfire'
+    }
+    # Helper function to determine collection priority
+    def get_collection_priority(name_lower):
+        # Check matplotlib first (highest priority)
+        if (name_lower in matplotlib_sequential or name_lower in matplotlib_diverging or
+            name_lower in matplotlib_qualitative or name_lower in matplotlib_miscellaneous):
+            return 0
+        # Then cmocean
+        elif (name_lower in cmocean_sequential or name_lower in cmocean_diverging or name_lower in cmocean_other):
+            return 1
+        # Then scientific
+        elif (name_lower in scientific_sequential or name_lower in scientific_diverging):
+            return 2
+        # Then cmasher
+        elif (name_lower in cmasher_sequential or name_lower in cmasher_diverging):
+            return 3
+        # Everything else
+        else:
+            return 4
+    # Collect all valid colormaps with their categories and priorities
+    valid_colormaps = []
+    for name, cmap_obj in colormap_dict.items():
+        name_lower = name.lower()
+        # Skip numbered variants (like brbg_9, set1_9, brbg_4_r, piyg_8_r, etc.)
+        parts = name_lower.split('_')
+        if len(parts) >= 2:
+            # Check if second-to-last part is a digit (handles both name_4 and name_4_r)
+            if parts[-2].isdigit():
+                continue
+            # Also check if last part is a digit (handles name_4)
+            if parts[-1].isdigit():
+                continue
+        # Skip single-color sequential maps
+        if name_lower in single_color_sequential:
+            continue
+        # Check if we should include this colormap based on collection filters
+        include_cmap = True
+        # Check excluded collections
+        for excluded in excluded_collections:
+            if excluded.lower() in name_lower:
+                include_cmap = False
+                break
+        if not include_cmap:
+            continue
+        # Check included collections
+        if included_collections:
+            include_cmap = False
+            for included in included_collections:
+                if (included.lower() in name_lower or
+                    # Special handling for matplotlib colormaps
+                    (included == 'matplotlib' and name in plt.colormaps()) or
+                    # Special handling for known colormap sets
+                    name_lower in cmocean_sequential or name_lower in cmocean_diverging or name_lower in cmocean_other or
+                    name_lower in scientific_sequential or name_lower in scientific_diverging or
+                    name_lower in cmasher_sequential or name_lower in cmasher_diverging):
+                    include_cmap = True
+                    break
+        if not include_cmap:
+            continue
+        # Categorize the colormap
+        category = None
+        if (name_lower in matplotlib_qualitative or
+            any(qual in name_lower for qual in ['tab10', 'tab20', 'set1', 'set2', 'set3', 'paired', 'accent', 'pastel', 'dark2'])):
+            category = "Qualitative"
+        elif (name_lower in cmocean_sequential or name_lower in scientific_sequential or
+              name_lower in cmasher_sequential or name_lower in matplotlib_sequential or
+              'sequential' in name_lower or
+              any(seq in name_lower for seq in ['viridis', 'plasma', 'inferno', 'magma', 'cividis'])):
+            category = "Sequential"
+        elif (name_lower in cmocean_diverging or name_lower in scientific_diverging or
+              name_lower in cmasher_diverging or name_lower in matplotlib_diverging or
+              'diverging' in name_lower or
+              any(div in name_lower for div in ['bwr', 'coolwarm', 'seismic', 'rdbu', 'rdgy', 'piyg', 'prgn', 'brbg'])):
+            category = "Diverging"
+        else:
+            category = "Other"
+        if category:
+            priority = get_collection_priority(name_lower)
+            valid_colormaps.append((name, category, priority))
+    # Sort by category, then by priority, then by name
+    valid_colormaps.sort(key=lambda x: (x[1], x[2], x[0].lower()))
+    # Group by category while maintaining order
+    categories = {
+        "Sequential": [],
+        "Diverging": [],
+        "Qualitative": [],
+        "Other": []
+    }
+    for name, category, priority in valid_colormaps:
+        categories[category].append(name)
+    # Remove empty categories and hide "Other" category
+    final_categories = {}
+    for cat_name, cmap_names in categories.items():
+        if cmap_names and cat_name != "Other":  # Hide "Other" category
+            final_categories[cat_name] = cmap_names
+    return final_categories
+def setup_colormaps(
+    included_collections: Optional[List[str]] = None,
+    excluded_collections: Optional[List[str]] = None,
+    additional_colormaps: Optional[Dict[str, any]] = None
+) -> Dict[str, List[str]]:
+    """
+    Set up and categorize colormaps from various sources.
+    Args:
+        included_collections: List of collection names to include
+            (e.g., ['matplotlib', 'cmocean', 'scientific'])
+        excluded_collections: List of collection names to exclude
+        additional_colormaps: Dict of additional colormaps to include
+    Returns:
+        Dict of {"Category": [list_of_colormap_names]} ready for ColormapChooser
+    """
+    if excluded_collections is None:
+        excluded_collections = ['colorcet', 'carbonplan', 'sciviz']
+    if included_collections is None:
+        included_collections = ['matplotlib', 'cmocean', 'scientific', 'cmasher', 'colorbrewer', 'cartocolors']
+    # Combine all colormaps
+    all_colormaps = {}
+    # Add matplotlib colormaps
+    if 'matplotlib' in included_collections:
+        matplotlib_cmaps = load_matplotlib_colormaps()
+        all_colormaps.update(matplotlib_cmaps)
+        print(f"Added {len(matplotlib_cmaps)} matplotlib colormaps")
+    # Add external colormaps
+    try:
+        external_cmaps = load_external_colormaps()
+        all_colormaps.update(external_cmaps)
+        print(f"Added {len(external_cmaps)} external colormaps")
+    except Exception as e:
+        print(f"Could not load external colormaps: {e}")
+    # Add any additional colormaps
+    if additional_colormaps:
+        all_colormaps.update(additional_colormaps)
+        print(f"Added {len(additional_colormaps)} additional colormaps")
+    # Categorize colormaps
+    return categorize_colormaps(all_colormaps, included_collections, excluded_collections)
+# ------------------------------------------------------------------
+# Utility helpers
+# ------------------------------------------------------------------
+def _flatten_categories(categories: Dict[str, Sequence[str]]) -> List[str]:
+    names = []
+    for _, vals in categories.items():
+        names.extend(vals)
+    # maintain insertion order; drop dupes while preserving first occurrence
+    seen = set()
+    out = []
+    for n in names:
+        if n not in seen:
+            seen.add(n)
+            out.append(n)
+    return out
+def _build_name2cat(categories: Dict[str, Sequence[str]]) -> Dict[str, str]:
+    m = {}
+    for cat, vals in categories.items():
+        for n in vals:
+            m[n] = cat
+    return m
+# ------------------------------------------------------------------
+# Sampling policy
+# ------------------------------------------------------------------
+def _is_categorical_cmap(
+    cmap: mcolors.Colormap,
+    declared_category: Optional[str] = None,
+    qualitative_label: str = "Qualitative",
+    max_auto: int = 32,
+) -> bool:
+    """Heuristic: treat as categorical/qualitative.
+    Priority:
+    1. If user-declared category == qualitative_label → True.
+    2. If ListedColormap with small N → True.
+    3. If colormap name suggests it's qualitative → True.
+    4. Else False (continuous).
+    """
+    # Check if explicitly declared as qualitative
+    if declared_category == qualitative_label:
+        return True
+    # Check if it's a ListedColormap with small N
+    if isinstance(cmap, mcolors.ListedColormap) and cmap.N <= max_auto:
+        return True
+    # Additional check: if the colormap name suggests it's qualitative
+    # This is a fallback in case the declared_category doesn't match exactly
+    if hasattr(cmap, 'name'):
+        name_lower = cmap.name.lower()
+        qualitative_names = {
+            'tab10', 'tab20', 'tab20b', 'tab20c', 'set1', 'set2', 'set3',
+            'pastel1', 'pastel2', 'paired', 'accent', 'dark2'
+        }
+        if name_lower in qualitative_names:
+            return True
+    return False
+def _cmap_strip(
+    name: str,
+    width: int = 10,
+    height: int = 16,
+    smooth_steps: int = 20,
+    declared_category: Optional[str] = None,
+    qualitative_label: str = "Qualitative",
+    max_auto: int = 32,
+):
+    """Return RGB uint8 preview strip for *name* colormap.
+    Continuous maps are resampled to *smooth_steps* and linearly interpolated.
+    Categorical maps use actual number of colors, but adapt to available width.
+    """
+    cmap = mpl.colormaps[name]
+    categorical = _is_categorical_cmap(
+        cmap, declared_category=declared_category, qualitative_label=qualitative_label, max_auto=max_auto
+    )
+    if categorical:
+        n = cmap.N
+        if hasattr(cmap, "colors"):
+            cols = np.asarray(cmap.colors)
+            if cols.shape[1] == 4:
+                cols = cols[:, :3]
+        else:
+            xs = np.linspace(0, 1, n, endpoint=False) + (0.5 / n)
+            cols = cmap(xs)[..., :3]
+        # Adaptive approach based on available width
+        min_block_width = 3  # Minimum pixels per color block for visibility
+        if width >= n * min_block_width:
+            # We have enough width to show all colors as distinct blocks
+            block_w = width // n
+            selected_cols = cols
+            num_blocks = n
+        else:
+            # Not enough width - show a representative sample
+            max_colors_that_fit = max(2, width // min_block_width)  # At least 2 colors
+            if max_colors_that_fit >= n:
+                # We can fit all colors
+                selected_cols = cols
+                num_blocks = n
+                block_w = width // n
+            else:
+                # Sample evenly across the colormap
+                indices = np.linspace(0, n-1, max_colors_that_fit, dtype=int)
+                selected_cols = cols[indices]
+                num_blocks = max_colors_that_fit
+                block_w = width // num_blocks
+        # Debug output for categorical sampling
+        if name.lower() in ['tab10', 'tab20', 'set1', 'set2', 'accent', 'paired']:
+            print(f'CATEGORICAL SAMPLING DEBUG: {name}')
+            print(f'  n (total colors): {n}')
+            print(f'  width: {width}')
+            print(f'  num_blocks (colors shown): {num_blocks}')
+            print(f'  block_w (width per color): {block_w}')
+            print(f'  showing all colors: {num_blocks == n}')
+            print('---')
+        # Create the array with discrete blocks
+        arr = np.repeat(selected_cols[np.newaxis, :, :], height, axis=0)  # (h,num_blocks,3)
+        arr = np.repeat(arr, block_w, axis=1)  # (h,num_blocks*block_w,3)
+        # Handle any remaining width
+        current_width = arr.shape[1]
+        if current_width < width:
+            # Pad by extending the last color
+            pad = width - current_width
+            last_color = arr[:, -1:, :]  # Get last column
+            padding = np.repeat(last_color, pad, axis=1)
+            arr = np.concatenate([arr, padding], axis=1)
+        elif current_width > width:
+            # Trim to exact width
+            arr = arr[:, :width, :]
+        return (arr * 255).astype(np.uint8)
+    # continuous - unchanged
+    xs = np.linspace(0, 1, smooth_steps)
+    cols = cmap(xs)[..., :3]
+    xi = np.linspace(0, smooth_steps - 1, width)
+    lo = np.floor(xi).astype(int)
+    hi = np.minimum(lo + 1, smooth_steps - 1)
+    t = xi - lo
+    strip = (1 - t)[:, None] * cols[lo] + t[:, None] * cols[hi]
+    arr = np.repeat(strip[np.newaxis, :, :], height, axis=0)
+    return (arr * 255).astype(np.uint8)
+# ------------------------------------------------------------------
+# ColormapChooser class
+# ------------------------------------------------------------------
+class ColormapChooser:
+    """Reusable scrollable colormap selector for Gradio.
+    Parameters
+    ----------
+    categories:
+        Dict mapping *Category Label* → list of cmap names. If None, uses
+        DEFAULT_CATEGORIES defined above. You may pass additional categories or
+        override existing ones. Order preserved.
+    smooth_steps:
+        Approx sample count for continuous maps (default 10).
+    strip_width:
+        Pixel width of preview strip images (default 10).
+    strip_height:
+        Pixel height of preview strip images (default 16).
+    css_height:
+        Max CSS height (pixels) for the scrollable gallery viewport.
+    qualitative_label:
+        Category label used to force qualitative sampling when present.
+    max_auto:
+        If a ListedColormap has N <= max_auto, treat as categorical even if not
+        declared Qualitative.
+    elem_id:
+        DOM id for the gallery (used to scope CSS overrides). Default 'cmap_gallery'.
+    show_search:
+        Whether to render the search Textbox.
+    show_category:
+        Whether to render the category Radio selector.
+    show_preview:
+        Show the big preview strip under the gallery.  Off by default.
+    show_selected_name:
+        Show the textbox that echoes the selected colormap name.  Off by default.
+    show_selected_info:
+        Show the markdown info line.  On by default.
+    gallery_kwargs:
+        Dictionary of keyword arguments to pass to the Gradio Gallery component
+        when it is created. For example, `columns=4, allow_preview=True, height="400px"`.
+    Public attributes after render():
+        category (optional)
+        search (optional)
+        gallery
+        preview
+        selected_name  (Textbox; value string)
+        selected_info  (Markdown)
+        names_state    (State of current filtered cmap names)
+    Usage: see module Quick Start above.
+    """
+    def __init__(
+        self,
+        *,
+        categories: Optional[Dict[str, Sequence[str]]] = None,
+        smooth_steps: int = 10,
+        strip_width: int = 10,
+        strip_height: int = 16,
+        css_height: int = 240,
+        qualitative_label: str = "Qualitative",
+        max_auto: int = 32,
+        elem_id: str = "cmap_gallery",
+        show_search: bool = True,
+        show_category: bool = True,
+        columns: int = 3,
+        thumb_margin_px: int = 2,          # NEW
+        gallery_kwargs: Optional[Dict[str, Any]] = None,
+        show_preview: bool = False,
+        show_selected_name: bool = False,
+        show_selected_info: bool = True,
+    ) -> None:
+        self.categories = categories if categories is not None else DEFAULT_CATEGORIES
+        self.smooth_steps = smooth_steps
+        self.strip_width = strip_width
+        self.strip_height = strip_height
+        self.css_height = css_height
+        self.qualitative_label = qualitative_label
+        self.max_auto = max_auto
+        self.elem_id = elem_id
+        self.show_search = show_search
+        self.show_category = show_category
+        self.columns = columns
+        self.thumb_margin_px = thumb_margin_px   # NEW
+        self.gallery_kwargs = gallery_kwargs or {}
+        # visibility flags
+        self.show_preview = show_preview
+        self.show_selected_name = show_selected_name
+        self.show_selected_info = show_selected_info
+        self._all_names = _flatten_categories(self.categories)
+        self._name2cat = _build_name2cat(self.categories)
+        self._tile_cache: Dict[str, np.ndarray] = {}
+        # public gradio components (populated in render)
+        self.category = None
+        self.search = None
+        self.gallery = None
+        self.preview = None
+        self.selected_name = None
+        self.selected_info = None
+        self.names_state = None
+    # ------------------
+    # internal helpers
+    # ------------------
+    def _tile(self, name: str) -> np.ndarray:
+        if name not in self._tile_cache:
+            self._tile_cache[name] = _cmap_strip(
+                name,
+                width=self.strip_width,
+                height=self.strip_height,
+                smooth_steps=self.smooth_steps,
+                declared_category=self._name2cat.get(name),
+                qualitative_label=self.qualitative_label,
+                max_auto=self.max_auto,
+            )
+        return self._tile_cache[name]
+    def _make_gallery_items(self, names: Sequence[str]):
+        return [(self._tile(n), n) for n in names]
+    # ------------------
+    # event functions
+    # ------------------
+    def _filter(self, cat: str, s: str):
+        if self.show_category and cat in self.categories:
+            names = list(self.categories[cat])
+        else:
+            names = list(self._all_names)
+        if s and self.show_search:
+            sl = s.lower()
+            names = [n for n in names if sl in n.lower()]
+        # Remember new list for the select-callback
+        self.names_state.value = names
+        # 1) return an updated gallery
+        gkw = {
+            "value": self._make_gallery_items(names),
+            "selected_index": None,
+        }
+        gkw.update(self.gallery_kwargs)
+        gallery_update = gr.Gallery(**gkw)
+        # 2) clear the other widgets so old selection disappears
+        preview_update  = gr.update(value=None)
+        name_update     = gr.update(value="")
+        info_update     = gr.update(value="")
+        return gallery_update, preview_update, name_update, info_update
+    def _select(self, evt: gr.SelectData, names: Sequence[str]):
+        if not names or evt.index is None or evt.index >= len(names):
+            return gr.update(), "", "Nothing selected"
+        name = names[evt.index]
+        big = _cmap_strip(
+            name,
+            width=max(self.strip_width * 2, 768),
+            height=max(self.strip_height * 2, 32),
+            smooth_steps=self.smooth_steps,
+            declared_category=self._name2cat.get(name),
+            qualitative_label=self.qualitative_label,
+            max_auto=self.max_auto,
+        )
+        info = f"**Selected:** `{name}` _(Category: {self._name2cat.get(name, '?')})_"
+        return big, name, info
+    # ------------------
+    # CSS block builder
+    # ------------------
+    def css(self) -> str:
+        return f"""
+        /* ───── 0. easy visual check the CSS is live (remove later) ───── */
+        #{self.elem_id} {{
+        /* background:rgba(255,255,0,.05); */
+        }}
+        /* the wrapper *is* the .block, so it owns the padding var */
+        #{self.elem_id}_wrap {{
+            padding: 0 !important;
+            --block-padding: 0 !important;
+        }}
+        /* ───── 1. the wrapper Gradio marks .fixed-height: make it scroll  ─── */
+        #{self.elem_id} .grid-wrap {{
+            height: {self.css_height}px;          /* kill inline 200 px or similar */
+            max-height: {self.css_height}px;  /* cap the gallery’s height      */
+            overflow-y: auto;                 /* rows that don’t fit will scroll */
+        }}
+        /* ───── 2. the real grid: keep masonry maths intact, tweak gap ─── */
+        #{self.elem_id} .grid-container {{
+            height: auto !important;          /* sometimes Gradio sets one     */
+            gap: 7px;        /* tighter gutters (define attr) */
+            grid-auto-rows:auto !important;
+        }}
+        /* ───── 3. thumbnail boxes keep your ultra-wide shape ──────────── */
+        #{self.elem_id} .thumbnail-item {{
+            aspect-ratio: 3/1;  /* e.g. 5/1 */
+            height: auto !important;          /* beats Gradio’s inline 100 %   */
+            margin: {self.thumb_margin_px}px !important;
+            overflow: hidden;                 /* just in case                  */
+        }}
+        /* ───── 4. images fill each box neatly ─────────────────────────── */
+        #{self.elem_id} img {{
+            width: 100%;
+            height: 100%;
+            object-fit: cover;                /* crop to fill                  */
+            object-position: left;
+            display: block;                   /* kill inline-img whitespace    */
+        }}
+        /* ───── 5. widen the “Selected:” info line ───────────────────── */
+        .cmap_selected_info {{
+            max-width: 100% !important;   /* kill default 45 rem limit   */
+        }}
+        """
+    # ------------------
+    # Render into an existing Blocks context
+    # ------------------
+    def render(self):
+        """Create Gradio UI elements and wire callbacks.
+        Must be called *inside* an active `gr.Blocks()` context.
+        Returns a tuple `(components_dict)` for convenience.
+        """
+        # initial list: first category or all
+        if self.show_category:
+            first_cat = next(iter(self.categories))
+            init_names = list(self.categories[first_cat])
+        else:
+            init_names = list(self._all_names)
+        # preheat tiles lazily on demand; no bulk precompute
+        # (call _tile when building gallery items)
+        # layout
+        if self.show_category or self.show_search:
+            with gr.Row():
+                if self.show_category:
+                    self.category = gr.Radio(list(self.categories.keys()), value=first_cat, label="Category")
+                else:
+                    self.category = gr.State(None)  # shim so filter signature works
+                if self.show_search:
+                    self.search = gr.Textbox(label="Search", placeholder="type to filter...")
+                else:
+                    self.search = gr.State("")
+        else:
+            self.category = gr.State(None)
+            self.search = gr.State("")
+        self.names_state = gr.State(init_names)
+        gkw = {
+            "value": self._make_gallery_items(init_names),
+            "label": None,                   # remove label
+            "allow_preview": False,
+            "elem_id": self.elem_id,
+            "show_share_button": False,
+            "columns": getattr(self, "columns", 3),
+        }
+        gkw.update(self.gallery_kwargs)
+        self.gallery = gr.Gallery(**gkw)
+        self.preview = gr.Image(
+            label="Preview", interactive=False, height=60, visible=self.show_preview
+        )
+        self.selected_name = gr.Textbox(
+            label="Selected cmap", interactive=False, visible=self.show_selected_name
+        )
+        self.selected_info = gr.Markdown(
+            visible=self.show_selected_info,
+            elem_classes="cmap_selected_info",
+        )
+        # wiring
+        if self.show_category or self.show_search:
+            def _wrapped_filter(cat, s):
+                if not self.show_category:
+                    cat = None
+                if not self.show_search:
+                    s = ""
+                return self._filter(cat, s)
+            outputs = [self.gallery,
+                       self.preview,
+                       self.selected_name,
+                       self.selected_info]
+            if self.show_category:
+                self.category.change(
+                    _wrapped_filter,
+                    [self.category, self.search],
+                    outputs
+                )
+            if self.show_search:
+                self.search.change(
+                    _wrapped_filter,
+                    [self.category, self.search],
+                    outputs
+                )
+        def _wrapped_select(evt: gr.SelectData, names):
+            return self._select(evt, names)
+        self.gallery.select(_wrapped_select, [self.names_state],
+                            [self.preview, self.selected_name, self.selected_info])
+        return {
+            "gallery": self.gallery,
+            "selected_name": self.selected_name,
+            "preview": self.preview,
+            "info": self.selected_info,
+            "category": self.category,
+            "search": self.search,
+            "names_state": self.names_state,
+        }
+    # ==========================================================
+    # NEW TAB-BASED RENDERER
+    # ==========================================================
+    def render_tabs(self):
+        """
+        Render the chooser as one Gallery per category inside a gradio Tabs
+        container.  No search box is provided – each tab already filters
+        by category.
+        Returns the same components dict as `render()`, plus a "galleries"
+        dict that maps category → Gallery component.
+        """
+        galleries = {}
+        with gr.Tabs() as root_tabs:
+            # --- build a tab + gallery for every category -------------
+            for cat, names in self.categories.items():
+                with gr.TabItem(cat):
+                    gkw = {
+                        "value": self._make_gallery_items(names),
+                       "label": None,         # remove label
+                        "allow_preview": False,
+                        "show_share_button": False,
+                        "elem_id": self.elem_id,
+                        "columns": getattr(self, "columns", 3),
+                        "show_label": False
+                    }
+                    gkw.update(self.gallery_kwargs)
+                    with gr.Row(elem_id=f"{self.elem_id}_wrap"):   # ← new wrapper
+                        gal = gr.Gallery(**gkw)
+                    galleries[cat] = gal
+        # --- shared preview / meta area under the tabs ----------------
+        self.preview = gr.Image(
+            label="Preview", interactive=False, height=60, visible=self.show_preview
+        )
+        self.selected_name = gr.Textbox(
+            label="Selected cmap", interactive=False, visible=self.show_selected_name
+        )
+        self.selected_info = gr.Markdown(
+            visible=self.show_selected_info,
+            elem_classes="cmap_selected_info",
+        )
+        # --- wiring: every gallery uses the same _select callback -----
+        def _wrapped_select(evt: gr.SelectData, names):
+            return self._select(evt, names)
+        for cat, gal in galleries.items():
+            gal.select(
+                _wrapped_select,
+                [gr.State(list(self.categories[cat]))],      # names list
+                [self.preview, self.selected_name, self.selected_info],
+            )
+        return {
+            "galleries": galleries,
+            "selected_name": self.selected_name,
+            "preview": self.preview,
+            "info": self.selected_info,
+            "tabs": root_tabs,
+        }
+# ------------------------------------------------------------------
+# Minimal self-demo (only runs if module executed directly)
+# ------------------------------------------------------------------
+if __name__ == "__main__":
+    chooser = ColormapChooser()
+    with gr.Blocks(css=chooser.css()) as demo:
+        gr.Markdown("## Colormap Chooser Demo")
+        chooser.render()
+    demo.launch()

colormap_chooser_testing_app.py ADDED Viewed

	@@ -0,0 +1,47 @@

+import gradio as gr
+from colormap_chooser import ColormapChooser, setup_colormaps
+# Set up colormaps with our preferred collections and ordering
+print("Setting up colormaps...")
+categories = setup_colormaps(
+    included_collections=['matplotlib', 'cmocean', 'scientific', 'cmasher', 'colorbrewer', 'cartocolors'],
+    excluded_collections=['colorcet', 'carbonplan', 'sciviz']
+)
+# Create the chooser with our categories
+chooser = ColormapChooser(
+    categories=categories,
+    smooth_steps=10,
+    strip_width=200,
+    strip_height=50,
+    css_height=180,            # outer box height (becomes a scroll-pane)
+    thumb_margin_px=2,         # more space between strips
+    gallery_kwargs=dict(columns=3, allow_preview=False, height="200px")   # anything else you need
+)
+print(chooser.css())
+with gr.Blocks(css=chooser.css()) as demo:
+    with gr.Row():
+        with gr.Column(scale=1):
+            chooser.render_tabs()
+        with gr.Column(scale=2):
+            plot = gr.Plot(label="Demo Plot")
+    # When the user picks a cmap, update the plot
+    def _plot(name):
+        print(f"Plotting {name}")
+        import numpy as np, matplotlib.pyplot as plt
+        data = np.random.RandomState(0).randn(100,100)
+        fig, ax = plt.subplots()
+        im = ax.imshow(data, cmap=name)
+        fig.colorbar(im, ax=ax)
+        plt.close(fig)
+        return fig
+    chooser.selected_name.change(_plot, chooser.selected_name, plot)
+demo.launch(debug=True, share=False, inbrowser=True)

openalex_utils.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import numpy as np
 from urllib.parse import urlparse, parse_qs
-from pyalex import Works
 import pandas as pd
 import ast, json
@@ -213,4 +213,164 @@ def get_records_from_dois(doi_list, block_size=50):
             all_records.extend(record_list)
         except Exception as e:
             print(f"Error fetching DOIs {sublist}: {e}")
-    return pd.DataFrame(all_records)

 import numpy as np
 from urllib.parse import urlparse, parse_qs
+from pyalex import Works, Authors, Institutions
 import pandas as pd
 import ast, json
             all_records.extend(record_list)
         except Exception as e:
             print(f"Error fetching DOIs {sublist}: {e}")
+    return pd.DataFrame(all_records)
+def openalex_url_to_readable_name(url):
+    """
+    Convert an OpenAlex URL to a short, human-readable query description.
+    Args:
+    url (str): The OpenAlex search URL
+    Returns:
+    str: A short, human-readable description of the query
+    Examples:
+    - "Search: 'Kuramoto Model'"
+    - "Search: 'quantum physics', 2020-2023"
+    - "Cites: Popper (1959)"
+    - "From: University of Pittsburgh, 1999-2020"
+    - "By: Einstein, A., 1905-1955"
+    """
+    import re
+    # Parse the URL
+    parsed_url = urlparse(url)
+    query_params = parse_qs(parsed_url.query)
+    # Initialize description parts
+    parts = []
+    year_range = None
+    # Handle filters
+    if 'filter' in query_params:
+        filters = query_params['filter'][0].split(',')
+        for f in filters:
+            if ':' not in f:
+                continue
+            key, value = f.split(':', 1)
+            try:
+                if key == 'default.search':
+                    # Clean up search term (remove quotes if present)
+                    search_term = value.strip('"\'')
+                    parts.append(f"Search: '{search_term}'")
+                elif key == 'publication_year':
+                    # Handle year ranges or single years
+                    if '-' in value:
+                        start_year, end_year = value.split('-')
+                        year_range = f"{start_year}-{end_year}"
+                    else:
+                        year_range = value
+                elif key == 'cites':
+                    # Look up the cited work to get author and year
+                    work_id = value
+                    try:
+                        cited_work = Works()[work_id]
+                        if cited_work:
+                            # Get first author's last name
+                            author_name = "Unknown"
+                            year = "Unknown"
+                            if cited_work.get('authorships') and len(cited_work['authorships']) > 0:
+                                first_author = cited_work['authorships'][0]['author']
+                                if first_author.get('display_name'):
+                                    # Extract last name (assuming "First Last" format)
+                                    name_parts = first_author['display_name'].split()
+                                    author_name = name_parts[-1] if name_parts else first_author['display_name']
+                            if cited_work.get('publication_year'):
+                                year = str(cited_work['publication_year'])
+                            parts.append(f"Cites: {author_name} ({year})")
+                        else:
+                            parts.append(f"Cites: Work {work_id}")
+                    except Exception as e:
+                        print(f"Could not fetch cited work {work_id}: {e}")
+                        parts.append(f"Cites: Work {work_id}")
+                elif key == 'authorships.institutions.lineage':
+                    # Look up institution name
+                    inst_id = value
+                    try:
+                        institution = Institutions()[inst_id]
+                        if institution and institution.get('display_name'):
+                            parts.append(f"From: {institution['display_name']}")
+                        else:
+                            parts.append(f"From: Institution {inst_id}")
+                    except Exception as e:
+                        print(f"Could not fetch institution {inst_id}: {e}")
+                        parts.append(f"From: Institution {inst_id}")
+                elif key == 'authorships.author.id':
+                    # Look up author name
+                    author_id = value
+                    try:
+                        author = Authors()[author_id]
+                        if author and author.get('display_name'):
+                            parts.append(f"By: {author['display_name']}")
+                        else:
+                            parts.append(f"By: Author {author_id}")
+                    except Exception as e:
+                        print(f"Could not fetch author {author_id}: {e}")
+                        parts.append(f"By: Author {author_id}")
+                elif key == 'type':
+                    # Handle work types
+                    type_mapping = {
+                        'article': 'Articles',
+                        'book': 'Books',
+                        'book-chapter': 'Book Chapters',
+                        'dissertation': 'Dissertations',
+                        'preprint': 'Preprints'
+                    }
+                    work_type = type_mapping.get(value, value.replace('-', ' ').title())
+                    parts.append(f"Type: {work_type}")
+                elif key == 'host_venue.id':
+                    # Look up venue name
+                    venue_id = value
+                    try:
+                        # For venues, we can use Works to get source info, but let's try a direct approach
+                        # This might need adjustment based on pyalex API structure
+                        parts.append(f"In: Venue {venue_id}")  # Fallback
+                    except Exception as e:
+                        parts.append(f"In: Venue {venue_id}")
+                elif key.startswith('concepts.id'):
+                    # Handle concept filters - these are topic/concept IDs
+                    concept_id = value
+                    parts.append(f"Topic: {concept_id}")  # Could be enhanced with concept lookup
+                else:
+                    # Generic handling for other filters
+                    clean_key = key.replace('_', ' ').replace('.', ' ').title()
+                    clean_value = value.replace('_', ' ')
+                    parts.append(f"{clean_key}: {clean_value}")
+            except Exception as e:
+                print(f"Error processing filter {f}: {e}")
+                continue
+    # Combine parts into final description
+    if not parts:
+        description = "OpenAlex Query"
+    else:
+        description = ", ".join(parts)
+    # Add year range if present
+    if year_range:
+        if parts:
+            description += f", {year_range}"
+        else:
+            description = f"Works from {year_range}"
+    # Limit length to keep it readable
+    if len(description) > 100:
+        description = description[:97] + "..."
+    return description

ui_utils.py ADDED Viewed

	@@ -0,0 +1,52 @@

+"""
+UI utility functions for the OpenAlex Mapper Gradio app.
+"""
+from openalex_utils import openalex_url_to_readable_name
+def highlight_queries(text: str) -> str:
+    """Split OpenAlex URLs on semicolons and display them as colored pills with readable names."""
+    palette = ["#f5f5f5", #set to  only light grey
+        # "#e8f4fd", "#fff2e8", "#f0f9e8", "#fdf2f8",
+        # "#f3e8ff", "#e8f8f5", "#fef7e8", "#f8f0e8"
+    ]
+    # Handle empty input
+    if not text or not text.strip():
+        return "<div style='padding: 10px; color: #666; font-style: italic;'>Enter OpenAlex URLs separated by semicolons to see query descriptions</div>"
+    # Split URLs on semicolons and strip whitespace
+    urls = [url.strip() for url in text.split(";") if url.strip()]
+    if not urls:
+        return "<div style='padding: 10px; color: #666; font-style: italic;'>No valid URLs found</div>"
+    pills = []
+    for i, url in enumerate(urls):
+        color = palette[i % len(palette)]
+        try:
+            # Get readable name for the URL
+            readable_name = openalex_url_to_readable_name(url)
+        except Exception as e:
+            print(f"Error processing URL {url}: {e}")
+            readable_name = f"Query {i+1}"
+        pills.append(
+            f'<span style="background:{color};'
+            'padding: 8px 12px; margin: 4px; '
+            'border-radius: 12px; font-weight: 500;'
+            'display: inline-block; font-family: \'Roboto Condensed\', sans-serif;'
+            'border: 1px solid rgba(0,0,0,0.1); font-size: 14px;'
+            'box-shadow: 0 1px 3px rgba(0,0,0,0.1);">'
+            f'{readable_name}</span>'
+        )
+    return (
+        "<div style='padding: 8px 0;'>"
+        "<div style='font-size: 12px; color: #666; margin-bottom: 6px; font-weight: 500;'>"
+        f"{'Query' if len(urls) == 1 else 'Queries'} ({len(urls)}):</div>"
+        "<div style='display: flex; flex-wrap: wrap; gap: 4px;'>"
+        + "".join(pills) +
+        "</div></div>"
+    )