"""Gradio front end that converts an uploaded document with Docling."""

import json

import gradio as gr
from docling.document_converter import DocumentConverter

# UI label -> attribute name looked up on the converted document.
# NOTE(review): these attributes are not guaranteed to exist on the Docling
# document object; any that are missing are reported as None (see below).
_METADATA_FIELDS = {
    "Title": "title",
    "Author": "author",
    "Language": "language",
    "References": "references",
}


def convert_document(file, output_format):
    """Convert an uploaded document and collect basic metadata.

    Args:
        file: The value delivered by the Gradio file component. Either a
            filesystem path string or an object exposing the path as
            ``.name``; both shapes are accepted.
        output_format: ``"Markdown"`` or ``"JSON"``. Any other value yields
            the text ``"Unsupported format"``.

    Returns:
        A ``(converted_text, metadata)`` tuple. ``converted_text`` is the
        exported document as a string; ``metadata`` is a dict keyed by
        ``"Title"``, ``"Author"``, ``"Language"`` and ``"References"``,
        with ``None`` for attributes the document does not provide.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if file is None:
        raise gr.Error("Please upload a document before converting.")

    # Older Gradio versions pass a temp-file wrapper (path in ``.name``);
    # newer ones pass the path itself.
    source_path = getattr(file, "name", file)

    converter = DocumentConverter()
    result = converter.convert(source_path)
    document = result.document

    if output_format == "Markdown":
        converted_text = document.export_to_markdown()
    elif output_format == "JSON":
        # The document exports a plain dict; serialise it here so the
        # Textbox output receives a string.
        converted_text = json.dumps(
            document.export_to_dict(), indent=2, ensure_ascii=False
        )
    else:
        converted_text = "Unsupported format"

    # getattr with a default keeps a missing attribute from aborting the
    # whole conversion with AttributeError.
    metadata = {
        label: getattr(document, attribute, None)
        for label, attribute in _METADATA_FIELDS.items()
    }
    return converted_text, metadata


# Define the Gradio interface. Components are taken from the top-level
# ``gr`` namespace; the ``gr.inputs`` / ``gr.outputs`` namespaces are gone in
# current Gradio releases.
input_file = gr.File(label="Upload Document")
output_format = gr.Radio(["Markdown", "JSON"], label="Choose Output Format")
output_text = gr.Textbox(label="Converted Document")
output_metadata = gr.JSON(label="Metadata")

app = gr.Interface(
    fn=convert_document,
    inputs=[input_file, output_format],
    outputs=[output_text, output_metadata],
    title="Document Converter with Docling",
    description="Upload a document (PDF, DOCX, or image), choose the output format, and get the converted document text along with metadata.",
)

if __name__ == "__main__":
    # Start the web server only when run as a script, so importing this
    # module (e.g. to reuse ``convert_document``) has no side effects.
    app.launch()