<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>PDF to DOCX</title>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/marked/15.0.7/marked.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/pdfjs-dist@3.4.120/build/pdf.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/pdfjs-dist@3.4.120/build/pdf.worker.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/html-docx-js@0.3.1/dist/html-docx.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/file-saver@2.0.5/dist/FileSaver.min.js"></script>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 0;
            padding: 20px;
            background-color: #f4f4f9;
        }
        .container {
            max-width: 1000px;
            margin: 0 auto;
            background-color: white;
            padding: 20px;
            border-radius: 8px;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
        }
        h1 {
            text-align: center;
            color: #333;
        }
        .form-group {
            margin-bottom: 20px;
        }
        label {
            display: block;
            margin-bottom: 5px;
            font-weight: bold;
        }
        input[type="file"] {
            width: 100%;
            padding: 8px;
            border: 1px solid #ddd;
            border-radius: 4px;
        }
        button {
            background-color: #007bff;
            color: white;
            border: none;
            padding: 10px 15px;
            border-radius: 4px;
            cursor: pointer;
            font-size: 16px;
        }
        button:hover {
            background-color: #0056b3;
        }
        button:disabled {
            background-color: #cccccc;
            cursor: not-allowed;
        }
        #pdf-preview {
            margin-top: 20px;
            border: 1px solid #ddd;
            border-radius: 4px;
            padding: 10px;
            min-height: 200px;
            text-align: center;
        }
        #loadings {
            display: none;
            justify-content: center;
            align-items: center;
            position: fixed;
            top: 0;
            left: 0;
            width: 100%;
            height: 100%;
            background-color: rgba(255, 255, 255, 0.7);
            z-index: 1000;
        }
        .spinner {
            border: 4px solid rgba(0, 0, 0, 0.1);
            border-radius: 50%;
            border-top: 4px solid #007bff;
            width: 40px;
            height: 40px;
            animation: spin 1s linear infinite;
        }
        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }
        #drop-area {
            border: 2px dashed #ccc;
            border-radius: 4px;
            padding: 20px;
            text-align: center;
            margin-bottom: 20px;
            transition: background-color 0.3s;
        }
        #drop-area.active {
            background-color: #f0f8ff;
        }
        .pdf-pages {
            display: flex;
            flex-direction: column;
            align-items: center;
        }
        .page-controls {
            margin-top: 10px;
            display: flex;
            justify-content: center;
            align-items: center;
            gap: 10px;
        }
        .page-info {
            font-size: 14px;
        }
        .page-nav button {
            padding: 5px 10px;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>PDF to OCR and DOCX</h1>

        <div class="form-group">
            <label for="pdf-uploads">Upload PDF:</label>
            <!-- Hidden native file picker; the button below opens it from the page script. -->
            <input type="file" id="pdf-uploads" accept=".pdf,application/pdf" style="display: none;">
            <button type="button" id="upload-btns">Choose PDF</button>
            <div id="drop-area">
                <p>Or drag and drop a PDF file here</p>
            </div>
            <!-- Polite live region: the page script rewrites this text when a file is selected. -->
            <p id="file-names" aria-live="polite">No file selected</p>
        </div>

        <button type="button" id="download-btn" disabled>Download DOCX</button>

        <div id="pdf-preview"></div>

        <!-- Full-page loading overlay; hidden by CSS and shown by the page script during export. -->
        <div id="loadings">
            <div class="spinner"></div>
        </div>
    </div>

    <script>
        (function() {
            // Initialize PDF.js: point the library at its worker script
            // (same pdfjs-dist version as the <script> tags in <head>).
            pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdn.jsdelivr.net/npm/pdfjs-dist@3.4.120/build/pdf.worker.min.js';

            // DOM Elements — looked up once by id; the ids come from the markup above.
            // "Choose PDF" button; its click handler opens the hidden file input.
            const uploadBtnn = document.getElementById('upload-btns');
            // Hidden <input type="file"> that actually receives the selection.
            const fileInputs = document.getElementById('pdf-uploads');
            // Paragraph whose text shows the selected file's name.
            const fileNameDisplays = document.getElementById('file-names');
            // Container the PDF preview canvas and page controls are rendered into.
            const pdfPreview = document.getElementById('pdf-preview');
            // "Download DOCX" button; starts disabled and is enabled once a file is selected.
            const downloadBtn = document.getElementById('download-btn');
            // Full-page spinner overlay, shown/hidden through its inline `display` style.
            const loadingsIndicators = document.getElementById('loadings');
            // Drag-and-drop target.
            const dropArea = document.getElementById('drop-area');

            // State
            // The currently selected File (null until the user picks or drops one).
            let pdfFile = null;
            // Object URL created for `pdfFile`; kept so it can be revoked when the file is replaced.
            let currentPdfUrl = null;

            /**
             * Fetch the Mistral API key from the `get_api_keys.php` endpoint.
             *
             * The endpoint is expected to answer with JSON of the form
             * `{ success: boolean, message?: string, mistral_api_key?: string }`.
             * Any failure (network error, non-JSON body, HTTP error status, or
             * `success: false`) is logged, reported to the user via `alert`, and
             * signalled to the caller by a `null` return value — this function never rejects.
             *
             * NOTE(review): the key is delivered to the browser, where any visitor can
             * read it. Consider proxying the OCR calls through the server instead.
             *
             * @returns {Promise<string|null>} The API key, or `null` on failure.
             */
            async function fetchApiKey() {
                try {
                    const apiResponse = await fetch('get_api_keys.php');

                    // Parse defensively: an error page (HTML) would otherwise surface
                    // to the user as an opaque "Unexpected token <" JSON parse error.
                    let apiData;
                    try {
                        apiData = await apiResponse.json();
                    } catch (parseError) {
                        throw new Error(`Unexpected response from server (status ${apiResponse.status})`);
                    }

                    // Treat an HTTP error status as a failure even if the body parsed,
                    // but still prefer the server-supplied message when there is one.
                    if (!apiResponse.ok || !apiData || !apiData.success) {
                        throw new Error((apiData && apiData.message) || 'Failed to fetch API keys.');
                    }

                    return apiData.mistral_api_key;
                } catch (error) {
                    console.error('Error fetching API key:', error);
                    alert(`Error fetching API key: ${error.message}`);
                    return null;
                }
            }

            // PDF Preview Functions
            /**
             * Load a PDF with PDF.js and build a one-page-at-a-time preview
             * (canvas plus Previous/Next controls) inside the preview pane.
             *
             * Load and first-render failures are caught, logged, and shown as an
             * error message in the pane, so the returned promise does not reject for them.
             *
             * @param {string} pdfUrl URL handed to `pdfjsLib.getDocument`.
             * @returns {Promise<void>}
             */
            async function loadPdfPreview(pdfUrl) {
                // Each call takes a ticket. If a newer call starts while this one is
                // still loading, this one must not touch the pane any more — otherwise
                // two previews get stacked, or a stale failure wipes the current preview.
                const ticket = (loadPdfPreview.latestTicket || 0) + 1;
                loadPdfPreview.latestTicket = ticket;
                const isStale = () => loadPdfPreview.latestTicket !== ticket;

                pdfPreview.innerHTML = '';

                try {
                    const pdf = await pdfjsLib.getDocument(pdfUrl).promise;

                    if (isStale()) {
                        return;
                    }

                    const pagesContainer = document.createElement('div');
                    pagesContainer.className = 'pdf-pages';
                    pdfPreview.appendChild(pagesContainer);

                    const pageControls = document.createElement('div');
                    pageControls.className = 'page-controls';
                    pageControls.innerHTML = `
                        <div class="page-info">Page 1 of ${pdf.numPages}</div>
                        <div class="page-nav">
                            <button id="prev-page" disabled>Previous</button>
                            <button id="next-page" ${pdf.numPages === 1 ? 'disabled' : ''}>Next</button>
                        </div>
                    `;
                    pdfPreview.appendChild(pageControls);

                    let currentPage = 1;
                    let isRendering = false;

                    // Shared navigation routine for both buttons. Clicks that arrive
                    // while a page is still rendering are ignored, so two renders can
                    // never interleave and leave several canvases in the container.
                    const goToPage = async (targetPage) => {
                        if (isRendering || targetPage < 1 || targetPage > pdf.numPages) {
                            return;
                        }

                        isRendering = true;
                        try {
                            pagesContainer.innerHTML = '';
                            await renderPage(pdf, targetPage, pagesContainer);
                            // Only advance the page counter once the render succeeded.
                            currentPage = targetPage;
                            updatePageControls(currentPage, pdf.numPages);
                        } catch (error) {
                            console.error('Error rendering PDF page:', error);
                        } finally {
                            isRendering = false;
                        }
                    };

                    await renderPage(pdf, currentPage, pagesContainer);

                    // Query within the controls we just built rather than the whole document.
                    pageControls.querySelector('#prev-page')
                        .addEventListener('click', () => goToPage(currentPage - 1));
                    pageControls.querySelector('#next-page')
                        .addEventListener('click', () => goToPage(currentPage + 1));
                } catch (error) {
                    console.error('Error loadings PDF preview:', error);
                    // A superseded load must not overwrite the newer preview.
                    if (!isStale()) {
                        pdfPreview.innerHTML = '<div class="error">Failed to load PDF preview</div>';
                    }
                }
            }

            /**
             * Draw one page of a PDF.js document onto a new <canvas> and append
             * that canvas to `container`.
             *
             * The page is scaled to 95% of the container's width; when the container
             * reports a width of 0 the preview pane's width is used instead.
             *
             * @param {object} pdf Document object exposing `getPage(pageNum)`.
             * @param {number} pageNum Page number to draw.
             * @param {HTMLElement} container Element that receives the canvas.
             * @returns {Promise<void>} Resolves once the page is drawn and attached.
             */
            async function renderPage(pdf, pageNum, container) {
                const pdfPage = await pdf.getPage(pageNum);

                // Measure the page at its natural size to derive the fit-to-width factor.
                const naturalWidth = pdfPage.getViewport({ scale: 1.0 }).width;
                const availableWidth = container.clientWidth || pdfPreview.clientWidth;
                const fitScale = availableWidth / naturalWidth;
                const fittedViewport = pdfPage.getViewport({ scale: fitScale * 0.95 });

                const surface = document.createElement('canvas');
                const drawingContext = surface.getContext('2d');
                surface.height = fittedViewport.height;
                surface.width = fittedViewport.width;

                await pdfPage.render({
                    canvasContext: drawingContext,
                    viewport: fittedViewport
                }).promise;

                // Attach only after rendering finished, so a half-drawn canvas is never shown.
                container.appendChild(surface);
            }

            /**
             * Sync the "Page X of Y" label and the Previous/Next buttons with the
             * page that is currently displayed.
             *
             * @param {number} currentPage Page now shown.
             * @param {number} totalPages Number of pages in the document.
             */
            function updatePageControls(currentPage, totalPages) {
                const label = document.querySelector('.page-info');
                const [backButton, forwardButton] = ['prev-page', 'next-page']
                    .map(id => document.getElementById(id));

                label.textContent = `Page ${currentPage} of ${totalPages}`;

                // Previous is unavailable on the first page, Next on the last one.
                backButton.disabled = currentPage === 1;
                forwardButton.disabled = currentPage === totalPages;
            }

            // Drag and Drop Handlers
            /**
             * Cancel an event's default action and stop it from propagating further.
             *
             * @param {Event} e Event to neutralise.
             */
            function preventDefaults(e) {
                for (const method of ['preventDefault', 'stopPropagation']) {
                    e[method]();
                }
            }

            /** Mark the drop area as an active drop target (adds the `active` class). */
            function highlight() {
                dropArea.classList.toggle('active', true);
            }

            /** Clear the drop area's active-target styling (removes the `active` class). */
            function unhighlight() {
                dropArea.classList.toggle('active', false);
            }

            /**
             * Handle a `drop` event on the drop area: accept the drop when its first
             * file is a PDF and pass the file list on to `handleFiles`; otherwise
             * tell the user to drop a PDF.
             *
             * A file counts as a PDF when its MIME type is `application/pdf`, or when
             * the browser could not determine a type at all (empty `type`) and the
             * file name ends in `.pdf`. Without that fallback, valid PDFs are
             * rejected on systems that report no MIME type for them.
             *
             * @param {DragEvent} e The drop event.
             */
            function handleDrop(e) {
                // `dataTransfer` can be absent on synthetic events; treat that as "no files".
                const droppedFiles = e.dataTransfer ? e.dataTransfer.files : null;
                const firstFile = droppedFiles && droppedFiles.length > 0 ? droppedFiles[0] : null;

                const looksLikePdf = firstFile !== null && (
                    firstFile.type === 'application/pdf' ||
                    (firstFile.type === '' && /\.pdf$/i.test(firstFile.name || ''))
                );

                if (looksLikePdf) {
                    handleFiles(droppedFiles);
                } else {
                    alert('Please drop a PDF file');
                }
            }

            /**
             * Adopt the first file of `files` as the current PDF: remember it, show
             * its name, start the preview, and enable the download button.
             * Does nothing when the list is empty.
             *
             * @param {FileList|File[]} files Files from the file input or a drop.
             */
            function handleFiles(files) {
                if (!(files.length > 0)) {
                    return;
                }

                pdfFile = files[0];
                fileNameDisplays.textContent = `Selected file: ${pdfFile.name}`;

                // Release the object URL of the previously selected file before replacing it.
                if (currentPdfUrl) {
                    URL.revokeObjectURL(currentPdfUrl);
                }
                currentPdfUrl = URL.createObjectURL(pdfFile);

                // The preview is started but not awaited.
                loadPdfPreview(currentPdfUrl);
                downloadBtn.disabled = false;
            }

            // OCR Processing Functions
            /**
             * Upload a PDF to the Mistral files endpoint with purpose `ocr`.
             *
             * @param {string} apiKey Bearer token sent in the Authorization header.
             * @param {File|Blob} pdfFile The PDF to upload.
             * @returns {Promise<object>} Parsed JSON body of the successful response.
             * @throws {Error} `File upload failed: …` when the response status is not OK.
             */
            async function uploadPdfToMistral(apiKey, pdfFile) {
                const payload = new FormData();
                payload.append('purpose', 'ocr');
                payload.append('file', pdfFile);

                const response = await fetch('https://api.mistral.ai/v1/files', {
                    method: 'POST',
                    headers: { 'Authorization': `Bearer ${apiKey}` },
                    body: payload
                });

                if (response.ok) {
                    return await response.json();
                }

                // Prefer the API's own error message; fall back to the HTTP status
                // line when the error body is not JSON.
                let reason;
                try {
                    const details = await response.json();
                    reason = details.error?.message || response.statusText;
                } catch (e) {
                    reason = `Status ${response.status}: ${response.statusText}`;
                }

                throw new Error(`File upload failed: ${reason}`);
            }

            /**
             * Run a PDF through Mistral OCR in three steps: upload the file, request
             * a signed URL for it, then submit that URL to the OCR endpoint
             * (with embedded base64 images requested).
             *
             * Every failure is logged to the console and then rethrown to the caller.
             *
             * @param {string} apiKey Bearer token for the Mistral API.
             * @param {File|Blob} pdfFile The PDF to process.
             * @returns {Promise<object>} Parsed JSON body of the OCR response.
             * @throws {Error} When the upload, the signed-URL request, or the OCR request fails.
             */
            async function processPdfWithOcr(apiKey, pdfFile) {
                const authorization = `Bearer ${apiKey}`;

                try {
                    // Step 1: upload the document.
                    const uploaded = await uploadPdfToMistral(apiKey, pdfFile);
                    const fileId = uploaded.id;

                    // Step 2: obtain a signed URL the OCR service can read from.
                    // NOTE(review): `expiry=24` — presumably hours; confirm against the API docs.
                    const signedUrlEndpoint = `https://api.mistral.ai/v1/files/${fileId}/url?expiry=24`;
                    const urlResponse = await fetch(signedUrlEndpoint, {
                        method: 'GET',
                        headers: {
                            'Accept': 'application/json',
                            'Authorization': authorization
                        }
                    });

                    if (!urlResponse.ok) {
                        throw new Error(`Failed to get signed URL: ${urlResponse.status}`);
                    }

                    const signedUrl = (await urlResponse.json()).url;

                    // Step 3: run OCR on the signed URL.
                    const ocrRequest = {
                        model: "mistral-ocr-latest",
                        document: {
                            type: "document_url",
                            document_url: signedUrl
                        },
                        include_image_base64: true
                    };

                    const ocrResponse = await fetch('https://api.mistral.ai/v1/ocr', {
                        method: 'POST',
                        headers: {
                            'Content-Type': 'application/json',
                            'Authorization': authorization
                        },
                        body: JSON.stringify(ocrRequest)
                    });

                    if (ocrResponse.ok) {
                        return await ocrResponse.json();
                    }

                    // Include the whole JSON error body when there is one; otherwise
                    // fall back to the HTTP status line.
                    let errorDetail;
                    try {
                        errorDetail = JSON.stringify(await ocrResponse.json());
                    } catch (e) {
                        errorDetail = `Status ${ocrResponse.status}: ${ocrResponse.statusText}`;
                    }
                    throw new Error(`OCR processing failed: ${errorDetail}`);
                } catch (error) {
                    console.error('Error in OCR process:', error);
                    throw error;
                }
            }

            /**
             * Convert OCR markdown to HTML, keeping `$…$` / `$$…$$` math spans out of
             * the markdown parser and replacing image references with inline data URIs.
             *
             * Steps:
             *  1. Swap math spans for placeholders so the markdown parser cannot
             *     reinterpret characters such as `*` or `_` inside them.
             *  2. Run `marked.parse`.
             *  3. Put the math back, HTML-escaped so `<`, `>` and `&` in a formula
             *     cannot be parsed as markup.
             *  4. For every image in `result.pages[].images[]` that has an `id` and
             *     base64 data, replace references to that id with an `<img>` tag.
             *  5. Turn remaining newlines inside `<p>` elements into `<br>`.
             *
             * @param {string} markdownText Markdown produced by OCR.
             * @param {object} [result] OCR response; only `pages[].images[]` entries
             *     (`id`, `image_base64` or `base64`, `format`, `caption`) are read.
             * @returns {string} HTML string.
             */
            function processMarkdownWithImages(markdownText, result) {
                const mathExpressions = [];

                const escapeHtml = (text) => String(text)
                    .replace(/&/g, '&amp;')
                    .replace(/</g, '&lt;')
                    .replace(/>/g, '&gt;')
                    .replace(/"/g, '&quot;');

                const escapeRegExp = (text) => String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

                // Builds a replacer that stores the matched math span and returns its placeholder.
                const stashMath = (prefix) => (match) => {
                    mathExpressions.push(match);
                    return `%%${prefix}${mathExpressions.length - 1}%%`;
                };

                // Display math must be extracted BEFORE inline math: the inline pattern
                // also matches the start of `$$…$$` and would pair the dollar signs
                // wrongly, leaving later formulas exposed to the markdown parser.
                markdownText = markdownText
                    .replace(/\$\$(.+?)\$\$/g, stashMath('DISPLAY_MATH'))
                    .replace(/\$(.+?)\$/g, stashMath('MATH'));

                // Convert markdown to HTML
                let html = marked.parse(markdownText);

                // Restore math expressions (both placeholder kinds share one index space).
                html = html.replace(/%%(?:DISPLAY_)?MATH(\d+)%%/g, (match, index) => {
                    const expression = mathExpressions[Number(index)];
                    // Leave text that merely looks like a placeholder untouched.
                    return expression === undefined ? match : escapeHtml(expression);
                });

                // Handle images
                const pages = Array.isArray(result?.pages) ? result.pages : [];
                pages.forEach((page) => {
                    if (!page || !Array.isArray(page.images)) {
                        return;
                    }

                    page.images.forEach((image, index) => {
                        if (!image || !image.id) {
                            return;
                        }

                        const base64Data = image.image_base64 || image.base64;
                        if (!base64Data) {
                            return;
                        }

                        // The payload may already be a complete data URI; only add
                        // the prefix when it is raw base64, to avoid a doubled prefix.
                        const src = /^data:/i.test(base64Data)
                            ? base64Data
                            : `data:image/${image.format || 'png'};base64,${base64Data}`;
                        const alt = escapeHtml(image.caption || `Image ${index + 1}`);
                        const imgTag = `<img src="${src}" alt="${alt}">`;

                        // Ids such as "img-0.jpeg" contain regex metacharacters.
                        const idPattern = escapeRegExp(image.id);
                        const mdRegex = new RegExp(`!\\[.*?\\]\\(${idPattern}\\)`, 'g');
                        const imgRegex = new RegExp(`<img[^>]*src=["']${idPattern}["'][^>]*>`, 'g');

                        // Function replacers keep `$&`-style sequences in the tag literal.
                        html = html
                            .replace(mdRegex, () => imgTag)
                            .replace(imgRegex, () => imgTag);
                    });
                });

                // Ensure newlines are properly handled in paragraphs
                html = html.replace(/<p>(.*?)<\/p>/gs, (match, pContent) =>
                    `<p>${pContent.replace(/\n/g, '<br>')}</p>`);

                return html;
            }

            /**
             * Run the selected PDF through OCR, convert the result to HTML, and
             * save it as a .docx file named after the PDF.
             *
             * Flow: validate selection → fetch API key → OCR → markdown to HTML →
             * size images and style tables → build the DOCX blob → trigger download.
             * The loading overlay is shown and the download button is disabled for
             * the whole operation, so a second export cannot be started while one is
             * running. All errors are reported with `alert`; this function does not reject.
             *
             * @returns {Promise<void>}
             */
            async function exportToDocx() {
                // Snapshot the selection: the user may pick another file while the
                // requests are in flight, and the output name must match the content.
                const sourceFile = pdfFile;

                // Check the cheap local precondition before any network request.
                if (!sourceFile) {
                    alert('Please upload a PDF file first');
                    return;
                }

                downloadBtn.disabled = true;
                loadingsIndicators.style.display = 'flex';

                try {
                    const apiKey = await fetchApiKey();

                    if (!apiKey) {
                        alert('Failed to fetch API key. Please try again.');
                        return;
                    }

                    const ocrResult = await processPdfWithOcr(apiKey, sourceFile);

                    if (!ocrResult) {
                        throw new Error('No data received from the API');
                    }

                    // Prefer a top-level text field; otherwise join the per-page texts.
                    let markdownText = ocrResult.markdown || ocrResult.text || ocrResult.content || '';
                    if (!markdownText && Array.isArray(ocrResult.pages)) {
                        markdownText = ocrResult.pages
                            .filter(Boolean)
                            .map(page => page.markdown || page.text || page.content || '')
                            .join('\n\n');
                    }

                    // Whitespace-only output would otherwise yield an empty document.
                    if (typeof markdownText !== 'string' || !markdownText.trim()) {
                        throw new Error('No text content available in the response');
                    }

                    markdownText = markdownText
                        // Remove footnote references like [^0]
                        .replace(/\[\^\d+\]/g, '')
                        // Two trailing spaces make every newline a markdown hard break.
                        .replace(/\n/g, '  \n');

                    const tempContainer = document.createElement('div');
                    tempContainer.innerHTML = processMarkdownWithImages(markdownText, ocrResult);

                    // NOTE(review): this typesetting is deferred and not awaited, so it
                    // is not guaranteed to run before the DOCX is built; this page also
                    // does not load MathJax itself. Kept as-is for compatibility.
                    setTimeout(() => {
                        if (window.MathJax && typeof window.MathJax.typesetPromise === 'function') {
                            window.MathJax.typesetPromise([tempContainer]).then(() => {
                                console.log('Math rendering complete');
                            }).catch(err => {
                                console.error('Error rendering math:', err);
                            });
                        }
                    }, 100);

                    // Maximum image width in pixels; wider images are scaled down proportionally.
                    const maxWidth = 624;

                    // Resolves with the loaded image, or null when it cannot be decoded.
                    const loadImage = src => new Promise(resolve => {
                        const probe = new Image();
                        probe.onload = () => resolve(probe);
                        probe.onerror = () => resolve(null);
                        probe.src = src;
                    });

                    for (const img of tempContainer.querySelectorAll('img')) {
                        const loadedImg = await loadImage(img.src);
                        if (!loadedImg) continue;

                        // Never upscale; shrink only when the image exceeds maxWidth.
                        const shrink = loadedImg.naturalWidth > maxWidth
                            ? maxWidth / loadedImg.naturalWidth
                            : 1;
                        const width = Math.round(loadedImg.naturalWidth * shrink);
                        const height = Math.round(loadedImg.naturalHeight * shrink);

                        img.width = width;
                        img.height = height;
                        img.setAttribute('style', `width:${width}px;height:${height}px;display:block;margin:10px auto;`);
                    }

                    // Inline table styling, in addition to the stylesheet in the wrapper below.
                    tempContainer.querySelectorAll('table').forEach((table) => {
                        table.setAttribute('border', '1');
                        table.style.borderCollapse = 'collapse';
                        table.style.width = '100%';
                        table.style.marginBottom = '10px';

                        table.querySelectorAll('th, td').forEach((cell) => {
                            cell.style.border = '1px solid black';
                            cell.style.padding = '5px';
                            cell.style.textAlign = 'left';
                        });
                    });

                    // NOTE(review): this also strips plain bracketed numbers such as
                    // "[1]" (citations, array indices) — confirm that is intended.
                    const cleanedHTML = tempContainer.innerHTML.replace(/\[\^?\d+\]/g, '');

                    const fullHTML = `
                      <!DOCTYPE html>
                      <html>
                      <head>
                        <meta charset="utf-8">
                        <style>
                          body { font-family: Arial, sans-serif; font-size: 12pt; }
                          img { display: block; margin: 10px auto; }
                          table { border-collapse: collapse; width: 100%; margin-bottom: 10px; }
                          th, td { border: 1px solid black; padding: 5px; text-align: left; }
                        </style>
                      </head>
                      <body>${cleanedHTML}</body>
                      </html>
                    `;

                    const blob = htmlDocx.asBlob(fullHTML);

                    // "report.pdf" -> "report.docx"; names without a .pdf suffix get ".docx" appended.
                    const originalFilename = sourceFile.name;
                    const docxFilename = /\.pdf$/i.test(originalFilename)
                        ? originalFilename.replace(/\.pdf$/i, '.docx')
                        : originalFilename + '.docx';

                    saveAs(blob, docxFilename);
                } catch (error) {
                    console.error('Error processing PDF:', error);

                    // Tolerate thrown values that are not Error instances.
                    const message = error && error.message ? String(error.message) : String(error);

                    if (message.includes('401') || message.includes('Unauthorized')) {
                        alert('Authentication failed: Please check your API key and try again.');
                    } else if (message.includes('422') || message.includes('Unprocessable')) {
                        alert('API Error: The request was rejected. This might be due to incompatible parameters or model availability.');
                    } else {
                        alert(`Error processing PDF: ${message}`);
                    }
                } finally {
                    loadingsIndicators.style.display = 'none';
                    // A file is selected (checked above), so the button can be used again.
                    downloadBtn.disabled = false;
                }
            }

            // Event Listeners Setup
            // The visible button opens the hidden native file picker.
            uploadBtnn.addEventListener('click', () => {
                fileInputs.click();
            });

            fileInputs.addEventListener('change', (e) => {
                handleFiles(e.target.files);
                // Clear the input so that choosing the same file again (for example
                // after a different file was dropped) still fires a `change` event.
                // The File object already handed to handleFiles stays valid.
                e.target.value = '';
            });

            downloadBtn.addEventListener('click', exportToDocx);

            // Suppress the browser's default handling (opening the file) for every
            // drag event on the drop area.
            ['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
                dropArea.addEventListener(eventName, preventDefaults, false);
            });

            // Highlight while a drag hovers over the area…
            ['dragenter', 'dragover'].forEach(eventName => {
                dropArea.addEventListener(eventName, highlight, false);
            });

            // …and remove the highlight when it leaves or the file is dropped.
            ['dragleave', 'drop'].forEach(eventName => {
                dropArea.addEventListener(eventName, unhighlight, false);
            });

            dropArea.addEventListener('drop', handleDrop, false);
        })();
    </script>
</body>
</html>
