import { getDocument, GlobalWorkerOptions } from 'pdfjs-dist';
import React, { useState, useEffect } from 'react';

// Set the worker source for pdfjs-dist; here the worker script is loaded from the unpkg CDN.
// NOTE(review): the URL pins version 3.11.174 — presumably this has to match the
// installed pdfjs-dist version; confirm whenever the package is upgraded.
GlobalWorkerOptions.workerSrc = 'https://unpkg.com/pdfjs-dist@3.11.174/build/pdf.worker.min.js';

/**
 * Extracts the text of every page of a PDF and rebuilds approximate line
 * structure from the vertical position of each text item.
 *
 * Within a page, consecutive items whose Y coordinates differ by more than
 * 10 units start a new line; items on the same line are joined with a single
 * space. Pages are separated by a blank line, and the final result is trimmed.
 *
 * @param pdfUrl - Location of the PDF, passed straight to `getDocument`.
 * @returns The extracted text, or an empty string if loading or extraction
 *   fails (the error is logged to the console rather than thrown).
 */
const extractTextWithFormatting = async (pdfUrl: string): Promise<string> => {
  // Minimum vertical jump between two consecutive items that counts as a line break.
  const LINE_BREAK_THRESHOLD = 10;

  try {
    const loadingTask = getDocument(pdfUrl);

    try {
      const pdf = await loadingTask.promise;
      const pages: string[] = [];

      for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
        const page = await pdf.getPage(pageNumber);
        const textContent = await page.getTextContent();

        const lines: string[][] = [];
        let currentLine: string[] | undefined;
        // Undefined until the first item of the page is seen, so that the first
        // item never produces a spurious leading line break.
        let lastY: number | undefined;

        for (const item of textContent.items) {
          // Skip entries that carry no text (e.g. marked-content markers).
          if (!('str' in item)) {
            continue;
          }

          const y: number = item.transform[5]; // Y position of the text item

          if (
            currentLine === undefined ||
            lastY === undefined ||
            Math.abs(y - lastY) > LINE_BREAK_THRESHOLD
          ) {
            currentLine = [];
            lines.push(currentLine);
          }

          currentLine.push(item.str);
          lastY = y;
        }

        // Joining (instead of appending "str + ' '") avoids trailing spaces
        // in front of every line break.
        pages.push(lines.map((line) => line.join(' ')).join('\n'));
      }

      // One blank line between pages.
      return pages.join('\n\n').trim();
    } finally {
      // Always release the worker/transport held by the loading task. A cleanup
      // failure must not discard text that was already extracted.
      await loadingTask.destroy().catch(() => undefined);
    }
  } catch (error) {
    console.error('Error extracting text from PDF:', error);
    return '';
  }
};

/**
 * Displays the text extracted from the PDF at `pdfUrl`, preserving the line
 * breaks and spacing of the extracted string.
 *
 * The text is (re)loaded whenever `pdfUrl` changes. While a load is pending the
 * previously shown text stays on screen; if extraction fails, the helper
 * resolves to an empty string and nothing is shown.
 */
const TranscriptText: React.FC<{ pdfUrl: string }> = ({ pdfUrl }) => {
  const [transcriptText, setTranscriptText] = useState<string>('');

  useEffect(() => {
    // Flipped by the cleanup below when `pdfUrl` changes or the component
    // unmounts, so a slow, outdated extraction can neither overwrite the text
    // of a newer URL nor set state on an unmounted component.
    let cancelled = false;

    const fetchTranscript = async () => {
      const text = await extractTextWithFormatting(pdfUrl);
      if (!cancelled) {
        setTranscriptText(text);
      }
    };

    // Fire-and-forget: extractTextWithFormatting handles its own errors and
    // resolves to '' on failure.
    void fetchTranscript();

    return () => {
      cancelled = true;
    };
  }, [pdfUrl]);

  return (
    <div style={{ 
      color: 'white', 
      textAlign: 'left', 
      fontSize: '18px', 
      overflowY: 'auto', 
      height: '100%', 
      whiteSpace: 'pre-wrap' // Preserve line breaks and spaces
    }}>
      {transcriptText}
    </div>
  );
};

export default TranscriptText;