import axios from 'axios';
import * as sdk from 'microsoft-cognitiveservices-speech-sdk';

// Credentials and region are read from the environment when this module loads.
// NOTE(review): the REACT_APP_ prefix suggests these values are inlined into a
// browser bundle at build time, which would expose the keys to end users —
// confirm, and consider proxying these calls through a backend.
const AZURE_SPEECH_KEY = process.env.REACT_APP_AZURE_SPEECH_KEY;
const AZURE_SPEECH_REGION = process.env.REACT_APP_AZURE_SPEECH_REGION;
const OPENAI_API_KEY = process.env.REACT_APP_OPENAI_API_KEY;

// Shared Azure Speech configuration, created once at import time and reused by
// the speech-to-text and text-to-speech helpers in this file. The recognition
// language is fixed to US English.
const speechConfig = sdk.SpeechConfig.fromSubscription(AZURE_SPEECH_KEY, AZURE_SPEECH_REGION);
speechConfig.speechRecognitionLanguage = 'en-US';

/**
 * Runs a single recognition pass on the default microphone input and
 * transcribes it.
 *
 * Takes no parameters; any arguments passed by callers are ignored.
 *
 * @returns {Promise<string>} Resolves with the recognized text.
 * @throws {Error} Rejects with an Error ('Speech not recognized') when the
 *   result is anything other than recognized speech, or with an Error wrapping
 *   whatever the recognizer's failure callback reports.
 */
export const speechToText = () => {
  return new Promise((resolve, reject) => {
    const audioConfig = sdk.AudioConfig.fromDefaultMicrophoneInput();
    const recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);

    recognizer.recognizeOnceAsync(
      (result) => {
        try {
          if (result.reason === sdk.ResultReason.RecognizedSpeech) {
            resolve(result.text);
          } else {
            reject(new Error('Speech not recognized'));
          }
        } finally {
          // Dispose of the recognizer whether or not recognition succeeded.
          recognizer.close();
        }
      },
      (err) => {
        try {
          // Normalize to an Error so callers always get a message and a stack.
          reject(err instanceof Error ? err : new Error(String(err)));
        } finally {
          recognizer.close();
        }
      }
    );
  });
};

/**
 * Synthesizes speech for the given text using the shared speech configuration.
 *
 * @param {string} text - The text to speak.
 * @returns {Promise<*>} Resolves with `result.audioData` as reported by the
 *   SDK once synthesis has completed.
 * @throws {Error} Rejects with an Error ('Speech synthesis canceled') when the
 *   result is anything other than a completed synthesis, or with an Error
 *   wrapping whatever the synthesizer's failure callback reports.
 */
export const textToSpeech = async (text) => {
  const synthesizer = new sdk.SpeechSynthesizer(speechConfig);
  return new Promise((resolve, reject) => {
    synthesizer.speakTextAsync(
      text,
      (result) => {
        try {
          if (result.reason === sdk.ResultReason.SynthesizingAudioCompleted) {
            resolve(result.audioData);
          } else {
            reject(new Error('Speech synthesis canceled'));
          }
        } finally {
          // Dispose of the synthesizer whether or not synthesis succeeded.
          synthesizer.close();
        }
      },
      (error) => {
        try {
          // Normalize to an Error so callers always get a message and a stack.
          reject(error instanceof Error ? error : new Error(String(error)));
        } finally {
          synthesizer.close();
        }
      }
    );
  });
};

/**
 * Sends a chat completion request to the OpenAI API and returns the first
 * choice's message.
 *
 * @param {Array<object>} messages - Conversation messages to send.
 * @param {Array<object>} [functions] - Function definitions the model may
 *   call. When omitted or empty, no function-calling fields are sent.
 * @param {string|object} [function_call='auto'] - Function-calling mode; only
 *   sent together with a non-empty `functions` list.
 * @returns {Promise<object>} The `message` object of the first choice.
 * @throws {Error} Rethrows any request error after logging it, and throws when
 *   the response contains no choices.
 */
export const callOpenAI = async (messages, functions, function_call = 'auto') => {
  const payload = {
    model: 'gpt-4o',
    messages,
    max_tokens: 300,
  };

  // The API rejects `function_call` when no functions are supplied, so both
  // fields are only included when there is at least one function definition.
  if (Array.isArray(functions) && functions.length > 0) {
    payload.functions = functions;
    payload.function_call = function_call;
  }

  try {
    const response = await axios.post(
      'https://api.openai.com/v1/chat/completions',
      payload,
      {
        headers: {
          'Authorization': `Bearer ${OPENAI_API_KEY}`,
          'Content-Type': 'application/json',
        },
      }
    );

    const choices = response.data && response.data.choices;
    if (!Array.isArray(choices) || choices.length === 0) {
      throw new Error('OpenAI API returned no choices');
    }
    return choices[0].message;
  } catch (error) {
    console.error('Error calling OpenAI API:', error.response ? error.response.data : error.message);
    throw error;
  }
};

/**
 * Draws the current frame of a video element onto an off-screen canvas and
 * encodes it as a JPEG data URL.
 *
 * @param {HTMLVideoElement} videoElement - The video element to capture from.
 * @returns {Promise<string>} Resolves with the frame as a data URL (JPEG,
 *   quality 0.8).
 * @throws {Error} Rejects when no video element is given, when the canvas
 *   cannot produce an image, or when reading the encoded image fails.
 */
export const captureImage = (videoElement) => {
  return new Promise((resolve, reject) => {
    if (!videoElement) {
      reject(new Error('No video element provided'));
      return;
    }

    const canvas = document.createElement('canvas');
    canvas.width = videoElement.videoWidth;
    canvas.height = videoElement.videoHeight;
    canvas.getContext('2d').drawImage(videoElement, 0, 0);

    canvas.toBlob((blob) => {
      // toBlob may pass null when no image can be created; reading a null
      // blob would throw inside this callback and leave the promise pending.
      if (!blob) {
        reject(new Error('Failed to capture an image from the video element'));
        return;
      }

      const reader = new FileReader();
      reader.onload = () => resolve(reader.result);
      reader.onerror = () => reject(reader.error || new Error('Failed to read the captured image'));
      reader.readAsDataURL(blob);
    }, 'image/jpeg', 0.8); // JPEG format with 0.8 quality
  });
};
/**
 * Attaches a camera stream to a video element and starts playback.
 *
 * Any stream already attached to the element has its tracks stopped first.
 * Failures to obtain the camera or start playback are logged, not thrown.
 *
 * @param {HTMLVideoElement} videoElement - Target element; nothing happens
 *   when it is falsy.
 * @param {string} [facingMode='user'] - Requested camera facing mode.
 * @returns {Promise<void>}
 */
export const setupCamera = async (videoElement, facingMode = 'user') => {
  if (videoElement) {
    const previousStream = videoElement.srcObject;
    if (previousStream) {
      for (const track of previousStream.getTracks()) {
        track.stop();
      }
    }

    try {
      const constraints = { video: { facingMode } };
      const cameraStream = await navigator.mediaDevices.getUserMedia(constraints);
      videoElement.srcObject = cameraStream;
      await videoElement.play();
    } catch (cameraError) {
      console.error('Error accessing the camera:', cameraError);
    }
  }
};

// Speaks a short filler line (when audio is enabled) and then captures a
// frame from the video element. Returns both the line and the image data.
const announceAndCapture = async (audioEnabled, videoElement, audioContext) => {
  const defaultResponse = getRandomDefaultResponse();
  if (audioEnabled) {
    await textToSpeech(defaultResponse, audioContext);
  }
  const imageData = await captureImage(videoElement);
  return { defaultResponse, imageData };
};

// Builds the user message that carries a captured frame to the model as an
// image content part.
const buildImageMessage = (imageData) => ({
  role: 'user',
  content: [
    { type: 'text', text: "Here's what I'm looking at:" },
    { type: 'image_url', image_url: { url: imageData } },
  ],
});

/**
 * Runs one voice turn: listens for speech, asks the model for a reply
 * (capturing a camera frame when the user or the model asks for one), and
 * optionally speaks the reply.
 *
 * @param {object} userProfile - Serialized into the system prompt.
 * @param {*} memories - Serialized into the system prompt.
 * @param {Array<object>} [previousMessages] - Earlier chat messages, placed
 *   between the system prompt and the new user message. Treated as empty
 *   when missing.
 * @param {boolean} audioEnabled - Whether replies are spoken aloud.
 * @param {HTMLVideoElement} videoElement - Source for captured frames.
 * @param {*} audioContext - Forwarded to the speech helpers; the helpers
 *   defined in this file currently ignore it.
 * @returns {Promise<{text: (string|null), defaultResponse: string}>} The
 *   model's reply text, and the filler line spoken before a capture (empty
 *   string when no capture happened).
 * @throws Rethrows any error from the speech, capture or API steps after
 *   logging it.
 */
export const handleVoiceInteraction = async (userProfile, memories, previousMessages, audioEnabled, videoElement, audioContext) => {
  try {
    const userInput = await speechToText(audioContext);

    const messages = [
      {
        role: 'system',
        content: `You are an AI assistant named Avacasa. You are AI fashion and Skincare expert. Provide short answer as you are voice mode, only opt for longer answer if required. Do not add markdown or even add : this signs or any other in your responses. You can see images if the user asks you to look at something, you have function calling to capture images so when user ask anything visual capture image, don't ask user to upload image as this is live visual mode. 
          User Profile: ${JSON.stringify(userProfile)}
          User Memories: ${JSON.stringify(memories)}`
      },
      ...(previousMessages || []),
      { role: 'user', content: userInput }
    ];

    let defaultResponse = "";

    // Explicit visual requests attach a frame up front, without waiting for
    // the model to ask for one.
    const spoken = userInput.toLowerCase();
    if (spoken.includes('look at') || spoken.includes('what do you see')) {
      const capture = await announceAndCapture(audioEnabled, videoElement, audioContext);
      defaultResponse = capture.defaultResponse;
      messages.push(buildImageMessage(capture.imageData));
    }

    const functions = [
      {
        name: 'capture_image',
        description: 'Capture an image using the device camera',
        parameters: { type: 'object', properties: {} }
      }
    ];

    const aiResponse = await callOpenAI(messages, functions);
    console.log("AI response:", aiResponse);

    let finalResponse;

    if (aiResponse.function_call && aiResponse.function_call.name === 'capture_image') {
      console.log("AI requested image capture");
      const capture = await announceAndCapture(audioEnabled, videoElement, audioContext);
      defaultResponse = capture.defaultResponse;

      // Record the model's function call and a small textual result, then send
      // the frame as an image content part. A data URL placed inside a
      // function message is only text to the model, not an image.
      messages.push(
        {
          role: 'assistant',
          content: aiResponse.content || null,
          function_call: aiResponse.function_call
        },
        {
          role: 'function',
          name: 'capture_image',
          content: JSON.stringify({ status: 'captured' })
        },
        buildImageMessage(capture.imageData)
      );

      // 'none' forces a text answer so this turn cannot end in another
      // function call with empty content.
      const secondResponse = await callOpenAI(messages, functions, 'none');
      console.log("AI second response:", secondResponse);
      finalResponse = secondResponse.content;
    } else {
      finalResponse = aiResponse.content;
    }

    // Skip synthesis when the model returned no text.
    if (audioEnabled && finalResponse) {
      console.log("Converting response to speech...");
      await textToSpeech(finalResponse, audioContext);
    }

    return { text: finalResponse, defaultResponse };
  } catch (error) {
    console.error('Error in voice interaction:', error);
    throw error;
  }
};

/**
 * Picks one of the canned "I'm looking" filler lines uniformly at random.
 *
 * @returns {string} A filler sentence to say while a camera frame is captured.
 */
function getRandomDefaultResponse() {
  const fillerLines = [
    "I'm looking at what you're showing me right now.",
    "I can see your camera feed. What would you like me to focus on?",
    "I'm analyzing the live image from your camera.",
    "I'm examining what's in front of your camera at the moment.",
    "I'm observing the current scene through your device's camera.",
    "I'm taking in the visual information from your camera feed.",
    "I'm processing the live video input from your device.",
    "I'm focusing on what your camera is pointing at right now.",
    "I'm interpreting the real-time visual data from your camera.",
    "I'm assessing the current view from your device's camera."
  ];
  const scaled = Math.random() * fillerLines.length;
  const slot = Math.floor(scaled);
  return fillerLines[slot];
}