import * as sdk from 'microsoft-cognitiveservices-speech-sdk';
import axios from 'axios';

// Locale tags of the supported languages. They are used both as keys of
// `humanTypeVoiceTable` and as the synthesizer's `speechSynthesisLanguage`.
const japanese = 'ja-JP';
const english = 'en-US';
const chinese = 'zh-CN';
/** Union of the supported locale tags (`'ja-JP' | 'en-US' | 'zh-CN'`). */
export type Lang = typeof japanese | typeof english | typeof chinese;

// Maps each digital-human ID to the URL of its template video.
// `callLipsyncApi` turns the last two path segments (bucket and file name)
// of the URL into a `gs://bucket/file` path.
// NOTE(review): man_01 points at the `v2` file and man_02 at `v1`, while the
// woman entries are in ascending order — confirm this is intentional.
const humanTypeVideoTable = {
  man_01: 'https://storage.googleapis.com/demo-template/dg_male_v2.mp4',
  man_02: 'https://storage.googleapis.com/demo-template/dg_male_v1.mp4',
  woman_01: 'https://storage.googleapis.com/demo-template/dg_female_v1.mp4',
  woman_02: 'https://storage.googleapis.com/demo-template/dg_female_v2.mp4',
} as const;
/** Digital-human IDs that have a template video (`keyof humanTypeVideoTable`). */
export type HumanType = keyof typeof humanTypeVideoTable;

// Voice name assigned to `speechSynthesisVoiceName`, looked up first by the
// avatar's gender (`man` / `woman`) and then by the selected language.
const humanTypeVoiceTable = {
  man: {
    [japanese]: 'ja-JP-KeitaNeural',
    [english]: 'en-US-GuyNeural',
    [chinese]: 'zh-CN-YunxiNeural',
  },
  woman: {
    [japanese]: 'ja-JP-NanamiNeural',
    [english]: 'en-US-AriaNeural',
    [chinese]: 'zh-CN-XiaohanNeural',
  },
} as const;

/**
 * Reports whether a digital-human ID denotes a female avatar, i.e. whether
 * the ID contains the substring "woman".
 *
 * The check must look for "woman" rather than "man", because every
 * "woman" ID also contains "man".
 */
const is_woman = (humanType: string) => /\w*woman\w*/.test(humanType);

// /**
//  * callLipsyncApi
//  * @param {Lang} lang language selected by the user
//  * @param {string} humanType digital-human ID selected by the user
//  * @param {string} text text entered by the user
//  * @return {Promise<string>} Promise that resolves to the job_id
//  */
/**
 * Shape of a lipsync API response body.
 *
 * NOTE(review): presumably this describes the JSON returned by the endpoint
 * that `callLipsyncApi` posts to (which reads `job_id` from the response);
 * the meaning of `status` values and of the optional file-name fields is not
 * visible in this file — confirm against the server.
 */
export interface LipsyncApiResponse {
  status: string;
  /** Identifier of the submitted job. */
  job_id: string;
  in_image_file_name?: string;
  in_audio_file_name?: string;
  in_video_file_name?: string;
}

/**
 * Synthesizes `text` to speech and submits the audio to the lipsync server
 * together with the GCS path of the template video for `humanType`.
 *
 * @param lang - Language selected by the user.
 * @param humanType - Digital-human ID selected by the user; must be a key of
 *   `humanTypeVideoTable`.
 * @param text - Text entered by the user.
 * @returns The `job_id` field of the server's response body.
 * @throws Error if `humanType` has no template video or if
 *   `REACT_APP_LIPSYNC_SERVER_URL` is not defined. Errors from speech
 *   synthesis and from the HTTP request are propagated.
 */
export const callLipsyncApi = async (
  lang: Lang,
  humanType: string,
  text: string
): Promise<string> => {
  // Check the cheap preconditions first so that a bad argument or a missing
  // setting fails with a clear message before a synthesis request is spent.
  if (!Object.prototype.hasOwnProperty.call(humanTypeVideoTable, humanType)) {
    throw new Error(`Unknown humanType: ${humanType}`);
  }
  const videoUrl = humanTypeVideoTable[humanType as keyof typeof humanTypeVideoTable];

  // Only `undefined` is rejected: an empty string is still accepted and
  // yields a path relative to the current origin.
  const lipsyncServerUrl = process.env.REACT_APP_LIPSYNC_SERVER_URL;
  if (lipsyncServerUrl === undefined) {
    throw new Error('Please set REACT_APP_LIPSYNC_SERVER_URL in .env');
  }
  // Strip trailing slashes so the joined URL never contains "//".
  const lipsyncServerEndpoint =
    lipsyncServerUrl.replace(/\/+$/, '') + '/predict_lipsync_async_download_video_from_GCS/';

  // The last two URL segments are the bucket and the object name.
  const [bucket, file] = videoUrl.split('/').slice(-2);
  const gcsVideoPath = `gs://${bucket}/${file}`;

  // NOTE(review): fmin looks like a minimum pitch frequency for the voice
  // (higher for female avatars) — confirm against the server API.
  const fmin = is_woman(humanType) ? 95 : 55;
  const query = new URLSearchParams({
    fmin: fmin.toString(),
    gcs_video_path: gcsVideoPath,
  });

  // NOTE(review): the audio is labelled MP3, but the synthesizer set up in
  // this file does not select an output format — confirm the bytes really
  // are MP3, or that the server detects the format from the content.
  const audioBuffer = await getAudioArrayBufferFromAzureSynthesizer(lang, humanType, text);
  const audioFile = new Blob([audioBuffer], { type: 'audio/mp3' });

  const formData = new FormData();
  formData.append('files', audioFile, 'audio.mp3');

  const response = await axios.post(lipsyncServerEndpoint, formData, {
    params: query,
  });
  return response.data.job_id;
};

/**
 * Speaks `text` through the default speaker and clears the "speaking" flag
 * once playback is estimated to have finished.
 *
 * @param lang - Language selected by the user.
 * @param humanType - Digital-human ID; selects a male or female voice.
 * @param text - Text to speak.
 * @param setSpeaking - React state setter; called with `false` when playback
 *   is estimated to be over, or immediately when synthesis fails.
 * @throws Error synchronously if the synthesizer cannot be created (for
 *   example when the subscription key is missing). A failure reported by the
 *   SDK is re-thrown from inside the SDK callback, so callers cannot catch it
 *   with try/catch; it surfaces as an uncaught error.
 */
export const playAudioWithAzureSynthesizer = (
  lang: Lang,
  humanType: string,
  text: string,
  setSpeaking: React.Dispatch<React.SetStateAction<boolean>>
) => {
  // Heuristic playback time per byte of synthesized audio, in milliseconds.
  // NOTE(review): the callback fires when synthesis completes, not when
  // playback ends, so the duration is estimated from the audio size —
  // confirm 0.15 ms/byte matches the output format in use.
  const msPerAudioByte = 0.15;

  const synthesizer = initSpeechSynthesizer(lang, humanType, 'speaker');

  synthesizer.speakTextAsync(
    text,
    (result) => {
      synthesizer.close();
      // A result can arrive without audio data; treat it as zero-length so
      // the flag is still reset instead of throwing inside the callback.
      const audioData = result ? result.audioData : undefined;
      const byteLength = audioData ? audioData.byteLength : 0;
      setTimeout(() => setSpeaking(false), byteLength * msPerAudioByte);
    },
    (error) => {
      synthesizer.close();
      // Reset the flag before surfacing the error; otherwise the UI would
      // stay in the "speaking" state forever.
      setSpeaking(false);
      throw new Error(error);
    }
  );
};

/**
 * Synthesizes `text` with the Azure Speech SDK and resolves with the raw
 * audio bytes of the result.
 *
 * @param lang - Language selected by the user.
 * @param humanType - Digital-human ID; selects a male or female voice.
 * @param text - Text to synthesize.
 * @returns The synthesized audio data.
 * @throws Rejects with an `Error` when the SDK reports a failure or when the
 *   result carries no audio data. Also rejects if the synthesizer cannot be
 *   created (for example when the subscription key is missing).
 */
const getAudioArrayBufferFromAzureSynthesizer = async (
  lang: Lang,
  humanType: string,
  text: string
): Promise<ArrayBuffer> => {
  const synthesizer = initSpeechSynthesizer(lang, humanType, 'stream');

  return new Promise<ArrayBuffer>((resolve, reject) => {
    synthesizer.speakTextAsync(
      text,
      (result) => {
        // Always release the synthesizer and always settle the promise, so a
        // result without audio can neither leak it nor hang the caller.
        synthesizer.close();
        if (result && result.audioData) {
          resolve(result.audioData);
          return;
        }
        const details =
          result && result.errorDetails ? result.errorDetails : 'no audio data was returned';
        reject(new Error(`Speech synthesis failed: ${details}`));
      },
      (error) => {
        synthesizer.close();
        // Wrap the SDK's string message so callers always receive an Error.
        reject(new Error(error));
      }
    );
  });
};

/** Where synthesized audio is sent: the default speaker or a pull stream. */
type AudioDestinationOption = 'speaker' | 'stream';

/**
 * Creates a `SpeechSynthesizer` configured for the given language, with a
 * male or female voice chosen from `humanType`, writing to the requested
 * audio destination.
 *
 * Reads `REACT_APP_AZURE_SPEECH_SUBSCRIPTION_KEY` (required) and
 * `REACT_APP_AZURE_SPEECH_REGION` (falls back to `'eastus'` when unset or
 * empty) from the environment.
 *
 * @throws Error if the subscription key is not set.
 */
const initSpeechSynthesizer = (
  lang: Lang,
  humanType: string,
  audioDestination: AudioDestinationOption
) => {
  const voiceName = humanTypeVoiceTable[is_woman(humanType) ? 'woman' : 'man'][lang];

  const subscriptionKey = process.env.REACT_APP_AZURE_SPEECH_SUBSCRIPTION_KEY;
  if (!subscriptionKey) {
    throw new Error('Please set your Azure Speech subscription key in .env');
  }

  const speechConfig = sdk.SpeechConfig.fromSubscription(
    subscriptionKey,
    process.env.REACT_APP_AZURE_SPEECH_REGION || 'eastus'
  );
  speechConfig.speechSynthesisLanguage = lang;
  speechConfig.speechSynthesisVoiceName = voiceName;

  // 'speaker' plays through the default output device; 'stream' writes into
  // a freshly created pull stream instead.
  const audioConfig =
    audioDestination === 'speaker'
      ? sdk.AudioConfig.fromDefaultSpeakerOutput()
      : sdk.AudioConfig.fromStreamOutput(sdk.AudioOutputStream.createPullStream());

  return new sdk.SpeechSynthesizer(speechConfig, audioConfig);
};
