import os.path
import time
import warnings

import gradio as gr
import whisper
from pygtrans import Translate
from whisper.tokenizer import LANGUAGES, TO_LANGUAGE_CODE

warnings.simplefilter(action="ignore", category=UserWarning)
warnings.simplefilter(action="ignore", category=FutureWarning)

# https://github.com/openai/whisper
MODELS = {
    # "tiny": whisper.load_model("tiny"),
    # "tiny.en": whisper.load_model("tiny.en"),
    # "base": whisper.load_model("base"),
    # "base.en": whisper.load_model("base.en"),
    # "small": whisper.load_model("small"),
    # "small.en": whisper.load_model("small.en"),
    # "medium": whisper.load_model("medium"),
    # "medium.en": whisper.load_model("medium.en"),
    # "large": whisper.load_model("large"),
    "turbo": whisper.load_model("turbo"),
}
MODEL_LANG_CODES = {"Auto Detect": "None"}
MODEL_LANG_CODES.update(TO_LANGUAGE_CODE)
MODEL_CODE_LANGS = {v: k for k, v in MODEL_LANG_CODES.items()}

# https://github.com/foyoux/pygtrans
AT = Translate(target="zh-CN", fmt="text")
SRTS_DIR = "assets/srts"
SRT_LANGUAGES = {
    "Chinese (Simplified)": "zh-CN",
    "Abkhaz": "ab",
    "Acehnese": "ace",
    "Acholi": "ach",
    "Afar": "aa",
    "Afrikaans": "af",
    "Albanian": "sq",
    "Alur": "alz",
    "Amharic": "am",
    "Arabic": "ar",
    "Armenian": "hy",
    "Assamese": "as",
    "Avar": "av",
    "Awadhi": "awa",
    "Aymara": "ay",
    "Azerbaijani": "az",
    "Balinese": "ban",
    "Baluchi": "bal",
    "Bambara": "bm",
    "Baoulé": "bci",
    "Bashkir": "ba",
    "Basque": "eu",
    "Batak Karo": "btx",
    "Batak Simalungun": "bts",
    "Batak Toba": "bbc",
    "Belarusian": "be",
    "Bemba": "bem",
    "Bengali": "bn",
    "Betawi": "bew",
    "Bhojpuri": "bho",
    "Bikol": "bik",
    "Bosnian": "bs",
    "Breton": "br",
    "Bulgarian": "bg",
    "Buryat": "bua",
    "Cantonese": "yue",
    "Catalan": "ca",
    "Cebuano": "ceb",
    "Chamorro": "ch",
    "Chechen": "ce",
    "Chichewa": "ny",
    # "Chinese (Simplified)": "zh-CN",
    "Chinese (Traditional)": "zh-TW",
    "Chuukese": "chk",
    "Chuvash": "cv",
    "Corsican": "co",
    "Crimean Tatar": "crh",
    "Croatian": "hr",
    "Czech": "cs",
    "Danish": "da",
    "Dari": "fa-AF",
    "Dhivehi": "dv",
    "Dinka": "din",
    "Dogri": "doi",
    "Dombe": "dov",
    "Dutch": "nl",
    "Dyula": "dyu",
    "Dzongkha": "dz",
    "English": "en",
    "Esperanto": "eo",
    "Estonian": "et",
    "Ewe": "ee",
    "Faroese": "fo",
    "Fijian": "fj",
    "Filipino": "tl",
    "Finnish": "fi",
    "Fon": "fon",
    "French": "fr",
    "Frisian": "fy",
    "Friulian": "fur",
    "Fulani": "ff",
    "Ga": "gaa",
    "Galician": "gl",
    "Georgian": "ka",
    "German": "de",
    "Greek": "el",
    "Guarani": "gn",
    "Gujarati": "gu",
    "Haitian Creole": "ht",
    "Hakha Chin": "cnh",
    "Hausa": "ha",
    "Hawaiian": "haw",
    "Hebrew": "iw",
    "Hiligaynon": "hil",
    "Hindi": "hi",
    "Hmong": "hmn",
    "Hungarian": "hu",
    "Hunsrik": "hrx",
    "Iban": "iba",
    "Icelandic": "is",
    "Igbo": "ig",
    "Ilocano": "ilo",
    "Indonesian": "id",
    "Irish": "ga",
    "Italian": "it",
    "Jamaican Patois": "jam",
    "Japanese": "ja",
    "Javanese": "jw",
    "Jingpo": "kac",
    "Kalaallisut": "kl",
    "Kannada": "kn",
    "Kanuri": "kr",
    "Kapampangan": "pam",
    "Kazakh": "kk",
    "Khasi": "kha",
    "Khmer": "km",
    "Kiga": "cgg",
    "Kikongo": "kg",
    "Kinyarwanda": "rw",
    "Kituba": "ktu",
    "Kokborok": "trp",
    "Komi": "kv",
    "Konkani": "gom",
    "Korean": "ko",
    "Krio": "kri",
    "Kurdish (Kurmanji)": "ku",
    "Kurdish (Sorani)": "ckb",
    "Kyrgyz": "ky",
    "Lao": "lo",
    "Latgalian": "ltg",
    "Latin": "la",
    "Latvian": "lv",
    "Ligurian": "lij",
    "Limburgish": "li",
    "Lingala": "ln",
    "Lithuanian": "lt",
    "Lombard": "lmo",
    "Luganda": "lg",
    "Luo": "luo",
    "Luxembourgish": "lb",
    "Macedonian": "mk",
    "Madurese": "mad",
    "Maithili": "mai",
    "Makassar": "mak",
    "Malagasy": "mg",
    "Malay": "ms",
    "Malay (Jawi)": "ms-Arab",
    "Malayalam": "ml",
    "Maltese": "mt",
    "Mam": "mam",
    "Manx": "gv",
    "Maori": "mi",
    "Marathi": "mr",
    "Marshallese": "mh",
    "Marwadi": "mwr",
    "Mauritian Creole": "mfe",
    "Meadow Mari": "chm",
    "Meiteilon (Manipuri)": "mni-Mtei",
    "Minang": "min",
    "Mizo": "lus",
    "Mongolian": "mn",
    "Myanmar (Burmese)": "my",
    "Nahuatl (Eastern Huasteca)": "nhe",
    "Ndau": "ndc-ZW",
    "Ndebele (South)": "nr",
    "Nepalbhasa (Newari)": "new",
    "Nepali": "ne",
    "NKo": "bm-Nkoo",
    "Norwegian": "no",
    "Nuer": "nus",
    "Occitan": "oc",
    "Odia (Oriya)": "or",
    "Oromo": "om",
    "Ossetian": "os",
    "Pangasinan": "pag",
    "Papiamento": "pap",
    "Pashto": "ps",
    "Persian": "fa",
    "Polish": "pl",
    "Portuguese (Brazil)": "pt",
    "Portuguese (Portugal)": "pt-PT",
    "Punjabi (Gurmukhi)": "pa",
    "Punjabi (Shahmukhi)": "pa-Arab",
    "Quechua": "qu",
    "Qʼeqchiʼ": "kek",
    "Romani": "rom",
    "Romanian": "ro",
    "Rundi": "rn",
    "Russian": "ru",
    "Sami (North)": "se",
    "Samoan": "sm",
    "Sango": "sg",
    "Sanskrit": "sa",
    "Santali": "sat-Latn",
    "Scots Gaelic": "gd",
    "Sepedi": "nso",
    "Serbian": "sr",
    "Sesotho": "st",
    "Seychellois Creole": "crs",
    "Shan": "shn",
    "Shona": "sn",
    "Sicilian": "scn",
    "Silesian": "szl",
    "Sindhi": "sd",
    "Sinhala": "si",
    "Slovak": "sk",
    "Slovenian": "sl",
    "Somali": "so",
    "Spanish": "es",
    "Sundanese": "su",
    "Susu": "sus",
    "Swahili": "sw",
    "Swati": "ss",
    "Swedish": "sv",
    "Tahitian": "ty",
    "Tajik": "tg",
    "Tamazight": "ber-Latn",
    "Tamazight (Tifinagh)": "ber",
    "Tamil": "ta",
    "Tatar": "tt",
    "Telugu": "te",
    "Tetum": "tet",
    "Thai": "th",
    "Tibetan": "bo",
    "Tigrinya": "ti",
    "Tiv": "tiv",
    "Tok Pisin": "tpi",
    "Tongan": "to",
    "Tsonga": "ts",
    "Tswana": "tn",
    "Tulu": "tcy",
    "Tumbuka": "tum",
    "Turkish": "tr",
    "Turkmen": "tk",
    "Tuvan": "tyv",
    "Twi": "ak",
    "Udmurt": "udm",
    "Ukrainian": "uk",
    "Urdu": "ur",
    "Uyghur": "ug",
    "Uzbek": "uz",
    "Venda": "ve",
    "Venetian": "vec",
    "Vietnamese": "vi",
    "Waray": "war",
    "Welsh": "cy",
    "Wolof": "wo",
    "Xhosa": "xh",
    "Yakut": "sah",
    "Yiddish": "yi",
    "Yoruba": "yo",
    "Yucatec Maya": "yua",
    "Zapotec": "zap",
    "Zulu": "zu",
}


def translate(text: str, srt_lang, srt_file, audio):
    if not srt_file:
        gr.Warning("Please transcribe the audio first")
        return gr.skip()
    start_time = time.time()
    srts = []
    for i in text.strip().split('\n\n'):
        i = i.strip().split('\n')
        a = i[0]
        b = i[1]
        c = '\n'.join(i[2:])
        srts.append((a, b, c))
    trans = AT.translate([i[2] for i in srts], target=srt_lang)
    srt_trans = []
    for i, j in zip(srts, trans):
        srt_trans.append(i[0])
        srt_trans.append(i[1])
        srt_trans.append(j.translatedText)
        srt_trans.append("")
    translation = '\n'.join(srt_trans)
    file_name = os.path.splitext(os.path.basename(audio))[0]
    end_time = time.time()
    return generate_file(file_name, translation, srt_lang), gr.Text(
        translation, info=f"Time: {end_time - start_time:.2f} seconds"
    )


def generate_srt(results):
    srts = []
    for i in results["segments"]:
        srts.append(f'{i["id"] + 1}')
        srts.append(
            f"{seconds_to_srt_time(i['start'])} --> {seconds_to_srt_time(i['end'])}"
        )
        srts.append(i["text"])
        srts.append("")
    return "\n".join(srts)


def seconds_to_srt_time(seconds):
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    seconds = int(seconds % 60)
    milliseconds = int((seconds - int(seconds)) * 1000)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"


def generate_file(file_name, srt, code):
    os.makedirs(SRTS_DIR, exist_ok=True)
    file_path = os.path.join(SRTS_DIR, f"{file_name}-{code}.srt")
    with open(file_path, "w") as f:
        f.write(srt)
    return file_path


def transcribe(model, model_lang, audio):
    if not audio:
        gr.Warning("Please upload an audio file")
        return gr.skip()
    if model_lang == "None":
        model_lang = None
    start_time = time.time()
    file_name = os.path.splitext(os.path.basename(audio))[0]
    results = MODELS[model].transcribe(audio, language=model_lang)
    code = results["language"]
    language = LANGUAGES.get(code, MODEL_CODE_LANGS.get(code, "Unknown"))
    srt = generate_srt(results)
    end_time = time.time()
    return generate_file(file_name, srt, code), gr.Text(
        srt,
        info=f"Language: {language}, Time: {end_time - start_time:.2f} seconds",
    )


def main():
    with gr.Blocks(analytics_enabled=False) as demo:
        with gr.Row():
            with gr.Column():
                choices = list(MODELS.keys())
                model = gr.Radio(choices=choices, value=choices[0], label="Model")
                model_lang = gr.Dropdown(
                    [(k, v) for k, v in MODEL_LANG_CODES.items()],
                    value="None",
                    label="Audio Language",
                )
                audio = gr.File(label="Audio/Video")
                transcribe_btn = gr.Button("Transcribe")
            with gr.Column():
                srt_file1 = gr.File(label="Transcription SRT File", interactive=False)
                transcribe_text = gr.Text(
                    label="Transcription",
                    lines=10,
                    max_lines=20,
                    autoscroll=False,
                    interactive=False,
                )
                srt_lang = gr.Dropdown(
                    [(k, v) for k, v in SRT_LANGUAGES.items()],
                    value="zh-CN",
                    label="SRT Language",
                )
                translate_btn = gr.Button("Translate")
            with gr.Column():
                srt_file2 = gr.File(label="Translation SRT File")
                translate_text = gr.Text(
                    label="Translation", lines=10, max_lines=20, autoscroll=False
                )
        audio.clear(
            lambda: (
                gr.Text(None, info=None),
                gr.Text(None, info=None),
                None,
                None,
            ),
            outputs=[transcribe_text, translate_text, srt_file1, srt_file2],
        )
        transcribe_btn.click(
            transcribe,
            inputs=[model, model_lang, audio],
            outputs=[srt_file1, transcribe_text],
        )
        translate_btn.click(
            translate,
            inputs=[transcribe_text, srt_lang, srt_file1, audio],
            outputs=[srt_file2, translate_text],
        )
    demo.launch(allowed_paths=["assets/srts"])


if __name__ == "__main__":
    main()