diff --git a/AI/LSTM_attention_test.ipynb b/AI/LSTM_attention_test.ipynb new file mode 100644 index 0000000..47e6cd6 --- /dev/null +++ b/AI/LSTM_attention_test.ipynb @@ -0,0 +1,1681 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Ix1Jbg_xWTxg", + "outputId": "2b5f0a6a-2634-47a4-e269-b72e64d74705" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" + ] + } + ], + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ] + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import os\n", + "import json\n", + "import csv\n", + "\n", + "TL_sentence_path = '/content/drive/MyDrive/LSTM+attention/sentence_dataTL.csv'\n", + "VL_sentence_path = '/content/drive/MyDrive/LSTM+attention/sentence_dataVL.csv'\n", + "\n", + "# Load the data files\n", + "TL_sentence_data = pd.read_csv(TL_sentence_path, encoding='utf-8')\n", + "VL_sentence_data = pd.read_csv(VL_sentence_path, encoding='utf-8')\n", + "\n", + "# Drop the Pronunciation column (judged unnecessary), then remove duplicate rows\n", + "TL_sentence_data = TL_sentence_data.drop(columns='Pronunciation')\n", + "TL_sentence_data = TL_sentence_data.drop_duplicates().reset_index(drop=True)\n", + "VL_sentence_data = VL_sentence_data.drop(columns='Pronunciation')\n", + "VL_sentence_data = VL_sentence_data.drop_duplicates().reset_index(drop=True)" + ], + "metadata": { + "id": "xPCQBU1BWfcw" + }, + "execution_count": 40, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + 
"TL_sentence_data[:5]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "UZGjs0aPXrCe", + "outputId": "b61a98c7-d756-401d-dec3-e4cbea4a68c4" + }, + "execution_count": 41, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Dialect \\\n", + "0 여기는 옥수갱이 잘 된다 카던디 옥수갱이 말고는 무신 농사를 많이 짓습니껴 \n", + "1 장례 관련해서 초상집 같은 데 가가 하지 말아야 데는 기 있습니껴 \n", + "2 예전에는 집 안에서 여자들이 남자 위로 띠넘으면 안 덴다 캤습니껴 \n", + "3 음식을 많이 장만하려고 하면 일손이 모자라서 음식하기가 안 힘들었습니까 \n", + "4 이 구두 하나만 계속 신고 댕기이꺼네 인자 굽이 많이 닳아서 갈아야 되겠네 \n", + "\n", + " Standard \n", + "0 여기는 옥수수 잘 된다 하던데 옥수수 말고는 무슨 농사를 많이 짓습니까 \n", + "1 장례 관련해서 초상집 같은 데 가서 하지 말아야 데는 게 있습니까 \n", + "2 예전에는 집 안에서 여자들이 남자 위로 뛰어넘으면 안 된다 했습니까 \n", + "3 음식을 많이 장만하려고 하면 일손이 모자라서 음식하기가 안 힘들었습니까 \n", + "4 이 구두 하나만 계속 신고 다니니까 이제 굽이 많이 닳아서 갈아야 되겠네 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DialectStandard
0여기는 옥수갱이 잘 된다 카던디 옥수갱이 말고는 무신 농사를 많이 짓습니껴여기는 옥수수 잘 된다 하던데 옥수수 말고는 무슨 농사를 많이 짓습니까
1장례 관련해서 초상집 같은 데 가가 하지 말아야 데는 기 있습니껴장례 관련해서 초상집 같은 데 가서 하지 말아야 데는 게 있습니까
2예전에는 집 안에서 여자들이 남자 위로 띠넘으면 안 덴다 캤습니껴예전에는 집 안에서 여자들이 남자 위로 뛰어넘으면 안 된다 했습니까
3음식을 많이 장만하려고 하면 일손이 모자라서 음식하기가 안 힘들었습니까음식을 많이 장만하려고 하면 일손이 모자라서 음식하기가 안 힘들었습니까
4이 구두 하나만 계속 신고 댕기이꺼네 인자 굽이 많이 닳아서 갈아야 되겠네이 구두 하나만 계속 신고 다니니까 이제 굽이 많이 닳아서 갈아야 되겠네
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"TL_sentence_data[:5]\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"Dialect\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"\\uc7a5\\ub840 \\uad00\\ub828\\ud574\\uc11c \\ucd08\\uc0c1\\uc9d1 \\uac19\\uc740 \\ub370 \\uac00\\uac00 \\ud558\\uc9c0 \\ub9d0\\uc544\\uc57c \\ub370\\ub294 \\uae30 \\uc788\\uc2b5\\ub2c8\\uaef4\",\n \"\\uc774 \\uad6c\\ub450 \\ud558\\ub098\\ub9cc \\uacc4\\uc18d \\uc2e0\\uace0 \\ub315\\uae30\\uc774\\uaebc\\ub124 \\uc778\\uc790 \\uad7d\\uc774 \\ub9ce\\uc774 \\ub2f3\\uc544\\uc11c \\uac08\\uc544\\uc57c \\ub418\\uaca0\\ub124\",\n \"\\uc608\\uc804\\uc5d0\\ub294 \\uc9d1 \\uc548\\uc5d0\\uc11c \\uc5ec\\uc790\\ub4e4\\uc774 \\ub0a8\\uc790 \\uc704\\ub85c \\ub760\\ub118\\uc73c\\uba74 \\uc548 \\ub374\\ub2e4 \\ucea4\\uc2b5\\ub2c8\\uaef4\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Standard\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"\\uc7a5\\ub840 \\uad00\\ub828\\ud574\\uc11c \\ucd08\\uc0c1\\uc9d1 \\uac19\\uc740 \\ub370 \\uac00\\uc11c \\ud558\\uc9c0 \\ub9d0\\uc544\\uc57c \\ub370\\ub294 \\uac8c \\uc788\\uc2b5\\ub2c8\\uae4c\",\n \"\\uc774 \\uad6c\\ub450 \\ud558\\ub098\\ub9cc \\uacc4\\uc18d \\uc2e0\\uace0 \\ub2e4\\ub2c8\\ub2c8\\uae4c \\uc774\\uc81c \\uad7d\\uc774 \\ub9ce\\uc774 \\ub2f3\\uc544\\uc11c \\uac08\\uc544\\uc57c \\ub418\\uaca0\\ub124\",\n \"\\uc608\\uc804\\uc5d0\\ub294 \\uc9d1 \\uc548\\uc5d0\\uc11c \\uc5ec\\uc790\\ub4e4\\uc774 \\ub0a8\\uc790 \\uc704\\ub85c \\ub6f0\\uc5b4\\ub118\\uc73c\\uba74 \\uc548 \\ub41c\\ub2e4 \\ud588\\uc2b5\\ub2c8\\uae4c\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 41 + } + ] + }, + { + "cell_type": "code", + "source": [ + "VL_sentence_data[:5]" + ], + "metadata": { + "colab": { + 
"base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "a0cWFdpxDKN7", + "outputId": "d47ecea6-b4fc-4d39-fd3c-d1d7369da953" + }, + "execution_count": 42, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Dialect \\\n", + "0 오랫동안 한 동네에서 살았던 할머니이제 도주식하면 매매 아프네요 \n", + "1 혈압약은 시간을 맞춰 챙겨 드셔야지 안 그러면 효과가 없습니다 \n", + "2 집에 돌아와 보이꺼네 문이 열려 있고 뼈다지가 열어둔 돈 전부 없어지던 어이떼 \n", + "3 아들 오늘 중요한 시험 보니까에 이 생엿 하고 사가꼬 먹고 힘내서 시험 잘 봐레이 \n", + "4 옛날부터 조상꿈이나 돼지꿈 꾸만 집에 돈 많이 들어온다고 좋아 해지로 \n", + "\n", + " Standard \n", + "0 오랫동안 한 동네에서 살았던 할머니인데 돌아가겨서 마음이 아프네요 \n", + "1 혈압약은 시간을 맞춰 챙겨 드셔야지 안 그러면 효과가 없습니다 \n", + "2 집에 돌아와 보니까 문이 열려 있고 서랍이 열어둔 돈 전부 없어지던 어이떼 \n", + "3 아들 오늘 중요한 시험 보니까 이 생 엿 하고 사서 먹고 힘내서 시험 잘 봐 \n", + "4 옛날부터 조상꿈이나 돼지꿈 꾸면 집에 돈 많이 들어온다고 좋아 했죠 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DialectStandard
0오랫동안 한 동네에서 살았던 할머니이제 도주식하면 매매 아프네요오랫동안 한 동네에서 살았던 할머니인데 돌아가겨서 마음이 아프네요
1혈압약은 시간을 맞춰 챙겨 드셔야지 안 그러면 효과가 없습니다혈압약은 시간을 맞춰 챙겨 드셔야지 안 그러면 효과가 없습니다
2집에 돌아와 보이꺼네 문이 열려 있고 뼈다지가 열어둔 돈 전부 없어지던 어이떼집에 돌아와 보니까 문이 열려 있고 서랍이 열어둔 돈 전부 없어지던 어이떼
3아들 오늘 중요한 시험 보니까에 이 생엿 하고 사가꼬 먹고 힘내서 시험 잘 봐레이아들 오늘 중요한 시험 보니까 이 생 엿 하고 사서 먹고 힘내서 시험 잘 봐
4옛날부터 조상꿈이나 돼지꿈 꾸만 집에 돈 많이 들어온다고 좋아 해지로옛날부터 조상꿈이나 돼지꿈 꾸면 집에 돈 많이 들어온다고 좋아 했죠
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"VL_sentence_data[:5]\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"Dialect\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"\\ud608\\uc555\\uc57d\\uc740 \\uc2dc\\uac04\\uc744 \\ub9de\\ucdb0 \\ucc59\\uaca8 \\ub4dc\\uc154\\uc57c\\uc9c0 \\uc548 \\uadf8\\ub7ec\\uba74 \\ud6a8\\uacfc\\uac00 \\uc5c6\\uc2b5\\ub2c8\\ub2e4\",\n \"\\uc61b\\ub0a0\\ubd80\\ud130 \\uc870\\uc0c1\\uafc8\\uc774\\ub098 \\ub3fc\\uc9c0\\uafc8 \\uafb8\\ub9cc \\uc9d1\\uc5d0 \\ub3c8 \\ub9ce\\uc774 \\ub4e4\\uc5b4\\uc628\\ub2e4\\uace0 \\uc88b\\uc544 \\ud574\\uc9c0\\ub85c\",\n \"\\uc9d1\\uc5d0 \\ub3cc\\uc544\\uc640 \\ubcf4\\uc774\\uaebc\\ub124 \\ubb38\\uc774 \\uc5f4\\ub824 \\uc788\\uace0 \\ubf08\\ub2e4\\uc9c0\\uac00 \\uc5f4\\uc5b4\\ub454 \\ub3c8 \\uc804\\ubd80 \\uc5c6\\uc5b4\\uc9c0\\ub358 \\uc5b4\\uc774\\ub5bc\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Standard\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"\\ud608\\uc555\\uc57d\\uc740 \\uc2dc\\uac04\\uc744 \\ub9de\\ucdb0 \\ucc59\\uaca8 \\ub4dc\\uc154\\uc57c\\uc9c0 \\uc548 \\uadf8\\ub7ec\\uba74 \\ud6a8\\uacfc\\uac00 \\uc5c6\\uc2b5\\ub2c8\\ub2e4\",\n \"\\uc61b\\ub0a0\\ubd80\\ud130 \\uc870\\uc0c1\\uafc8\\uc774\\ub098 \\ub3fc\\uc9c0\\uafc8 \\uafb8\\uba74 \\uc9d1\\uc5d0 \\ub3c8 \\ub9ce\\uc774 \\ub4e4\\uc5b4\\uc628\\ub2e4\\uace0 \\uc88b\\uc544 \\ud588\\uc8e0\",\n \"\\uc9d1\\uc5d0 \\ub3cc\\uc544\\uc640 \\ubcf4\\ub2c8\\uae4c \\ubb38\\uc774 \\uc5f4\\ub824 \\uc788\\uace0 \\uc11c\\ub78d\\uc774 \\uc5f4\\uc5b4\\ub454 \\ub3c8 \\uc804\\ubd80 \\uc5c6\\uc5b4\\uc9c0\\ub358 \\uc5b4\\uc774\\ub5bc\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 42 + } + ] + }, + { + "cell_type": "code", + "source": [ + "standard_sentences_TL = 
TL_sentence_data['Standard']\n", + "dialect_sentences_TL = TL_sentence_data['Dialect']\n", + "standard_sentences_VL = VL_sentence_data['Standard']\n", + "dialect_sentences_VL = VL_sentence_data['Dialect']" + ], + "metadata": { + "id": "jlxCy4d3WyDB" + }, + "execution_count": 43, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "standard_sentences_TL[:5]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "aj-awCcGFReV", + "outputId": "4cf3e645-11c4-4450-9615-0926f2306167" + }, + "execution_count": 44, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 여기는 옥수수 잘 된다 하던데 옥수수 말고는 무슨 농사를 많이 짓습니까\n", + "1 장례 관련해서 초상집 같은 데 가서 하지 말아야 데는 게 있습니까\n", + "2 예전에는 집 안에서 여자들이 남자 위로 뛰어넘으면 안 된다 했습니까\n", + "3 음식을 많이 장만하려고 하면 일손이 모자라서 음식하기가 안 힘들었습니까\n", + "4 이 구두 하나만 계속 신고 다니니까 이제 굽이 많이 닳아서 갈아야 되겠네\n", + "Name: Standard, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 44 + } + ] + }, + { + "cell_type": "code", + "source": [ + "dialect_sentences_TL[:5]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Ctrb-c6hFWAX", + "outputId": "6340fc72-eb4e-4ab8-9757-cabea9479748" + }, + "execution_count": 45, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 여기는 옥수갱이 잘 된다 카던디 옥수갱이 말고는 무신 농사를 많이 짓습니껴\n", + "1 장례 관련해서 초상집 같은 데 가가 하지 말아야 데는 기 있습니껴\n", + "2 예전에는 집 안에서 여자들이 남자 위로 띠넘으면 안 덴다 캤습니껴\n", + "3 음식을 많이 장만하려고 하면 일손이 모자라서 음식하기가 안 힘들었습니까\n", + "4 이 구두 하나만 계속 신고 댕기이꺼네 인자 굽이 많이 닳아서 갈아야 되겠네\n", + "Name: Dialect, dtype: object" + ] + }, + "metadata": {}, + "execution_count": 45 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# 학습 데이터 중에서 겹치는 표준어 문장과 방언 문장 제거\n", + "filtered_data_TR = {\n", + " \"src\": [],\n", + " \"tar\": []\n", + "}\n", + "\n", + "for i in range(0, len(dialect_sentences_TL)):\n", + " if (standard_sentences_TL[i] != dialect_sentences_TL[i]):\n", + " 
filtered_data_TR[\"src\"].append(dialect_sentences_TL[i])\n", + " filtered_data_TR[\"tar\"].append(standard_sentences_TL[i])\n", + "\n", + "filtered_df_TR = pd.DataFrame(filtered_data_TR)\n", + "\n", + "print(filtered_df_TR[:10])\n", + "print(len(filtered_df_TR))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GEIz3cMTXc76", + "outputId": "094b4095-e589-401a-ea7b-1d882bdd5963" + }, + "execution_count": 46, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " src \\\n", + "0 여기는 옥수갱이 잘 된다 카던디 옥수갱이 말고는 무신 농사를 많이 짓습니껴 \n", + "1 장례 관련해서 초상집 같은 데 가가 하지 말아야 데는 기 있습니껴 \n", + "2 예전에는 집 안에서 여자들이 남자 위로 띠넘으면 안 덴다 캤습니껴 \n", + "3 이 구두 하나만 계속 신고 댕기이꺼네 인자 굽이 많이 닳아서 갈아야 되겠네 \n", + "4 콩이파리는 가시가 있어가 꺼끄럽고 뻣뻣하고 묵어 보면 맛이 없어예 \n", + "5 여기에는 옥수갱이가 잘 된다 카던디 옥수갱이 말고는 무신 농사를 마이 짓습니껴 \n", + "6 여개는 옥수갱이가 잘 된다 카던디 옥수갱이 말고는 무신 농사를 마이 짓습니껴 \n", + "7 음식 먹으만 계속 설사하고 토하고 할 때는 물 많이 잡수고 병원에 가봐야 합니데이 \n", + "8 논두렁에 전선이 늘어져 있거나 정전이 됐을 때 두꺼비 집을 무짜로 만지만 위험합니더 \n", + "9 딱꾹지를 멈치지도 않고 점들 하는디 이럴 때는 우예 해야 합니껴 \n", + "\n", + " tar \n", + "0 여기는 옥수수 잘 된다 하던데 옥수수 말고는 무슨 농사를 많이 짓습니까 \n", + "1 장례 관련해서 초상집 같은 데 가서 하지 말아야 데는 게 있습니까 \n", + "2 예전에는 집 안에서 여자들이 남자 위로 뛰어넘으면 안 된다 했습니까 \n", + "3 이 구두 하나만 계속 신고 다니니까 이제 굽이 많이 닳아서 갈아야 되겠네 \n", + "4 콩잎은 가시가 있어서 껄끄럽고 뻣뻣하고 먹어 보면 맛이 없어요 \n", + "5 여기에는 옥수수가 잘 된다 하던데 옥수수 말고는 무슨 농사를 많이 짓습니까 \n", + "6 여기는 옥수수가 잘 된다 하던데 옥수수 말고는 무슨 농사를 많이 짓습니까 \n", + "7 음식 먹으면 계속 설사하고 토하고 할 때는 물 많이 잡수고 병원에 가봐야 합니다 \n", + "8 논두렁에 전선이 늘어져 있거나 정전이 됐을 때 두꺼비 집을 함부로 만지면 위험합니다 \n", + "9 딱꾹지를 멈추지도 않고 점들 하는데 이럴 때는 어떻게 해야 합니까 \n", + "211878\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# 검증 데이터 중에서 겹치는 표준어 문장과 방언 문장 제거\n", + "filtered_data_VL = {\n", + " \"src\": [],\n", + " \"tar\": []\n", + "}\n", + "\n", + "for i in range(0, len(dialect_sentences_VL)):\n", + " if (standard_sentences_VL[i] != dialect_sentences_VL[i]):\n", + " filtered_data_VL[\"src\"].append(dialect_sentences_VL[i])\n", + " 
filtered_data_VL[\"tar\"].append(standard_sentences_VL[i])\n", + "\n", + "filtered_df_VL = pd.DataFrame(filtered_data_VL)\n", + "\n", + "print(filtered_df_VL[:10])\n", + "print(len(filtered_df_VL))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "kv006ObsFwYF", + "outputId": "98d2caf3-6d41-46d2-d33c-871d77841043" + }, + "execution_count": 47, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " src \\\n", + "0 오랫동안 한 동네에서 살았던 할머니이제 도주식하면 매매 아프네요 \n", + "1 집에 돌아와 보이꺼네 문이 열려 있고 뼈다지가 열어둔 돈 전부 없어지던 어이떼 \n", + "2 아들 오늘 중요한 시험 보니까에 이 생엿 하고 사가꼬 먹고 힘내서 시험 잘 봐레이 \n", + "3 옛날부터 조상꿈이나 돼지꿈 꾸만 집에 돈 많이 들어온다고 좋아 해지로 \n", + "4 게얼에 먹을 채소나 과일 같은 것은 어데 보관을 했습니꺼 \n", + "5 촌구숙이라 젊은 사람들은 함부레 없고 전부 노인들만 있으이꺼네 농사 짓기가 힘들어요 \n", + "6 촌구석이라 젊은 사람들은 한 번이 없고 전부 노인들만 있으니까네 농사 짓기가 힘들어요 \n", + "7 소도 사람맨치로 잘 먹어야 근육도 붙고 심도 생겨서 일을 잘 하지로 \n", + "8 소도 사람 맨치로 잘 먹어야 근육도 붇고 힘도 생겨서 일을 잘 하지요 \n", + "9 옷가심을 짜를 때는 미리 선을 끟어 놓아야 쪽바리 잘 자를 수 있어예 \n", + "\n", + " tar \n", + "0 오랫동안 한 동네에서 살았던 할머니인데 돌아가겨서 마음이 아프네요 \n", + "1 집에 돌아와 보니까 문이 열려 있고 서랍이 열어둔 돈 전부 없어지던 어이떼 \n", + "2 아들 오늘 중요한 시험 보니까 이 생 엿 하고 사서 먹고 힘내서 시험 잘 봐 \n", + "3 옛날부터 조상꿈이나 돼지꿈 꾸면 집에 돈 많이 들어온다고 좋아 했죠 \n", + "4 겨울에 먹을 채소나 과일 같은 것은 어디에 보관을 했습니까 \n", + "5 촌구석이라 젊은 사람들은 아예 없고 전부 노인들만 있으니까 농사 짓기가 힘들어요 \n", + "6 촌구석이라 젊은 사람들은 한 번이 없고 전부 노인들만 있으니까 농사 짓기가 힘들어요 \n", + "7 소도 사람처럼 잘 먹어야 근육도 붙고 힘도 생겨서 일을 잘 하지요 \n", + "8 소도 사람 처럼 잘 먹어야 근육도 붇고 힘도 생겨서 일을 잘 하지요 \n", + "9 옷감을 자를 때는 미리 선을 그어 놓아야 똑바로 잘 자를 수 있어요 \n", + "27509\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# 문장 길이 계산\n", + "def sentenceLengths(sentences):\n", + " return [len(sentence.split(' ')) for sentence in sentences]" + ], + "metadata": { + "id": "OFCJmuqdOo6m" + }, + "execution_count": 48, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "plt.hist(sentenceLengths(filtered_data_TR['src']), bins=10)\n", + "plt.xlabel('length of 
dialect')\n", + "plt.ylabel('number of dialect')\n", + "plt.show()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 449 + }, + "id": "-zqR5FSPpN3X", + "outputId": "8c626b82-8e48-4504-f14a-98261e591f3f" + }, + "execution_count": 49, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "plt.hist(sentenceLengths(filtered_data_TR['tar']), bins=10)\n", + "plt.xlabel('length of standard')\n", + "plt.ylabel('number of standard')\n", + "plt.show()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 449 + }, + "id": "wET-0eUhp2Vv", + "outputId": "af7c7c24-db11-43ad-a5e7-2a8f2e376979" + }, + "execution_count": 50, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "def threshold_len_max(max_len, data):\n", + "    \"\"\"Return the percentage (0-100) of items in `data` whose len() is <= max_len.\n", + "\n", + "    `data` may be any iterable of sized items, including a one-shot generator.\n", + "    Empty input yields 0.0 instead of raising ZeroDivisionError.\n", + "    \"\"\"\n", + "    lengths = [len(sentence) for sentence in data]  # materialise once: generators have no len()\n", + "    if not lengths:\n", + "        return 0.0\n", + "    return sum(length <= max_len for length in lengths) / len(lengths) * 100\n", + "\n", + "\n", + "def threshold_len_min(min_len, data):\n", + "    \"\"\"Return the percentage (0-100) of items in `data` whose len() is >= min_len.\n", + "\n", + "    `data` may be any iterable of sized items, including a one-shot generator.\n", + "    Empty input yields 0.0 instead of raising ZeroDivisionError.\n", + "    \"\"\"\n", + "    lengths = [len(sentence) for sentence in data]  # materialise once: generators have no len()\n", + "    if not lengths:\n", + "        return 0.0\n", + "    return sum(length >= min_len for length in lengths) / len(lengths) * 100" + ], + "metadata": { + "id": "SqMQxZO4p1TQ" + }, + "execution_count": 51, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "len(filtered_data_TR['src'])" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cRQ_fdfSE0Rb", + "outputId": "7abe010f-44bf-4014-d36f-b9a812542219" + }, + "execution_count": 52, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "211878" + ] + }, + "metadata": {}, + "execution_count": 52 + } + ] + }, + { + "cell_type": "code", + "source": [ + "max_len = 22\n", + "dialect_max = threshold_len_max(max_len, (sentence.split(' ') for sentence in filtered_data_TR['src']))\n", + "standard_max = threshold_len_max(max_len, (sentence.split(' ') for sentence in filtered_data_TR['tar']))\n", + "\n", + "print(f\"dialect 중 {max_len} 이하인 비율은 {dialect_max}\")\n", + "print(f\"standard 중 {max_len} 이하인 비율은 {standard_max}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Ali5lQXaqSf0", + "outputId": "398406ce-6798-40b8-9942-ab1546088d0f" + }, + "execution_count": 53, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "dialect 중 22 이하인 비율은 80.23060440442141\n", + "standard 중 22 이하인 비율은 80.11355591425254\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "## 문장의 길이가 긴 것이 많아 80프로 정도의 
데이터만 남김\n", + "\n", + "d_filter_indices = [i for i, sentence in enumerate(sentence.split(' ') for sentence in filtered_data_TR['src']) if len(sentence) <= max_len ]\n", + "s_filter_indices = [i for i, sentence in enumerate(sentence.split(' ') for sentence in filtered_data_TR['tar']) if len(sentence) <= max_len ]" + ], + "metadata": { + "id": "iLXOEUz2u45D" + }, + "execution_count": 54, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# sorted() keeps the surviving row positions in a deterministic, ascending order\n", + "indices = sorted(set(d_filter_indices) & set(s_filter_indices))" + ], + "metadata": { + "id": "aV630gtgwMDM" + }, + "execution_count": 55, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "len(indices)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "U2I4XBAtPd_b", + "outputId": "69928af8-0155-4376-f9ad-f74c02b7bb57" + }, + "execution_count": 56, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "169723" + ] + }, + "metadata": {}, + "execution_count": 56 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import pickle\n", + "\n", + "# Load the pre-filtered sentence lists from the pickle files.\n", + "# These files are not written by any earlier cell; presumably they were saved from\n", + "# `indices` in a previous session -- TODO confirm, or regenerate them here.\n", + "# NOTE: pickle.load can execute arbitrary code, so only load files you created yourself.\n", + "with open('/content/drive/MyDrive/LSTM+attention/filtered_dialect.pkl', 'rb') as f:\n", + "    filtered_dialect = pickle.load(f)\n", + "\n", + "with open('/content/drive/MyDrive/LSTM+attention/filtered_standard.pkl', 'rb') as f:\n", + "    filtered_standard = pickle.load(f)\n", + "\n", + "# Source and target lists must stay index-aligned\n", + "assert len(filtered_dialect) == len(filtered_standard)\n", + "\n", + "# Inspect the loaded data\n", + "print(filtered_dialect[:10])\n", + "print(filtered_standard[:10])" + ], + "metadata": { + "id": "B041nyJnISFJ", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "a1a6f21a-e62b-451d-ba83-7bc6131169a4" + }, + "execution_count": 57, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "['여기는 옥수갱이 잘 된다 카던디 옥수갱이 말고는 무신 농사를 많이 짓습니껴', '장례 관련해서 초상집 같은 데 가가 하지 말아야 데는 기 있습니껴', '예전에는 집 안에서 여자들이 남자 위로 띠넘으면 안 덴다 캤습니껴', '이 구두 하나만 계속 신고 댕기이꺼네 인자 굽이 많이 닳아서 갈아야 되겠네', '콩이파리는 가시가 있어가 꺼끄럽고 뻣뻣하고 묵어 보면 맛이 없어예', '여기에는 옥수갱이가 잘 된다 카던디 옥수갱이 말고는 
무신 농사를 마이 짓습니껴', '여개는 옥수갱이가 잘 된다 카던디 옥수갱이 말고는 무신 농사를 마이 짓습니껴', '음식 먹으만 계속 설사하고 토하고 할 때는 물 많이 잡수고 병원에 가봐야 합니데이', '논두렁에 전선이 늘어져 있거나 정전이 됐을 때 두꺼비 집을 무짜로 만지만 위험합니더', '딱꾹지를 멈치지도 않고 점들 하는디 이럴 때는 우예 해야 합니껴']\n", + "['여기는 옥수수 잘 된다 하던데 옥수수 말고는 무슨 농사를 많이 짓습니까', '장례 관련해서 초상집 같은 데 가서 하지 말아야 데는 게 있습니까', '예전에는 집 안에서 여자들이 남자 위로 뛰어넘으면 안 된다 했습니까', '이 구두 하나만 계속 신고 다니니까 이제 굽이 많이 닳아서 갈아야 되겠네', '콩잎은 가시가 있어서 껄끄럽고 뻣뻣하고 먹어 보면 맛이 없어요', '여기에는 옥수수가 잘 된다 하던데 옥수수 말고는 무슨 농사를 많이 짓습니까', '여기는 옥수수가 잘 된다 하던데 옥수수 말고는 무슨 농사를 많이 짓습니까', '음식 먹으면 계속 설사하고 토하고 할 때는 물 많이 잡수고 병원에 가봐야 합니다', '논두렁에 전선이 늘어져 있거나 정전이 됐을 때 두꺼비 집을 함부로 만지면 위험합니다', '딱꾹지를 멈추지도 않고 점들 하는데 이럴 때는 어떻게 해야 합니까']\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(len(filtered_dialect))\n", + "print(len(filtered_standard))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_dUEMZ8HRPow", + "outputId": "5067f019-15c7-478e-97ad-074ad2d14085" + }, + "execution_count": 58, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "169723\n", + "169723\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "plt.hist(sentenceLengths(filtered_dialect), bins=10)\n", + "plt.xlabel('length of dialect')\n", + "plt.ylabel('number of dialect')\n", + "plt.show()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 449 + }, + "id": "yf8viS-nR3bN", + "outputId": "6b532d01-82f7-49b4-9926-a530de3da23d" + }, + "execution_count": 59, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "plt.hist(sentenceLengths(filtered_standard), bins=10)\n", + "plt.xlabel('length of standard')\n", + "plt.ylabel('number of standard')\n", + "plt.show()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 449 + }, + "id": "2g430fC7RliO", + "outputId": "c0891822-c902-49b8-aeb0-4821ad1e608b" + }, + "execution_count": 60, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAk0AAAGwCAYAAAC0HlECAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA2C0lEQVR4nO3df1iUVf7/8deADuCPAX8BoqK4WmIqJiiim9bKikU/TNyPmqtk2q4taoqZ+l3TdNsw3S11cXXNNvvs5mZupiWJKYpeJamL2qIrlC6GrQL+BCUFhfv7R8t8nLC8x6AZ4Pm4rrku7nMOZ94z08TL+z5zxmIYhiEAAAB8Jw9XFwAAAFAbEJoAAABMIDQBAACYQGgCAAAwgdAEAABgAqEJAADABEITAACACQ1cXUBdUVFRoVOnTqlp06ayWCyuLgcAAJhgGIYuXbqkoKAgeXh897kkQlM1OXXqlNq1a+fqMgAAwG04efKk2rZt+51jCE3VpGnTppK+ftJtNpuLqwEAAGYUFxerXbt29r/j34XQVE0qL8nZbDZCEwAAtYyZpTUsBAcAADCB0AQAAGACoQkAAMAEQhMAAIAJhCYAAAATCE0AAAAmEJoAAABMIDQBAACYQGgCAAAwgdAEAABgAqEJAADABEITAACACYQmAAAAEwhNAAAAJhCaAAAATGjg6gIA1D8dZqW4ugSnnVgY6+oSALgYZ5oAAABMIDQBAACYQGgCAAAwgdAEAABgAqEJAADABEITAACACYQmAAAAEwhNAAAAJhCaAAAATCA0AQAAmEBoAgAAMIHQBAAAYAKhCQAAwARCEwAAgAmEJgAAABMITQAAACYQmgAAAEwgNAEAAJhAaAIAADCB0AQAAGACoQkAAMAEQhMAAIAJhCYAAAATCE0AAAAmEJoAAABMIDQBAACYQGgCAAAwgdAEAABgAqEJAADABEITAACACW4TmhYuXCiLxaKpU6fa265evaqEhAS1aNFCTZo0UVxcnAoKChx+Ly8vT7GxsWrUqJH8/f01Y8YMXb9+3WFMenq6evXqJS8vL3Xq1Elr1qypcv/Lly9Xhw4d5O3trcjISO3bt68mHiYAAKil3CI07d+/X3/605/Uo0cPh/Zp06bp/fff1/r167Vr1y6dOnVKw4YNs/eXl5crNjZWZWVl2rNnj9544w2tWbNGc+fOtY/Jzc1VbGys7rvvPh06dEhTp07VhAkTtHXrVvuYdevWKTExUfPmzdOBAwcUFhammJgYFRYW1vyDBwAAtYLFMAzDlQVcvnxZvXr10h//+Ee98MIL6tmzp5YsWaKioiK1atVKa9eu1fDhwyVJ2dnZCg0NVUZGhvr27astW7bowQcf1KlTpxQQECBJWrlypWbOnKkzZ87IarVq5syZSklJ0eHDh+33OXLkSF28eFGpqamSpMjISPXu3VvJycmSpIqKCrVr106TJ0/WrFmzTD2O4uJi+fr6qqioSDabrTqfIqDO6TArxdUlOO3EwlhXlwCgBjjz99vlZ5oSEhIUGxur6Ohoh/bMzExdu3bNob1Lly4KDg5WRkaGJCkjI0Pdu3e3ByZJiomJUXFxsY4cOWIf8825Y2Ji7HOUlZUpMzPTYYyHh4eio6PtY26mtLRUxcXFDjcAAFB3NXDlnb/11ls6cOCA9u/fX6UvPz9fVqtVfn5+Du0BAQHKz8+3j7kxMFX2V/Z915ji4mJduXJFFy5cUHl5+U3HZGdnf2vtSUlJmj9/vrkHCgAAaj2XnWk6efKknn76ab355pvy9vZ2VRm3bfbs2SoqKrLfTp486eqSAABADXJZaMrMzFRhYaF69eqlBg0aqEGDBtq1a5eWLVumBg0aKCAgQGVlZbp48aLD7xUUFCgwMFCSFBgYWOXTdJXHtxpjs9nk4+Ojli1bytPT86ZjKue4GS8vL9lsNocbAACou1wWmgYNGqSsrCwdOnTIfouIiNDo0aPtPzds2FBpa
Wn238nJyVFeXp6ioqIkSVFRUcrKynL4lNu2bdtks9nUtWtX+5gb56gcUzmH1WpVeHi4w5iKigqlpaXZxwAAALhsTVPTpk3VrVs3h7bGjRurRYsW9vbx48crMTFRzZs3l81m0+TJkxUVFaW+fftKkgYPHqyuXbtqzJgxWrRokfLz8zVnzhwlJCTIy8tLkjRx4kQlJyfr2Wef1RNPPKEdO3bo7bffVkrK/316JzExUfHx8YqIiFCfPn20ZMkSlZSUaNy4cT/QswEAANydSxeC38orr7wiDw8PxcXFqbS0VDExMfrjH/9o7/f09NTmzZv11FNPKSoqSo0bN1Z8fLwWLFhgHxMSEqKUlBRNmzZNS5cuVdu2bbV69WrFxMTYx4wYMUJnzpzR3LlzlZ+fr549eyo1NbXK4nAAAFB/uXyfprqCfZoA89inCYC7qFX7NAEAANQGhCYAAAATCE0AAAAmEJoAAABMIDQBAACYQGgCAAAwgdAEAABgAqEJAADABEITAACACYQmAAAAEwhNAAAAJhCaAAAATCA0AQAAmEBoAgAAMIHQBAAAYAKhCQAAwARCEwAAgAmEJgAAABMITQAAACYQmgAAAEwgNAEAAJhAaAIAADCB0AQAAGACoQkAAMAEQhMAAIAJhCYAAAATCE0AAAAmEJoAAABMIDQBAACYQGgCAAAwgdAEAABgQgNXFwAAtUGHWSmuLsFpJxbGuroEoE7hTBMAAIAJhCYAAAATCE0AAAAmEJoAAABMIDQBAACYQGgCAAAwgdAEAABgAqEJAADABEITAACACYQmAAAAEwhNAAAAJhCaAAAATCA0AQAAmEBoAgAAMIHQBAAAYAKhCQAAwARCEwAAgAmEJgAAABMITQAAACYQmgAAAEwgNAEAAJhAaAIAADCB0AQAAGACoQkAAMAEQhMAAIAJhCYAAAATCE0AAAAmNDAzqFmzZrJYLKYmPH/+/PcqCAAAwB2ZCk1Lliyx/3zu3Dm98MILiomJUVRUlCQpIyNDW7du1XPPPVcjRQIAALiaxTAMw5lfiIuL03333adJkyY5tCcnJ2v79u3auHFjddZXaxQXF8vX11dFRUWy2WyuLgdwax1mpbi6hHrhxMJYV5cAuD1n/n47vaZp69atGjJkSJX2IUOGaPv27c5OBwAAUCs4HZpatGihTZs2VWnftGmTWrRoUS1FAQAAuBtTa5puNH/+fE2YMEHp6emKjIyUJO3du1epqal69dVXq71AAAAAd+B0aHr88ccVGhqqZcuWacOGDZKk0NBQffTRR/YQBQAAUNc4FZquXbumX/7yl3ruuef05ptv1lRNAAAAbsepNU0NGzbUO++8U1O1AAAAuC2nF4IPHTq03m4rAAAA6i+nQ1Pnzp21YMECDR8+XElJSVq2bJnDzRkrVqxQjx49ZLPZZLPZFBUVpS1bttj7r169qoSEBLVo0UJNmjRRXFycCgoKHObIy8tTbGysGjVqJH9/f82YMUPXr193GJOenq5evXrJy8tLnTp10po1a6rUsnz5cnXo0EHe3t6KjIzUvn37nHosAACgbnN6Ifhrr70mPz8/ZWZmKjMz06HPYrFoypQppudq27atFi5cqM6dO8swDL3xxht65JFHdPDgQd11112aNm2aUlJStH79evn6+mrSpEkaNmyYPv74Y0lSeXm5YmNjFRgYqD179uj06dMaO3asGjZsqBdffFGSlJubq9jYWE2cOFFvvvmm0tLSNGHCBLVu3VoxMTGSpHXr1ikxMVErV65UZGSklixZopiYGOXk5Mjf39/ZpwgAANRBTu8IXtOaN2+uxYsXa/jw4WrVqpXWrl2r4cOHS5Kys7MVGhqqjIwM9e3bV1u2bNGDDz6oU6dOKSAgQJK0cuVKzZw5U2fOnJHVatXMmTOVkpKiw4cP2+9j5MiRunjxolJTUyVJkZGR6t27t5KTkyVJFRUVateunSZPnqxZs2bdtM7S0lKVlpbaj4uLi9WuXTt2BAdMY
EfwHwY7ggO3VqM7gteU8vJyvfXWWyopKVFUVJQyMzN17do1RUdH28d06dJFwcHBysjIkPT1d951797dHpgkKSYmRsXFxTpy5Ih9zI1zVI6pnKOsrEyZmZkOYzw8PBQdHW0fczNJSUny9fW139q1a/f9nwQAAOC2nL48J0lffvml3nvvPeXl5amsrMyh7+WXX3ZqrqysLEVFRenq1atq0qSJ3n33XXXt2lWHDh2S1WqVn5+fw/iAgADl5+dLkvLz8x0CU2V/Zd93jSkuLtaVK1d04cIFlZeX33RMdnb2t9Y9e/ZsJSYm2o8rzzQBAIC6yenQlJaWpocfflgdO3ZUdna2unXrphMnTsgwDPXq1cvpAu68804dOnRIRUVF+vvf/674+Hjt2rXL6Xl+aF5eXvLy8nJ1GQAA4Afi9OW52bNn65lnnlFWVpa8vb31zjvv6OTJkxo4cKB+9rOfOV2A1WpVp06dFB4erqSkJIWFhWnp0qUKDAxUWVmZLl686DC+oKBAgYGBkqTAwMAqn6arPL7VGJvNJh8fH7Vs2VKenp43HVM5BwAAgNOh6ejRoxo7dqwkqUGDBrpy5YqaNGmiBQsW6KWXXvreBVVUVKi0tFTh4eFq2LCh0tLS7H05OTnKy8tTVFSUJCkqKkpZWVkqLCy0j9m2bZtsNpu6du1qH3PjHJVjKuewWq0KDw93GFNRUaG0tDT7GAAAAKcvzzVu3Ni+jql169Y6fvy47rrrLknS2bNnnZpr9uzZuv/++xUcHKxLly5p7dq1Sk9P19atW+Xr66vx48crMTFRzZs3l81m0+TJkxUVFaW+fftKkgYPHqyuXbtqzJgxWrRokfLz8zVnzhwlJCTYL51NnDhRycnJevbZZ/XEE09ox44devvtt5WS8n+f3klMTFR8fLwiIiLUp08fLVmyRCUlJRo3bpyzTw8AAKijnA5Nffv21UcffaTQ0FA98MADmj59urKysrRhwwZ7mDGrsLBQY8eO1enTp+Xr66sePXpo69at+ulPfypJeuWVV+Th4aG4uDiVlpYqJiZGf/zjH+2/7+npqc2bN+upp55SVFSUGjdurPj4eC1YsMA+JiQkRCkpKZo2bZqWLl2qtm3bavXq1fY9miRpxIgROnPmjObOnav8/Hz17NlTqampVRaHAwCA+svpfZr+/e9/6/Lly+rRo4dKSko0ffp07dmzR507d9bLL7+s9u3b11Stbs2ZfR6A+o59mn4Y7NME3Jozf7+dPtPUsWNH+8+NGzfWypUrna8QAACglnGbzS0BAADcmakzTc2aNZPFYjE14fnz579XQQAAAO7IVGhasmSJ/edz587phRdeUExMjP0j+RkZGdq6dauee+65GikSAADA1ZxeCB4XF6f77rtPkyZNcmhPTk7W9u3btXHjxuqsr9ZgIThgHgvBfxgsBAdurUa/sHfr1q0aMmRIlfYhQ4Zo+/btzk4HAABQKzgdmlq0aKFNmzZVad+0aZNatGhRLUUBAAC4G6e3HJg/f74mTJig9PR0RUZGSpL27t2r1NRUvfrqq9VeIAAAgDtwOjQ9/vjjCg0N1bJly7RhwwZJUmhoqD766CN7iAIAAKhrnA5NkhQZGak333yzumsBAABwW7cVmioqKnTs2DEVFhaqoqLCoW/AgAHVUhgAAIA7cTo0ffLJJ3rsscf0xRdf6Ju7FVgsFpWXl1dbcQAAAO7C6dA0ceJERUREKCUlRa1btza9UzgAAEBt5nRo+vzzz/X3v/9dnTp1qol6AAAA3JLT+zRFRkbq2LFjNVELAACA23L6TNPkyZM1ffp05efnq3v37mrYsKFDf48ePaqtOAAAAHfhdGiKi4uTJD3xxBP2NovFIsMwWAgOAADqLKdDU25ubk3UAQAA4NacDk3t27eviToAAADc2m1tbilJ//rXv5SXl6eysjKH9ocffvh7FwUAAOBunA5N//73v/Xoo48qKyvLvpZJkn2/JtY0AQCAusjpLQeefvpph
YSEqLCwUI0aNdKRI0e0e/duRUREKD09vQZKBAAAcD2nzzRlZGRox44datmypTw8POTh4aEf//jHSkpK0pQpU3Tw4MGaqBMAAMClnD7TVF5erqZNm0qSWrZsqVOnTkn6eoF4Tk5O9VYHAADgJpw+09StWzd9+umnCgkJUWRkpBYtWiSr1apVq1apY8eONVEjAACAyzkdmubMmaOSkhJJ0oIFC/Tggw/qnnvuUYsWLfTWW29Ve4EAAADuwOnQFBMTY/+5U6dOys7O1vnz59WsWTP7J+gAAADqGqfXND3xxBO6dOmSQ1vz5s311VdfOXy1CgAAQF1iMSo3WjLJ09NTp0+flr+/v0P72bNnFRgYqOvXr1drgbVFcXGxfH19VVRUJJvN5upyUI90mJXi6hLgpk4sjHV1CYDbc+bvt+nLc8XFxTIMQ4Zh6NKlS/L29rb3lZeX64MPPqgSpAAAAOoK06HJz89PFotFFotFd9xxR5V+i8Wi+fPnV2txAAAA7sJ0aNq5c6cMw9BPfvITvfPOO2revLm9z2q1qn379goKCqqRIgEAAFzNdGgaOHCgJCk3N1fBwcF8Ug4AANQrTn967ujRo/r444/tx8uXL1fPnj312GOP6cKFC9VaHAAAgLtwOjTNmDFDxcXFkqSsrCwlJibqgQceUG5urhITE6u9QAAAAHfg9OaWubm56tq1qyTpnXfe0UMPPaQXX3xRBw4c0AMPPFDtBQIAALgDp880Wa1WffXVV5Kk7du3a/DgwZK+3uCy8gwUAABAXeP0maYf//jHSkxMVP/+/bVv3z6tW7dOkvTZZ5+pbdu21V4gAACAO3D6TFNycrIaNGigv//971qxYoXatGkjSdqyZYuGDBlS7QUCAAC4A6fPNAUHB2vz5s1V2l955ZVqKQgAAMAdOX2mCQAAoD4iNAEAAJhAaAIAADDBVGj65z//qYqKipquBQAAwG2ZCk133323zp49K0nq2LGjzp07V6NFAQAAuBtTocnPz0+5ubmSpBMnTnDWCQAA1DumthyIi4vTwIED1bp1a1ksFkVERMjT0/OmY//9739Xa4EAAADuwFRoWrVqlYYNG6Zjx45pypQpevLJJ9W0adOarg0AAMBtmN7csnK378zMTD399NOEJgAAUK84vSP466+/bv/5yy+/lCS+cw4AANR5Tu/TVFFRoQULFsjX11ft27dX+/bt5efnp9/85jcsEAcAAHWW02eafv3rX+u1117TwoUL1b9/f0nSRx99pOeff15Xr17Vb3/722ovEgAAwNWcDk1vvPGGVq9erYcfftje1qNHD7Vp00a/+tWvCE0AAKBOcvry3Pnz59WlS5cq7V26dNH58+erpSgAAAB343RoCgsLU3JycpX25ORkhYWFVUtRAAAA7sbpy3OLFi1SbGystm/frqioKElSRkaGTp48qQ8++KDaCwQAAHAHTp9pGjhwoD777DM9+uijunjxoi5evKhhw4YpJydH99xzT03UCAAA4HJOn2mSpKCgIBZ8AwCAesXpM00AAAD1EaEJAADABEITAACACU6FJsMwlJeXp6tXr9ZUPQAAAG7J6dDUqVMnnTx5sqbqAQAAcEtOhSYPDw917txZ586dq6l6AAAA3JLTa5oWLlyoGTNm6PDhwzVRDwAAgFtyep+msWPH6quvvlJYWJisVqt8fHwc+vn+OQAAUBc5HZqWLFlSA2UAAAC4N6dDU3x8fE3UAQAA4NZua5+m48ePa86cORo1apQKCwslSVu2bNGRI0ecmicpKUm9e/dW06ZN5e/vr6FDhyonJ8dhzNWrV5WQkKAWLVqoSZMmiouLU0FBgcOYvLw8xcbGqlGjRvL399eMGTN0/fp1hzHp6enq1auXvLy81KlTJ61Zs6ZKPcuXL1eHDh3k7e2tyMhI7du3z6nHAwAA6i6nQ9OuXbvUvXt37d27Vxs2bNDly5clSZ9++qnmzZvn9FwJCQn65JNPtG3bNl27dk2DBw9WSUmJf
cy0adP0/vvva/369dq1a5dOnTqlYcOG2fvLy8sVGxursrIy7dmzR2+88YbWrFmjuXPn2sfk5uYqNjZW9913nw4dOqSpU6dqwoQJ2rp1q33MunXrlJiYqHnz5unAgQMKCwtTTEyMPRQCAID6zWIYhuHML0RFRelnP/uZEhMT1bRpU3366afq2LGj9u3bp2HDhunLL7+87WLOnDkjf39/7dq1SwMGDFBRUZFatWqltWvXavjw4ZKk7OxshYaGKiMjQ3379tWWLVv04IMP6tSpUwoICJAkrVy5UjNnztSZM2dktVo1c+ZMpaSkOHzib+TIkbp48aJSU1MlSZGRkerdu7eSk5MlSRUVFWrXrp0mT56sWbNmVam1tLRUpaWl9uPi4mK1a9dORUVFstlst/0cAM7qMCvF1SXATZ1YGOvqEgC3V1xcLF9fX1N/v50+05SVlaVHH320Sru/v7/Onj3r7HQOioqKJEnNmzeXJGVmZuratWuKjo62j+nSpYuCg4OVkZEhScrIyFD37t3tgUmSYmJiVFxcbL9cmJGR4TBH5ZjKOcrKypSZmekwxsPDQ9HR0fYx35SUlCRfX1/7rV27dt/rsQMAAPfmdGjy8/PT6dOnq7QfPHhQbdq0ue1CKioqNHXqVPXv31/dunWTJOXn58tqtcrPz89hbEBAgPLz8+1jbgxMlf2Vfd81pri4WFeuXNHZs2dVXl5+0zGVc3zT7NmzVVRUZL+xSzoAAHWb05+eGzlypGbOnKn169fLYrGooqJCH3/8sZ555hmNHTv2tgtJSEjQ4cOH9dFHH932HD8kLy8veXl5uboMAADwA3H6TNOLL76oLl26qF27drp8+bK6du2qAQMGqF+/fpozZ85tFTFp0iRt3rxZO3fuVNu2be3tgYGBKisr08WLFx3GFxQUKDAw0D7mm5+mqzy+1RibzSYfHx+1bNlSnp6eNx1TOQcAAKjfnA5NVqtVr776qo4fP67Nmzfrr3/9q7Kzs/WXv/xFnp6eTs1lGIYmTZqkd999Vzt27FBISIhDf3h4uBo2bKi0tDR7W05OjvLy8hQVFSXp64XpWVlZDp9y27Ztm2w2m7p27Wofc+MclWMq57BarQoPD3cYU1FRobS0NPsYAABQvzl9ea5ScHCwffGzxWK5rTkSEhK0du1abdq0SU2bNrWvH/L19ZWPj498fX01fvx4JSYmqnnz5rLZbJo8ebKioqLUt29fSdLgwYPVtWtXjRkzRosWLVJ+fr7mzJmjhIQE++WziRMnKjk5Wc8++6yeeOIJ7dixQ2+//bZSUv7vU0eJiYmKj49XRESE+vTpoyVLlqikpETjxo273acIAADUIbe1ueVrr72mbt26ydvbW97e3urWrZtWr17t9DwrVqxQUVGR7r33XrVu3dp+W7dunX3MK6+8ogcffFBxcXEaMGCAAgMDtWHDBnu/p6enNm/eLE9PT0VFRennP/+5xo4dqwULFtjHhISEKCUlRdu2bVNYWJh+//vfa/Xq1YqJibGPGTFihH73u99p7ty56tmzpw4dOqTU1NQqi8MBAED95PQ+TXPnztXLL79sP+Mjff2R/uTkZE2bNs0hrNQnzuzzAFQn9mnCt2GfJuDWnPn77fTluRUrVujVV1/VqFGj7G0PP/ywevToocmTJ9fb0AQAAOo2py/PXbt2TREREVXaw8PDq3zfGwAAQF3hdGgaM2aMVqxYUaV91apVGj16dLUUBQAA4G5MXZ5LTEy0/2yxWLR69Wp9+OGH9k+w7d27V3l5ed9rc0sAAAB3Zio0HTx40OE4PDxcknT8+HFJUsuWLdWyZUv7d70BAADUNaZC086dO2u6DgAAALd2W/s0AQAA1DdObzlw9epV/eEPf9DOnTtVWFioiooKh/4DBw5UW3EAAADuwunQNH78eH344YcaPny4+vTpc9tfoQIAAFCbOB2aNm/erA8++ED9+/eviXoAAADcktNrmtq0aaOmTZvWRC0AAABuy+nQ9
Pvf/14zZ87UF198URP1AAAAuCWnL89FRETo6tWr6tixoxo1aqSGDRs69J8/f77aigMAAHAXToemUaNG6T//+Y9efPFFBQQEsBAcAADUC06Hpj179igjI0NhYWE1UQ8AAIBbcnpNU5cuXXTlypWaqAUAAMBtOR2aFi5cqOnTpys9PV3nzp1TcXGxww0AAKAucvry3JAhQyRJgwYNcmg3DEMWi0Xl5eXVUxkAAIAbcTo08eW9AACgPnI6NA0cOLAm6gAAAHBrToem3bt3f2f/gAEDbrsYAAAAd+V0aLr33nurtN24VxNrmgAAQF3kdGi6cOGCw/G1a9d08OBBPffcc/rtb39bbYUBAL6fDrNSXF2C004sjHV1CcC3cjo0+fr6Vmn76U9/KqvVqsTERGVmZlZLYQAAAO7E6X2avk1AQIBycnKqazoAAAC34vSZpn/+858Ox4Zh6PTp01q4cKF69uxZXXUBAAC4FadDU8+ePWWxWGQYhkN737599ec//7naCgMAAHAnToem3Nxch2MPDw+1atVK3t7e1VYUAACAu3E6NLVv374m6gAAAHBrTocmSUpLS1NaWpoKCwtVUVHh0MclOgAAUBc5HZrmz5+vBQsWKCIiQq1bt3bY2BIAAKCucjo0rVy5UmvWrNGYMWNqoh4AAAC35PQ+TWVlZerXr19N1AIAAOC2nA5NEyZM0Nq1a2uiFgAAALfl9OW5q1evatWqVdq+fbt69Oihhg0bOvS//PLL1VYcAACAu7itHcErd/4+fPiwQx+LwgEAQF3ldGjauXNnTdQBAADg1qrtC3sBAADqMkITAACACYQmAAAAEwhNAAAAJhCaAAAATCA0AQAAmEBoAgAAMIHQBAAAYAKhCQAAwARCEwAAgAmEJgAAABMITQAAACYQmgAAAEwgNAEAAJhAaAIAADCB0AQAAGACoQkAAMAEQhMAAIAJhCYAAAATCE0AAAAmEJoAAABMIDQBAACYQGgCAAAwgdAEAABgAqEJAADABEITAACACYQmAAAAEwhNAAAAJhCaAAAATCA0AQAAmNDA1QUAAFCpw6wUV5fgtBMLY11dAn4gLj3TtHv3bj300EMKCgqSxWLRxo0bHfoNw9DcuXPVunVr+fj4KDo6Wp9//rnDmPPnz2v06NGy2Wzy8/PT+PHjdfnyZYcx//znP3XPPffI29tb7dq106JFi6rUsn79enXp0kXe3t7q3r27Pvjgg2p/vAAAoPZyaWgqKSlRWFiYli9fftP+RYsWadmyZVq5cqX27t2rxo0bKyYmRlevXrWPGT16tI4cOaJt27Zp8+bN2r17t37xi1/Y+4uLizV48GC1b99emZmZWrx4sZ5//nmtWrXKPmbPnj0aNWqUxo8fr4MHD2ro0KEaOnSoDh8+XHMPHgAA1CoWwzAMVxchSRaLRe+++66GDh0q6euzTEFBQZo+fbqeeeYZSVJRUZECAgK0Zs0ajRw5UkePHlXXrl21f/9+RURESJJSU1P1wAMP6Msvv1RQUJBWrFihX//618rPz5fVapUkzZo1Sxs3blR2drYkacSIESopKdHmzZvt9fTt21c9e/bUypUrTdVfXFwsX19fFRUVyWazVdfTAtxSbbycAdQlXJ6r3Zz5++22a5pyc3OVn5+v6Ohoe5uvr68iIyOVkZGhkSNHKiMjQ35+fvbAJEnR0dHy8PDQ3r179eijjyojI0MDBgywByZJiomJ0UsvvaQLFy6oWbNmysjIUGJiosP9x8TEVLlceKPS0lKVlpbaj4uLi6vhUcPVCCAAgG/jtqEpPz9fkhQQEODQHhAQYO/Lz8+Xv7+/Q3+DBg3UvHlzhzEhISFV5qjsa9asmfLz87/zfm4mKSlJ8+fPv41HBgCA69XGfyS6+qweWw7cptmzZ6uoqMh+O3nypKtLAgAANchtQ1NgYKAkqaCgwKG9oKDA3hcYGKjCwkKH/uvXr+v8+fMOY242x4338W1jKvtvxsvLSzabzeEGAADqL
rcNTSEhIQoMDFRaWpq9rbi4WHv37lVUVJQkKSoqShcvXlRmZqZ9zI4dO1RRUaHIyEj7mN27d+vatWv2Mdu2bdOdd96pZs2a2cfceD+VYyrvBwAAwKWh6fLlyzp06JAOHTok6evF34cOHVJeXp4sFoumTp2qF154Qe+9956ysrI0duxYBQUF2T9hFxoaqiFDhujJJ5/Uvn379PHHH2vSpEkaOXKkgoKCJEmPPfaYrFarxo8fryNHjmjdunVaunSpw8Lvp59+Wqmpqfr973+v7OxsPf/88/rHP/6hSZMm/dBPCQAAcFMuXQj+j3/8Q/fdd5/9uDLIxMfHa82aNXr22WdVUlKiX/ziF7p48aJ+/OMfKzU1Vd7e3vbfefPNNzVp0iQNGjRIHh4eiouL07Jly+z9vr6++vDDD5WQkKDw8HC1bNlSc+fOddjLqV+/flq7dq3mzJmj//f//p86d+6sjRs3qlu3bj/AswAAAGoDt9mnqbZjn6a6oTZ+mgSAa7n6E123qzb+/64mnmtn/n677ZomAAAAd0JoAgAAMIHQBAAAYAKhCQAAwARCEwAAgAmEJgAAABMITQAAACa4dHNLAABqu9q43xFuD2eaAAAATCA0AQAAmEBoAgAAMIHQBAAAYAKhCQAAwARCEwAAgAmEJgAAABMITQAAACYQmgAAAEwgNAEAAJjA16igxvDVAgCAuoQzTQAAACYQmgAAAEwgNAEAAJhAaAIAADCB0AQAAGACoQkAAMAEQhMAAIAJhCYAAAATCE0AAAAmEJoAAABMIDQBAACYQGgCAAAwgdAEAABgAqEJAADABEITAACACYQmAAAAEwhNAAAAJhCaAAAATCA0AQAAmEBoAgAAMIHQBAAAYAKhCQAAwARCEwAAgAmEJgAAABMITQAAACYQmgAAAEwgNAEAAJhAaAIAADCB0AQAAGACoQkAAMAEQhMAAIAJDVxdAMzpMCvF1SUAAFCvcaYJAADABEITAACACYQmAAAAEwhNAAAAJhCaAAAATCA0AQAAmEBoAgAAMIHQBAAAYAKhCQAAwARCEwAAgAmEJgAAABMITQAAACYQmgAAAEwgNAEAAJhAaAIAADCB0AQAAGACoQkAAMAEQtM3LF++XB06dJC3t7ciIyO1b98+V5cEAADcAKHpBuvWrVNiYqLmzZunAwcOKCwsTDExMSosLHR1aQAAwMUITTd4+eWX9eSTT2rcuHHq2rWrVq5cqUaNGunPf/6zq0sDAAAu1sDVBbiLsrIyZWZmavbs2fY2Dw8PRUdHKyMjo8r40tJSlZaW2o+LiookScXFxTVSX0XpVzUyLwAAtUVN/I2tnNMwjFuOJTT919mzZ1VeXq6AgACH9oCAAGVnZ1cZn5SUpPnz51dpb9euXY3VCABAfea7pObmvnTpknx9fb9zDKHpNs2ePVuJiYn244qKCn3xxRfq2bOnTp48KZvN5sLqYEZxcbHatWvH61UL8FrVHrxWtQuv19dnmC5duqSgoKBbjiU0/VfLli3l6empgoICh/aCggIFBgZWGe/l5SUvLy+HNg+Pr5eI2Wy2evsfX23E61V78FrVHrxWtUt9f71udYapEgvB/8tqtSo8PFxpaWn2toqKCqWlpSkqKsqFlQEAAHfAmaYbJCYmKj4+XhEREerTp4+WLFmikpISjRs3ztWlAQAAFyM03WDEiBE6c+aM5s6dq/z8fPXs2VOpqalVFod/Gy8vL82bN6/KZTu4J16v2oPXqvbgtapdeL2cYzHMfMYOAACgnmNNEwAAgAmEJgAAABMITQAAACYQmgAAAEwgNFWj5cuXq0OHDvL29lZkZKT27dvn6pLwDc8//7wsFovDrUuXLq4uC/+1e/duPfTQQwoKCpLFYtHGjRsd+g3D0Ny5c9W6dWv5+PgoOjpan3/+uWuKredu9Vo9/vjjVd5rQ4YMcU2x9VxSUpJ69+6tpk2byt/fX0OHDlVOTo7DmKtXryohIUEtWrRQkyZNFBcXV
2WzZxCaqs26deuUmJioefPm6cCBAwoLC1NMTIwKCwtdXRq+4a677tLp06ftt48++sjVJeG/SkpKFBYWpuXLl9+0f9GiRVq2bJlWrlypvXv3qnHjxoqJidHVq1d/4Epxq9dKkoYMGeLwXvvb3/72A1aISrt27VJCQoI++eQTbdu2TdeuXdPgwYNVUlJiHzNt2jS9//77Wr9+vXbt2qVTp05p2LBhLqzaTRmoFn369DESEhLsx+Xl5UZQUJCRlJTkwqrwTfPmzTPCwsJcXQZMkGS8++679uOKigojMDDQWLx4sb3t4sWLhpeXl/G3v/3NBRWi0jdfK8MwjPj4eOORRx5xST34boWFhYYkY9euXYZhfP0+atiwobF+/Xr7mKNHjxqSjIyMDFeV6ZY401QNysrKlJmZqejoaHubh4eHoqOjlZGR4cLKcDOff/65goKC1LFjR40ePVp5eXmuLgkm5ObmKj8/3+F95uvrq8jISN5nbio9PV3+/v6688479dRTT+ncuXOuLgmSioqKJEnNmzeXJGVmZuratWsO760uXbooODiY99Y3EJqqwdmzZ1VeXl5l5/CAgADl5+e7qCrcTGRkpNasWaPU1FStWLFCubm5uueee3Tp0iVXl4ZbqHwv8T6rHYYMGaL//d//VVpaml566SXt2rVL999/v8rLy11dWr1WUVGhqVOnqn///urWrZukr99bVqtVfn5+DmN5b1XF16igXrn//vvtP/fo0UORkZFq37693n77bY0fP96FlQF1y8iRI+0/d+/eXT169NCPfvQjpaena9CgQS6srH5LSEjQ4cOHWct5mzjTVA1atmwpT0/PKp80KCgoUGBgoIuqghl+fn664447dOzYMVeXgluofC/xPqudOnbsqJYtW/Jec6FJkyZp8+bN2rlzp9q2bWtvDwwMVFlZmS5evOgwnvdWVYSmamC1WhUeHq60tDR7W0VFhdLS0hQVFeXCynArly9f1vHjx9W6dWtXl4JbCAkJUWBgoMP7rLi4WHv37uV9Vgt8+eWXOnfuHO81FzAMQ5MmTdK7776rHTt2KCQkxKE/PDxcDRs2dHhv5eTkKC8vj/fWN3B5rpokJiYqPj5eERER6tOnj5YsWaKSkhKNGzfO1aXhBs8884weeughtW/fXqdOndK8efPk6empUaNGubo06OsQe+OZiNzcXB06dEjNmzdXcHCwpk6dqhdeeEGdO3dWSEiInnvuOQUFBWno0KGuK7qe+q7Xqnnz5po/f77i4uIUGBio48eP69lnn1WnTp0UExPjwqrrp4SEBK1du1abNm1S06ZN7euUfH195ePjI19fX40fP16JiYlq3ry5bDabJk+erKioKPXt29fF1bsZV398ry75wx/+YAQHBxtWq9Xo06eP8cknn7i6JHzDiBEjjNatWxtWq9Vo06aNMWLECOPYsWOuLgv/tXPnTkNSlVt8fLxhGF9vO/Dcc88ZAQEBhpeXlzFo0CAjJyfHtUXXU9/1Wn311VfG4MGDjVatWhkNGzY02rdvbzz55JNGfn6+q8uul272OkkyXn/9dfuYK1euGL/61a+MZs2aGY0aNTIeffRR4/Tp064r2k1ZDMMwfvioBgAAULuwpgkAAMAEQhMAAIAJhCYAAAATCE0AAAAmEJoAAABMIDQBAACYQGgCAAAwgdAEAABgAqEJgGn33nuvpk6d6uoyJEnp6emyWCxVvmS0Ojz//PMKCAiQxWLRxo0bq33+6lSTz0NNzg3URoQmAG7vhwxrR48e1fz58/WnP/1Jp0+f1v333+/U7584cUIWi0WHDh2qmQIBuAxf2AsANzh+/Lgk6ZFHHpHFYnFxNT+MsrIyWa1WV5cBuD3ONAG4baWlpXrmmWfUpk0bNW7cWJGRkUpPT7f3r1mzRn5+ftq6datCQ0PVpEkTDRkyRKdPn7aPuX79uqZMmSI/Pz+1aNFCM2fOVHx8vIYOHSpJevzxx7Vr1y4tXbpUFotFFotFJ06cs
P9+ZmamIiIi1KhRI/Xr1085OTnfWXNWVpZ+8pOfyMfHRy1atNAvfvELXb58WdLXl+UeeughSZKHh8e3hqYLFy5o9OjRatWqlXx8fNS5c2e9/vrrkqSQkBBJ0t133y2LxaJ7771XkrR//3799Kc/VcuWLeXr66uBAwfqwIEDDvNaLBatXr1ajz76qBo1aqTOnTvrvffecxjzwQcf6I477pCPj4/uu+8+h+dCks6dO6dRo0apTZs2atSokbp3766//e1vDmPuvfdeTZo0SVOnTlXLli0VExNjam6g3nP1NwYDqD0GDhxoPP300/bjCRMmGP369TN2795tHDt2zFi8eLHh5eVlfPbZZ4ZhGMbrr79uNGzY0IiOjjb2799vZGZmGqGhocZjjz1mn+OFF14wmjdvbmzYsME4evSoMXHiRMNmsxmPPPKIYRiGcfHiRSMqKsp48sknjdOnTxunT582rl+/buzcudOQZERGRhrp6enGkSNHjHvuucfo16/ft9Z/+fJlo3Xr1sawYcOMrKwsIy0tzQgJCTHi4+MNwzCMS5cuGa+//rohyX5fN5OQkGD07NnT2L9/v5Gbm2ts27bNeO+99wzDMIx9+/YZkozt27cbp0+fNs6dO2cYhmGkpaUZf/nLX4yjR48a//rXv4zx48cbAQEBRnFxsX1eSUbbtm2NtWvXGp9//rkxZcoUo0mTJvY58vLyDC8vLyMxMdHIzs42/vrXvxoBAQGGJOPChQuGYRjGl19+aSxevNg4ePCgcfz4cWPZsmWGp6ensXfvXofXsUmTJsaMGTOM7OxsIzs729TcQH1HaAJg2o2h6YsvvjA8PT2N//znPw5jBg0aZMyePdswDMMeQI4dO2bvX758uREQEGA/DggIMBYvXmw/vn79uhEcHGwPTd+830qVoWn79u32tpSUFEOSceXKlZvWv2rVKqNZs2bG5cuXHX7Hw8PDyM/PNwzDMN59913jVv+efOihh4xx48bdtC83N9eQZBw8ePA75ygvLzeaNm1qvP/++/Y2ScacOXPsx5cvXzYkGVu2bDEMwzBmz55tdO3a1WGemTNn3jLYxMbGGtOnT7cfDxw40Lj77rsdxtzu3EB9wpomALclKytL5eXluuOOOxzaS0tL1aJFC/txo0aN9KMf/ch+3Lp1axUWFkqSioqKVFBQoD59+tj7PT09FR4eroqKClN19OjRw2FuSSosLFRwcHCVsUePHlVYWJgaN25sb+vfv78qKiqUk5OjgIAAU/f51FNPKS4uTgcOHNDgwYM1dOhQ9evX7zt/p6CgQHPmzFF6eroKCwtVXl6ur776Snl5ed/6eBo3biybzWZ/vo4eParIyEiH8VFRUQ7H5eXlevHFF/X222/rP//5j8rKylRaWqpGjRo5jAsPD3c4NjM3UN8RmgDclsuXL8vT01OZmZny9PR06GvSpIn954YNGzr0WSwWGYZRbXXcOH/lGiSzget23X///friiy/0wQcfaNu2bRo0aJASEhL0u9/97lt/Jz4+XufOndPSpUvVvn17eXl5KSoqSmVlZQ7jbvZ8OfN4Fi9erKVLl2rJkiXq3r27GjdurKlTp1a5nxuDIwBzWAgO4LbcfffdKi8vV2FhoTp16uRwCwwMNDWHr6+vAgICtH//fntbeXl5lQXSVqtV5eXl37vm0NBQffrppyopKbG3ffzxx/Lw8NCdd97p1FytWrVSfHy8/vrXv2rJkiVatWqVvVZJVer9+OOPNWXKFD3wwAO666675OXlpbNnzzpd/759+xzaPvnkkyr388gjj+jnP/+5wsLC1LFjR3322WfVMjdQ3xGaANyWO+64Q6NHj9bYsWO1YcMG5ebmat++fUpKSlJKSorpeSZPnqykpCRt2rRJOTk5evrpp3XhwgWHT6516NBBe/fu1YkTJ3T27NnbPpM0evRoeXt7Kz4+XocPH9bOnTs1efJkjRkzxvSlOUmaO3euNm3apGPHjunIkSPavHmzQkNDJ
Un+/v7y8fFRamqqCgoKVFRUJEnq3Lmz/vKXv+jo0aPau3evRo8eLR8fH6fqnzhxoj7//HPNmDFDOTk5Wrt2rdasWeMwpnPnztq2bZv27Nmjo0eP6pe//KUKCgqqZW6gviM0Abhtr7/+usaOHavp06frzjvv1NChQ7V///6brif6NjNnztSoUaM0duxYRUVFqUmTJoqJiZG3t7d9zDPPPCNPT0917dpVrVq1qrIOyKxGjRpp69atOn/+vHr37q3hw4dr0KBBSk5Odmoeq9Wq2bNnq0ePHhowYIA8PT311ltvSZIaNGigZcuW6U9/+pOCgoL0yCOPSJJee+01XbhwQb169dKYMWM0ZcoU+fv7O3W/wcHBeuedd7Rx40aFhYVp5cqVevHFFx3GzJkzR7169VJMTIzuvfdeBQYG2rdv+L5zA/WdxajOxQUA8D1VVFQoNDRU//M//6Pf/OY3ri4HAOxYCA7Apb744gt9+OGHGjhwoEpLS5WcnKzc3Fw99thjri4NABxweQ6AS3l4eGjNmjXq3bu3+vfvr6ysLG3fvt2+RggA3AWX5wAAAEzgTBMAAIAJhCYAAAATCE0AAAAmEJoAAABMIDQBAACYQGgCAAAwgdAEAABgAqEJAADAhP8PB9BZy3JuP1IAAAAASUVORK5CYII=\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "SOS_token = 0\n", + "EOS_token = 1\n", + "UNK_token = 2\n", + "PAD_token = 3\n", + "\n", + "class Lang:\n", + " def __init__(self, name):\n", + " self.name = name\n", + " self.word2index = {\"UNK\": 2}\n", + " self.word2count = {}\n", + " self.index2word = {0: \"SOS\", 1: \"EOS\", 2: \"UNK\", 3: \"PAD\"}\n", + " self.n_words = 4 # SOS, EOS, UNK, PAD\n", + "\n", + " def addSentence(self, sentence):\n", + " for word in sentence.split(\" \"):\n", + " self.addWord(word)\n", + "\n", + " def addWord(self, word):\n", + " if word not in self.word2index:\n", + " self.word2index[word] = self.n_words\n", + " self.word2count[word] = 1\n", + " self.index2word[self.n_words] = word\n", + " self.n_words += 1\n", + " else:\n", + " self.word2count[word] += 1\n", + "\n", + " def getWordIndex(self, word):\n", + " return self.word2index.get(word, self.word2index[\"UNK\"])" + ], + "metadata": { + "id": "oMl0xGNU49XX" + }, + "execution_count": 61, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Lang 객체 생성\n", + "dialect_lang = Lang(\"Dialect\")\n", + "standard_lang = Lang(\"Standard\")\n", + "\n", + "# 문장 추가\n", + "for sentence in filtered_dialect[:10]:\n", + " dialect_lang.addSentence(sentence)\n", + "for sentence in 
# --- Cell: build the vocabularies and the sentence pairs -------------------

# Number of sentence pairs taken from each split. The notebook is a smoke
# test, so only a handful are used; raise this one value to train on more.
SUBSET_SIZE = 10

# Create the Lang objects
dialect_lang = Lang("Dialect")
standard_lang = Lang("Standard")

# Add the sentences (training split first, then validation split, so that
# word indices are assigned in the same order as before)
for sentence in filtered_dialect[:SUBSET_SIZE]:
    dialect_lang.addSentence(sentence)
for sentence in filtered_standard[:SUBSET_SIZE]:
    standard_lang.addSentence(sentence)

for sentence in filtered_df_VL['src'][:SUBSET_SIZE]:
    dialect_lang.addSentence(sentence)
for sentence in filtered_df_VL['tar'][:SUBSET_SIZE]:
    standard_lang.addSentence(sentence)

# Sentence pairs: (dialect, standard)
pairs = list(zip(filtered_dialect[:SUBSET_SIZE], filtered_standard[:SUBSET_SIZE]))
VL_pairs = list(zip(filtered_df_VL['src'][:SUBSET_SIZE], filtered_df_VL['tar'][:SUBSET_SIZE]))


# Convert sentences to index sequences
def indexesFromSentence(lang, sentence):
    """Map each space-separated token of ``sentence`` to its index in ``lang``."""
    return [lang.getWordIndex(word) for word in sentence.split(' ')]


def tensorFromSentence(lang, sentence, max_length):
    """Encode ``sentence`` as a ``(max_length, 1)`` LongTensor.

    The sequence is terminated with EOS, then right-padded with PAD up to
    ``max_length``. A sequence that is too long is cut so that its last
    element is still EOS.

    Raises:
        ValueError: if ``max_length`` is smaller than 1. Previously such a
            value silently produced a tensor of the wrong length.
    """
    if max_length < 1:
        raise ValueError(f"max_length must be at least 1, got {max_length}")

    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    if len(indexes) < max_length:
        indexes += [PAD_token] * (max_length - len(indexes))
    elif len(indexes) > max_length:
        indexes = indexes[:max_length - 1] + [EOS_token]
    return torch.tensor(indexes, dtype=torch.long).view(-1, 1)


def tensorsFromPair(pair):
    """Encode a (dialect, standard) sentence pair with the global vocabularies.

    ``max_len`` is read when the function is called, so it only has to be
    defined before the first call.
    """
    input_tensor = tensorFromSentence(dialect_lang, pair[0], max_len)
    target_tensor = tensorFromSentence(standard_lang, pair[1], max_len)
    return (input_tensor, target_tensor)


# --- Cell: torch imports, sequence length, validation tensors ---------------
import torch
import torch.nn as nn
import torch.optim as optim

# Fixed length (EOS and padding included) of every encoded sentence.
max_len = 22

# Convert the validation data to index tensors
validation_input_tensors = [tensorFromSentence(dialect_lang, src, max_len) for src, _ in VL_pairs]
validation_target_tensors = [tensorFromSentence(standard_lang, tar, max_len) for _, tar in VL_pairs]
# Model definitions
class EncoderRNN(nn.Module):
    """Single-layer LSTM encoder that consumes one token index per call."""

    def __init__(self, input_size, hidden_size):
        """
        Args:
            input_size: Size of the source vocabulary (number of embeddings).
            hidden_size: Width of the embedding and of the LSTM state.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size)

    def forward(self, input, hidden):
        """Run one time step.

        Args:
            input: Tensor holding a single token index.
            hidden: ``(h, c)`` LSTM state, each reshaped/used as (1, 1, hidden_size).

        Returns:
            ``(output, hidden)`` exactly as returned by ``nn.LSTM``.
        """
        embedded = self.embedding(input).view(1, 1, -1)
        output, hidden = self.lstm(embedded, hidden)
        return output, hidden

    def initHidden(self):
        """Return a zero ``(h, c)`` state on the same device as the weights."""
        # Allocating on the parameters' device keeps the module usable after
        # `.to(device)`; the old CPU-only tensors broke on a GPU model.
        device = self.embedding.weight.device
        return (torch.zeros(1, 1, self.hidden_size, device=device),
                torch.zeros(1, 1, self.hidden_size, device=device))


class AttnDecoderRNN(nn.Module):
    """LSTM decoder with attention over a fixed number of encoder positions."""

    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=None):
        """
        Args:
            hidden_size: Width of the embedding and of the LSTM state.
            output_size: Size of the target vocabulary.
            dropout_p: Dropout probability applied to the input embedding.
            max_length: Number of encoder positions attended over. ``None``
                (default) uses the notebook-level ``max_len``, looked up when
                the decoder is constructed rather than when the class is
                defined, so re-assigning ``max_len`` is honoured.
        """
        super().__init__()
        if max_length is None:
            max_length = max_len

        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.lstm = nn.LSTM(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        """Run one decoding step.

        Args:
            input: Tensor holding the previous token index.
            hidden: ``(h, c)`` LSTM state.
            encoder_outputs: ``(max_length, hidden_size)`` encoder outputs;
                the row count has to equal ``self.max_length`` for the
                batched matrix product below.

        Returns:
            ``(log_probs, hidden, attn_weights)`` with ``log_probs`` of shape
            (1, output_size) and ``attn_weights`` of shape (1, max_length).
        """
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        # Score every encoder position from the current embedding and the
        # LSTM hidden state h (hidden[0]).
        attn_weights = nn.functional.softmax(
            self.attn(torch.cat((embedded[0], hidden[0][0]), 1)), dim=1)
        # Weighted sum of the encoder outputs.
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = nn.functional.relu(output)
        output, hidden = self.lstm(output, hidden)

        output = nn.functional.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        """Return a zero ``(h, c)`` state on the same device as the weights."""
        device = self.embedding.weight.device
        return (torch.zeros(1, 1, self.hidden_size, device=device),
                torch.zeros(1, 1, self.hidden_size, device=device))
import random
import time
import math


def asMinutes(s):
    """Format a duration in seconds as ``'<m>m <s.ss>s'``."""
    m = math.floor(s / 60)
    s -= m * 60
    return f'{m}m {s:.2f}s'


def timeSince(since, percent):
    """Return elapsed time since ``since`` and the estimated remaining time.

    Args:
        since: Start timestamp as returned by ``time.time()``.
        percent: Fraction of the work completed, in (0, 1].

    Returns:
        ``'<elapsed> (- <remaining>)'``. When ``percent`` is not positive no
        estimate is possible and the remaining part is ``'?'`` (this used to
        raise ZeroDivisionError).
    """
    now = time.time()
    s = now - since
    if percent <= 0:
        return f'{asMinutes(s)} (- ?)'
    es = s / (percent)
    rs = es - s
    return f'{asMinutes(s)} (- {asMinutes(rs)})'


# Model training function
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer,
          decoder_optimizer, criterion, max_length=None):
    """Run one optimisation step on a single sentence pair.

    The decoder is fed its own greedy prediction at every step (no teacher
    forcing) and decoding stops as soon as it predicts EOS.

    Args:
        input_tensor: ``(input_length, 1)`` source token indices.
        target_tensor: ``(target_length, 1)`` target token indices.
        encoder, decoder: The two modules being trained.
        encoder_optimizer, decoder_optimizer: Their optimisers.
        criterion: Loss applied to each decoder output / target token.
        max_length: Number of rows of the encoder-output buffer. ``None``
            (default) uses the notebook-level ``max_len``, looked up at call
            time.

    Returns:
        The summed loss divided by ``target_length``. The divisor is the full
        target length even when decoding stopped early.
    """
    if max_length is None:
        max_length = max_len

    # Make sure dropout is active even if the modules were left in eval mode.
    encoder.train()
    decoder.train()

    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)
    device = input_tensor.device

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)

    # The decoder starts from the encoder's final state.
    decoder_hidden = encoder_hidden

    for di in range(target_length):
        decoder_output, decoder_hidden, decoder_attention = decoder(
            decoder_input, decoder_hidden, encoder_outputs)
        topv, topi = decoder_output.topk(1)
        decoder_input = topi.squeeze().detach()  # used as the next input

        loss += criterion(decoder_output, target_tensor[di])
        if decoder_input.item() == EOS_token:
            break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length
def evaluate(encoder, decoder, input_tensor, target_tensor, criterion, max_length=None):
    """Compute the loss of one sentence pair without updating any weights.

    Both modules are switched to evaluation mode for the duration of the call
    so that dropout does not add noise to the reported loss; their previous
    training/eval mode is restored before returning.

    Args:
        encoder, decoder: The modules to evaluate.
        input_tensor: ``(input_length, 1)`` source token indices.
        target_tensor: ``(target_length, 1)`` target token indices.
        criterion: Loss applied to each decoder output / target token.
        max_length: Number of rows of the encoder-output buffer. ``None``
            (default) uses the notebook-level ``max_len``, looked up at call
            time.

    Returns:
        The summed loss divided by ``target_length``. The divisor is the full
        target length even when decoding stopped early on EOS.
    """
    if max_length is None:
        max_length = max_len

    encoder_was_training = encoder.training
    decoder_was_training = decoder.training
    encoder.eval()
    decoder.eval()
    try:
        with torch.no_grad():
            encoder_hidden = encoder.initHidden()

            input_length = input_tensor.size(0)
            target_length = target_tensor.size(0)
            device = input_tensor.device

            encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

            loss = 0

            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
                encoder_outputs[ei] = encoder_output[0, 0]

            decoder_input = torch.tensor([[SOS_token]], device=device)

            decoder_hidden = encoder_hidden

            for di in range(target_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                topv, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze().detach()  # used as the next input

                loss += criterion(decoder_output, target_tensor[di])
                if decoder_input.item() == EOS_token:
                    break

            return loss.item() / target_length
    finally:
        # Hand the modules back in whatever mode the caller had them.
        encoder.train(encoder_was_training)
        decoder.train(decoder_was_training)


def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    """Train on ``n_iters`` randomly drawn pairs and plot the loss curves.

    Args:
        encoder, decoder: Modules to train (updated in place).
        n_iters: Number of single-pair optimisation steps.
        print_every: Print averaged train/validation loss every this many steps.
        plot_every: Record one point of each loss curve every this many steps.
        learning_rate: Learning rate of both SGD optimisers.

    The validation loss is recomputed at every print *and* every plot step.
    Previously the plot re-used the value of the last print step, which was a
    meaningless 0 before the first print and stale in between. This costs one
    extra pass over the validation tensors per plot step.
    """
    start = time.time()
    plot_losses = []
    plot_val_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(random.choice(pairs)) for _ in range(n_iters)]
    criterion = nn.NLLLoss()

    def _validationLoss():
        """Average evaluate() loss over the validation tensors (NaN if there are none)."""
        if len(validation_input_tensors) == 0:
            return float('nan')
        total = 0
        for val_input, val_target in zip(validation_input_tensors, validation_target_tensors):
            total += evaluate(encoder, decoder, val_input, val_target, criterion)
        return total / len(validation_input_tensors)

    for step in range(1, n_iters + 1):
        input_tensor, target_tensor = training_pairs[step - 1]

        loss = train(input_tensor, target_tensor, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        should_print = step % print_every == 0
        should_plot = step % plot_every == 0

        # Compute the validation loss once per step that needs it
        if should_print or should_plot:
            val_loss_avg = _validationLoss()

        if should_print:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print(f'{timeSince(start, step / n_iters)} ({step} {step / n_iters * 100:.2f}%) '
                  f'Train Loss: {print_loss_avg:.4f}, Val Loss: {val_loss_avg:.4f}')

        if should_plot:
            plot_loss_avg = plot_loss_total / plot_every
            plot_val_losses.append(val_loss_avg)
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    showPlot(plot_losses, plot_val_losses)
def showPlot(train_losses, val_losses):
    """Plot the recorded training and validation loss curves.

    Args:
        train_losses: One averaged training loss per recorded point.
        val_losses: One validation loss per recorded point.

    The x axis is the index of the recorded point (one point per
    ``plot_every`` training iterations), not the raw iteration number.
    """
    fig, ax = plt.subplots()
    ax.plot(train_losses, label='Training Loss')
    ax.plot(val_losses, label='Validation Loss')
    ax.set_title('Training and Validation Loss')
    ax.set_xlabel('Recorded point (one per plot_every iterations)')
    ax.set_ylabel('Loss')
    ax.legend()
    plt.show()


# --- Cell: model initialisation and training --------------------------------
# Seed both RNGs used by the run (pair sampling and weight init/dropout) so
# that Restart & Run All reproduces the same curves.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

hidden_size = 256
encoder = EncoderRNN(dialect_lang.n_words, hidden_size)
decoder = AttnDecoderRNN(hidden_size, standard_lang.n_words, dropout_p=0.1)

trainIters(encoder, decoder, 1000, print_every=100, plot_every=50)  # run with a small number of iterations
(- 1m 52.60s) (100 10.00%) Train Loss: 2.6009, Val Loss: 3.2177\n", + "0m 22.69s (- 1m 30.76s) (200 20.00%) Train Loss: 1.6699, Val Loss: 4.1320\n", + "0m 33.58s (- 1m 18.36s) (300 30.00%) Train Loss: 1.5512, Val Loss: 4.4176\n", + "0m 44.24s (- 1m 6.37s) (400 40.00%) Train Loss: 1.4290, Val Loss: 4.4798\n", + "0m 55.07s (- 0m 55.07s) (500 50.00%) Train Loss: 1.3086, Val Loss: 4.1685\n", + "1m 5.12s (- 0m 43.41s) (600 60.00%) Train Loss: 1.2437, Val Loss: 5.1896\n", + "1m 15.75s (- 0m 32.47s) (700 70.00%) Train Loss: 1.1878, Val Loss: 5.2546\n", + "1m 26.22s (- 0m 21.56s) (800 80.00%) Train Loss: 1.0526, Val Loss: 4.7955\n", + "1m 36.49s (- 0m 10.72s) (900 90.00%) Train Loss: 0.6964, Val Loss: 5.1462\n", + "1m 45.93s (- 0m 0.00s) (1000 100.00%) Train Loss: 0.7646, Val Loss: 5.2876\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
def saveModel(encoder, decoder, encoder_path='encoder.pth', decoder_path='decoder.pth'):
    """Save the state dicts of ``encoder`` and ``decoder`` to the given paths."""
    torch.save(encoder.state_dict(), encoder_path)
    torch.save(decoder.state_dict(), decoder_path)


def loadModel(encoder_path='encoder.pth', decoder_path='decoder.pth', map_location='cpu'):
    """Rebuild the encoder/decoder and load their saved weights.

    The models are constructed from the notebook-level ``dialect_lang``,
    ``standard_lang`` and ``hidden_size``, which therefore have to match the
    values used when the checkpoint was written.

    Args:
        encoder_path: File written by ``saveModel`` for the encoder.
        decoder_path: File written by ``saveModel`` for the decoder.
        map_location: Forwarded to ``torch.load``. Defaults to ``'cpu'`` so a
            checkpoint saved on a GPU runtime still loads on a CPU-only one.

    Returns:
        ``(encoder, decoder)`` with the loaded weights.
    """
    encoder = EncoderRNN(dialect_lang.n_words, hidden_size)
    decoder = AttnDecoderRNN(hidden_size, standard_lang.n_words, dropout_p=0.1)
    encoder.load_state_dict(torch.load(encoder_path, map_location=map_location))
    decoder.load_state_dict(torch.load(decoder_path, map_location=map_location))
    return encoder, decoder


encoder_path = '/content/drive/MyDrive/LSTM+attention/test_encoder.pth'
decoder_path = '/content/drive/MyDrive/LSTM+attention/test_decoder.pth'

saveModel(encoder, decoder, encoder_path, decoder_path)


def _greedyTranslate(encoder, decoder, sentence, max_length=None):
    """Greedily decode ``sentence`` (a raw dialect string) into standard words.

    Returns the predicted words as a list; decoding stops at EOS or after
    ``max_length`` steps. SOS and PAD predictions are dropped from the output.
    """
    if max_length is None:
        max_length = max_len

    input_tensor = tensorFromSentence(dialect_lang, sentence, max_length)

    encoder_was_training = encoder.training
    decoder_was_training = decoder.training
    encoder.eval()  # no dropout while translating
    decoder.eval()
    output_words = []
    try:
        with torch.no_grad():
            encoder_hidden = encoder.initHidden()
            encoder_outputs = torch.zeros(max_length, encoder.hidden_size)
            for ei in range(input_tensor.size(0)):
                encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
                encoder_outputs[ei] = encoder_output[0, 0]

            decoder_input = torch.tensor([[SOS_token]])
            decoder_hidden = encoder_hidden

            for _ in range(max_length):
                decoder_output, decoder_hidden, _attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                topi = decoder_output.topk(1)[1]
                word_index = topi.item()
                if word_index == EOS_token:
                    break
                if word_index not in (SOS_token, PAD_token):
                    output_words.append(standard_lang.index2word[word_index])
                decoder_input = topi.squeeze().detach()
    finally:
        encoder.train(encoder_was_training)
        decoder.train(decoder_was_training)
    return output_words


# Model evaluation function
def evaluateRandomly(encoder, decoder, n=10):
    """Print the model's translation for ``n`` random pairs from ``test_pairs``.

    ``test_pairs`` is the notebook-level list of (dialect, standard) strings.
    The previous version passed the raw string to ``evaluate``, which expects
    tensors plus a criterion and returns a loss, so it could not run.
    """
    for i in range(n):
        pair = random.choice(test_pairs)
        print('Dialect:', pair[0])
        print('Expected:', pair[1])
        output_words = _greedyTranslate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        print('Predicted:', output_sentence)
        print('')
## Test data still needs to be prepared
test_dialect_sentences = ["밥 무나?", "와 이리 더운교?", "이거 맛있다카이", "오늘 날씨 좋네예"]
test_standard_sentences = ["밥 먹었니?", "왜 이렇게 덥지?", "이거 맛있다고 하네", "오늘 날씨 좋네"]

test_pairs = list(zip(test_dialect_sentences, test_standard_sentences))

# Load the saved model
# loadModel() reads the global hidden_size, so it has to be set before the
# call (it was assigned only afterwards).
hidden_size = 256
encoder, decoder = loadModel(encoder_path, decoder_path)

# The loaded encoder/decoder are used as-is. Re-creating EncoderRNN /
# AttnDecoderRNN here, as the cell used to do, replaced the trained weights
# with freshly initialised ones.
evaluateRandomly(encoder, decoder, n=len(test_pairs))

## Run the test function
evaluateRandomly(encoder, decoder, n=len(test_pairs))