diff --git a/AutomaticSentenceCompletion.ipynb b/AutomaticSentenceCompletion.ipynb index dd28b1a..513fd5b 100644 --- a/AutomaticSentenceCompletion.ipynb +++ b/AutomaticSentenceCompletion.ipynb @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 5, "id": "e444b44c", "metadata": {}, "outputs": [], @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 15, "id": "adfb256a", "metadata": {}, "outputs": [], @@ -76,44 +76,33 @@ }, { "cell_type": "code", - "execution_count": 31, - "id": "00481ec9", + "execution_count": 16, + "id": "bf797cc6", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "There are 1600 records for Cancer[tiab].\n" - ] - } - ], + "outputs": [], "source": [ - "myQuery =\"Cancer\"+\"[tiab]\" #query in title and abstract\n", - "maxPapers = 1600 # thinkabout outsourcing params to seperate section\n", - "records = getPapers(myQuery, maxPapers)\n" + "amountOfPapers = 20000" ] }, { "cell_type": "code", - "execution_count": 32, - "id": "56cf72de", + "execution_count": 20, + "id": "00481ec9", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "1600" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "There are 20000 records for Cancer [tiab].\n" + ] } ], "source": [ - "len(records)" + "myQuery =\"Cancer [tiab]\" #query in title and abstract\n", + "maxPapers = amountOfPapers\n", + "records = getPapers(myQuery, maxPapers)" ] }, { @@ -126,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 21, "id": "dcf5c217", "metadata": {}, "outputs": [], @@ -137,27 +126,6 @@ " r_abstracts.append(r['AB'])" ] }, - { - "cell_type": "code", - "execution_count": 34, - "id": "eb1fb38b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1532" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(r_abstracts)" - ] - }, { "cell_type": "markdown", "id": "e309f6fe", @@ -168,10 +136,19 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 22, "id": "c3199444", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/hein/.local/lib/python3.10/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "try:\n", " import torch\n", @@ -195,7 +172,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 23, "id": "daca9db6", "metadata": {}, "outputs": [], @@ -217,37 +194,49 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 24, + "id": "0e838dae", + "metadata": {}, + "outputs": [], + "source": [ + "train_size = math.floor(len(r_abstracts) * 0.75)\n", + "val_size = math.floor(len(r_abstracts) * 0.125)\n", + "test_size = math.floor(len(r_abstracts) * 0.125)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, "id": "8a128d3c", "metadata": {}, "outputs": [], "source": [ "def train_abstract_iter():\n", - " for abstract in r_abstracts[:1000]:\n", + " for abstract in r_abstracts[:train_size]:\n", " yield abstract" ] }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 26, "id": "97e89986", "metadata": {}, "outputs": [], "source": [ "def val_abstract_iter():\n", - " for abstract in r_abstracts[1001:1300]:\n", + " for abstract in r_abstracts[(train_size + 1):(train_size + val_size)]:\n", " yield abstract" ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 27, "id": "0d6e89c4", "metadata": {}, "outputs": [], "source": [ "def test_abstract_iter():\n", - " for abstract in r_abstracts[1301:1542]:\n", + " for abstract in r_abstracts[(train_size + val_size + 1): (train_size + val_size + test_size)]:\n", " yield abstract" ] }, @@ -261,7 +250,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 28, "id": "0bdbc40a", "metadata": {}, "outputs": [], @@ -282,7 +271,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 29, "id": "a438ab1f", "metadata": {}, "outputs": [], @@ -303,7 +292,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 30, "id": "0e5bc361", "metadata": {}, "outputs": [], @@ -316,7 +305,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 31, "id": "dfd7400d", "metadata": {}, "outputs": [], @@ -339,7 +328,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 32, "id": "c155ee31", "metadata": {}, "outputs": [], @@ -349,7 +338,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 33, "id": "79b2d248", "metadata": {}, "outputs": [ @@ -359,7 +348,7 @@ "device(type='cuda')" ] }, - "execution_count": 45, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -378,7 +367,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 34, "id": "a33d722f", "metadata": {}, "outputs": [], @@ -436,7 +425,7 @@ }, { "cell_type": "markdown", - "id": "3b78cc08", + "id": "23268efe", "metadata": {}, "source": [ "### define pos encoder" @@ -444,7 +433,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 35, "id": "c2f6d33b", "metadata": {}, "outputs": [], @@ -473,7 +462,7 @@ }, { "cell_type": "markdown", - "id": "0adefcce", + "id": "306352f5", "metadata": {}, "source": [ "### define function to create batches of data and create batches" @@ -481,7 +470,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 36, "id": "9e184841", "metadata": {}, "outputs": [], @@ -505,7 +494,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 37, "id": "a4def1ac", "metadata": {}, "outputs": [], @@ -519,7 +508,7 @@ }, { "cell_type": "markdown", - "id": "4f407ad0", + "id": "c658cb42", "metadata": {}, "source": [ "### define function to get batch" @@ -527,7 +516,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 38, "id": "4ab5b8fd", "metadata": {}, "outputs": [], @@ -551,7 +540,7 @@ }, { "cell_type": "markdown", - "id": "7ee28c38", + "id": "d6392484", "metadata": {}, "source": [ "### define parameters and init model" @@ -559,7 +548,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 39, "id": "c53764da", "metadata": {}, "outputs": [], @@ -575,7 +564,7 @@ }, { "cell_type": "markdown", - "id": "51f2400a", + "id": "7fb67d72", "metadata": {}, "source": [ "### init optimizer, loss, scheduler etc." @@ -583,8 +572,8 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "b9a04e07", + "execution_count": 40, + "id": "ddaa1d64", "metadata": {}, "outputs": [], "source": [ @@ -599,7 +588,7 @@ }, { "cell_type": "markdown", - "id": "07317af8", + "id": "dda19446", "metadata": {}, "source": [ "### define train function" @@ -607,7 +596,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 41, "id": "50ab3fb6", "metadata": {}, "outputs": [], @@ -649,7 +638,7 @@ }, { "cell_type": "markdown", - "id": "23709949", + "id": "9756c092", "metadata": {}, "source": [ "### define evaluate function" @@ -657,8 +646,8 @@ }, { "cell_type": "code", - "execution_count": 289, - "id": "689bd4ea", + "execution_count": 42, + "id": "3d179bb0", "metadata": {}, "outputs": [], "source": [ @@ -680,7 +669,7 @@ }, { "cell_type": "markdown", - "id": "d7c6a1e0", + "id": "5a959f09", "metadata": {}, "source": [ "### now we can start training the model while saving best one" @@ -688,7 +677,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 43, "id": "09c4d4ce", "metadata": { "scrolled": true @@ -698,52 +687,860 @@ "name": "stdout", "output_type": "stream", "text": [ - "| epoch 1 | 200/ 383 batches | lr 5.00 | ms/batch 63.06 | loss 8.09 | ppl 3258.10\n", + "| epoch 1 | 200/ 2727 batches | lr 5.00 | ms/batch 94.99 | loss 8.87 | ppl 7091.23\n", + "| epoch 1 | 400/ 2727 batches | lr 5.00 | ms/batch 53.24 | loss 7.14 | ppl 1267.54\n", + "| epoch 1 | 600/ 2727 batches | lr 5.00 | ms/batch 53.30 | loss 6.57 | ppl 713.79\n", + "| epoch 1 | 800/ 2727 batches | lr 5.00 | ms/batch 53.30 | loss 6.34 | ppl 569.42\n", + "| epoch 1 | 1000/ 2727 batches | lr 5.00 | ms/batch 53.27 | loss 6.13 | ppl 460.15\n", + "| epoch 1 | 1200/ 2727 batches | lr 5.00 | ms/batch 53.36 | loss 6.04 | ppl 421.52\n", + "| epoch 1 | 1400/ 2727 batches | lr 5.00 | ms/batch 53.37 | loss 5.92 | ppl 371.29\n", + "| epoch 1 | 1600/ 2727 batches | lr 5.00 | ms/batch 53.36 | loss 5.76 | ppl 318.28\n", + "| epoch 1 | 1800/ 2727 batches | lr 5.00 | ms/batch 53.46 | loss 5.87 | ppl 354.01\n", + "| epoch 1 | 2000/ 2727 batches | lr 5.00 | ms/batch 53.47 | loss 5.80 | ppl 328.82\n", + "| epoch 1 | 2200/ 2727 batches | lr 5.00 | ms/batch 53.44 | loss 5.72 | ppl 304.20\n", + "| epoch 1 | 2400/ 2727 batches | lr 5.00 | ms/batch 53.45 | loss 5.75 | ppl 313.57\n", + "| epoch 1 | 2600/ 2727 batches | lr 5.00 | ms/batch 53.50 | loss 5.73 | ppl 307.48\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 1 | time: 160.70s | valid loss 5.61 | valid ppl 273.67\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 2 | 200/ 2727 batches | lr 4.75 | ms/batch 53.84 | loss 5.66 | ppl 286.69\n", + "| epoch 2 | 400/ 2727 batches | lr 4.75 | ms/batch 53.58 | loss 5.55 | ppl 258.35\n", + "| epoch 2 | 600/ 2727 batches | lr 4.75 | ms/batch 53.58 | loss 5.50 | ppl 245.61\n", + "| epoch 2 | 800/ 2727 batches | lr 4.75 | ms/batch 53.58 | loss 5.51 | ppl 248.30\n", + "| epoch 2 | 1000/ 2727 batches | lr 4.75 | ms/batch 53.60 | loss 5.44 | ppl 229.54\n", + "| epoch 2 | 1200/ 2727 batches | lr 4.75 | ms/batch 53.59 | loss 5.45 | ppl 233.38\n", + "| epoch 2 | 1400/ 2727 batches | lr 4.75 | ms/batch 53.62 | loss 5.39 | ppl 219.41\n", + "| epoch 2 | 1600/ 2727 batches | lr 4.75 | ms/batch 53.61 | loss 5.29 | ppl 197.66\n", + "| epoch 2 | 1800/ 2727 batches | lr 4.75 | ms/batch 53.58 | loss 5.44 | ppl 229.46\n", + "| epoch 2 | 2000/ 2727 batches | lr 4.75 | ms/batch 53.58 | loss 5.40 | ppl 221.35\n", + "| epoch 2 | 2200/ 2727 batches | lr 4.75 | ms/batch 53.59 | loss 5.35 | ppl 210.15\n", + "| epoch 2 | 2400/ 2727 batches | lr 4.75 | ms/batch 53.57 | loss 5.39 | ppl 219.29\n", + "| epoch 2 | 2600/ 2727 batches | lr 4.75 | ms/batch 53.58 | loss 5.39 | ppl 220.01\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 2 | time: 152.99s | valid loss 5.43 | valid ppl 228.25\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 3 | 200/ 2727 batches | lr 4.51 | ms/batch 53.88 | loss 5.37 | ppl 213.86\n", + "| epoch 3 | 400/ 2727 batches | lr 4.51 | ms/batch 53.59 | loss 5.27 | ppl 195.07\n", + "| epoch 3 | 600/ 2727 batches | lr 4.51 | ms/batch 53.61 | loss 5.25 | ppl 189.68\n", + "| epoch 3 | 800/ 2727 batches | lr 4.51 | ms/batch 53.59 | loss 5.26 | ppl 192.06\n", + "| epoch 3 | 1000/ 2727 batches | lr 4.51 | ms/batch 53.61 | loss 5.18 | ppl 177.42\n", + "| epoch 3 | 1200/ 2727 batches | lr 4.51 | ms/batch 53.58 | loss 5.23 | ppl 186.03\n", + "| epoch 3 | 1400/ 2727 batches | lr 4.51 | ms/batch 53.57 | loss 5.16 | ppl 174.77\n", + "| epoch 3 | 1600/ 2727 batches | lr 4.51 | ms/batch 53.58 | loss 5.07 | ppl 158.60\n", + "| epoch 3 | 1800/ 2727 batches | lr 4.51 | ms/batch 53.57 | loss 5.21 | ppl 182.78\n", + "| epoch 3 | 2000/ 2727 batches | lr 4.51 | ms/batch 53.55 | loss 5.19 | ppl 179.00\n", + "| epoch 3 | 2200/ 2727 batches | lr 4.51 | ms/batch 53.57 | loss 5.14 | ppl 171.26\n", + "| epoch 3 | 2400/ 2727 batches | lr 4.51 | ms/batch 53.57 | loss 5.19 | ppl 179.55\n", + "| epoch 3 | 2600/ 2727 batches | lr 4.51 | ms/batch 53.58 | loss 5.19 | ppl 179.28\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 3 | time: 152.97s | valid loss 5.39 | valid ppl 218.86\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 4 | 200/ 2727 batches | lr 4.29 | ms/batch 53.86 | loss 5.19 | ppl 178.71\n", + "| epoch 4 | 400/ 2727 batches | lr 4.29 | ms/batch 53.61 | loss 5.10 | ppl 164.53\n", + "| epoch 4 | 600/ 2727 batches | lr 4.29 | ms/batch 53.58 | loss 5.08 | ppl 161.14\n", + "| epoch 4 | 800/ 2727 batches | lr 4.29 | ms/batch 53.61 | loss 5.09 | ppl 162.64\n", + "| epoch 4 | 1000/ 2727 batches | lr 4.29 | ms/batch 53.60 | loss 5.03 | ppl 152.20\n", + "| epoch 4 | 1200/ 2727 batches | lr 4.29 | ms/batch 53.58 | loss 5.06 | ppl 158.23\n", + "| epoch 4 | 1400/ 2727 batches | lr 4.29 | ms/batch 53.61 | loss 5.01 | ppl 150.33\n", + "| epoch 4 | 1600/ 2727 batches | lr 4.29 | ms/batch 53.57 | loss 4.91 | ppl 136.30\n", + "| epoch 4 | 1800/ 2727 batches | lr 4.29 | ms/batch 53.60 | loss 5.06 | ppl 158.00\n", + "| epoch 4 | 2000/ 2727 batches | lr 4.29 | ms/batch 53.58 | loss 5.04 | ppl 154.90\n", + "| epoch 4 | 2200/ 2727 batches | lr 4.29 | ms/batch 53.58 | loss 5.00 | ppl 148.52\n", + "| epoch 4 | 2400/ 2727 batches | lr 4.29 | ms/batch 53.59 | loss 5.03 | ppl 153.47\n", + "| epoch 4 | 2600/ 2727 batches | lr 4.29 | ms/batch 53.59 | loss 5.04 | ppl 155.21\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 4 | time: 152.99s | valid loss 5.31 | valid ppl 202.73\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 5 | 200/ 2727 batches | lr 4.07 | ms/batch 53.89 | loss 5.05 | ppl 155.53\n", + "| epoch 5 | 400/ 2727 batches | lr 4.07 | ms/batch 53.60 | loss 4.96 | ppl 143.06\n", + "| epoch 5 | 600/ 2727 batches | lr 4.07 | ms/batch 53.60 | loss 4.95 | ppl 141.66\n", + "| epoch 5 | 800/ 2727 batches | lr 4.07 | ms/batch 53.60 | loss 4.96 | ppl 142.29\n", + "| epoch 5 | 1000/ 2727 batches | lr 4.07 | ms/batch 53.58 | loss 4.90 | ppl 134.14\n", + "| epoch 5 | 1200/ 2727 batches | lr 4.07 | ms/batch 53.59 | loss 4.94 | ppl 139.54\n", + "| epoch 5 | 1400/ 2727 batches | lr 4.07 | ms/batch 53.63 | loss 4.89 | ppl 132.99\n", + "| epoch 5 | 1600/ 2727 batches | lr 4.07 | ms/batch 53.59 | loss 4.79 | ppl 120.88\n", + "| epoch 5 | 1800/ 2727 batches | lr 4.07 | ms/batch 53.60 | loss 4.93 | ppl 138.94\n", + "| epoch 5 | 2000/ 2727 batches | lr 4.07 | ms/batch 53.61 | loss 4.92 | ppl 136.69\n", + "| epoch 5 | 2200/ 2727 batches | lr 4.07 | ms/batch 53.60 | loss 4.89 | ppl 132.41\n", + "| epoch 5 | 2400/ 2727 batches | lr 4.07 | ms/batch 53.58 | loss 4.92 | ppl 137.61\n", + "| epoch 5 | 2600/ 2727 batches | lr 4.07 | ms/batch 53.58 | loss 4.93 | ppl 138.39\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 5 | time: 153.00s | valid loss 5.29 | valid ppl 198.47\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 6 | 200/ 2727 batches | lr 3.87 | ms/batch 53.84 | loss 4.94 | ppl 139.65\n", + "| epoch 6 | 400/ 2727 batches | lr 3.87 | ms/batch 53.58 | loss 4.86 | ppl 128.66\n", + "| epoch 6 | 600/ 2727 batches | lr 3.87 | ms/batch 53.58 | loss 4.85 | ppl 127.88\n", + "| epoch 6 | 800/ 2727 batches | lr 3.87 | ms/batch 53.53 | loss 4.85 | ppl 127.75\n", + "| epoch 6 | 1000/ 2727 batches | lr 3.87 | ms/batch 53.53 | loss 4.79 | ppl 120.47\n", + "| epoch 6 | 1200/ 2727 batches | lr 3.87 | ms/batch 53.58 | loss 4.83 | ppl 125.73\n", + "| epoch 6 | 1400/ 2727 batches | lr 3.87 | ms/batch 53.57 | loss 4.79 | ppl 119.80\n", + "| epoch 6 | 1600/ 2727 batches | lr 3.87 | ms/batch 53.54 | loss 4.70 | ppl 109.73\n", + "| epoch 6 | 1800/ 2727 batches | lr 3.87 | ms/batch 53.57 | loss 4.83 | ppl 125.26\n", + "| epoch 6 | 2000/ 2727 batches | lr 3.87 | ms/batch 53.54 | loss 4.81 | ppl 123.24\n", + "| epoch 6 | 2200/ 2727 batches | lr 3.87 | ms/batch 53.55 | loss 4.78 | ppl 118.89\n", + "| epoch 6 | 2400/ 2727 batches | lr 3.87 | ms/batch 53.54 | loss 4.81 | ppl 122.48\n", + "| epoch 6 | 2600/ 2727 batches | lr 3.87 | ms/batch 53.57 | loss 4.82 | ppl 124.55\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 6 | time: 152.89s | valid loss 5.30 | valid ppl 200.09\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 7 | 200/ 2727 batches | lr 3.68 | ms/batch 53.82 | loss 4.83 | ppl 125.67\n", + "| epoch 7 | 400/ 2727 batches | lr 3.68 | ms/batch 53.57 | loss 4.75 | ppl 115.28\n", + "| epoch 7 | 600/ 2727 batches | lr 3.68 | ms/batch 53.57 | loss 4.74 | ppl 114.88\n", + "| epoch 7 | 800/ 2727 batches | lr 3.68 | ms/batch 53.59 | loss 4.75 | ppl 115.36\n", + "| epoch 7 | 1000/ 2727 batches | lr 3.68 | ms/batch 53.84 | loss 4.69 | ppl 109.13\n", + "| epoch 7 | 1200/ 2727 batches | lr 3.68 | ms/batch 53.60 | loss 4.74 | ppl 114.06\n", + "| epoch 7 | 1400/ 2727 batches | lr 3.68 | ms/batch 53.53 | loss 4.69 | ppl 109.22\n", + "| epoch 7 | 1600/ 2727 batches | lr 3.68 | ms/batch 53.60 | loss 4.61 | ppl 100.03\n", + "| epoch 7 | 1800/ 2727 batches | lr 3.68 | ms/batch 53.54 | loss 4.73 | ppl 112.89\n", + "| epoch 7 | 2000/ 2727 batches | lr 3.68 | ms/batch 53.54 | loss 4.71 | ppl 111.57\n", + "| epoch 7 | 2200/ 2727 batches | lr 3.68 | ms/batch 53.56 | loss 4.68 | ppl 107.95\n", + "| epoch 7 | 2400/ 2727 batches | lr 3.68 | ms/batch 53.53 | loss 4.71 | ppl 111.61\n", + "| epoch 7 | 2600/ 2727 batches | lr 3.68 | ms/batch 53.54 | loss 4.73 | ppl 112.94\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 7 | time: 152.93s | valid loss 5.28 | valid ppl 196.84\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 8 | 200/ 2727 batches | lr 3.49 | ms/batch 53.84 | loss 4.74 | ppl 114.63\n", + "| epoch 8 | 400/ 2727 batches | lr 3.49 | ms/batch 53.54 | loss 4.67 | ppl 106.30\n", + "| epoch 8 | 600/ 2727 batches | lr 3.49 | ms/batch 53.55 | loss 4.66 | ppl 105.59\n", + "| epoch 8 | 800/ 2727 batches | lr 3.49 | ms/batch 53.53 | loss 4.66 | ppl 105.78\n", + "| epoch 8 | 1000/ 2727 batches | lr 3.49 | ms/batch 53.54 | loss 4.61 | ppl 100.14\n", + "| epoch 8 | 1200/ 2727 batches | lr 3.49 | ms/batch 53.50 | loss 4.65 | ppl 104.40\n", + "| epoch 8 | 1400/ 2727 batches | lr 3.49 | ms/batch 53.54 | loss 4.61 | ppl 100.21\n", + "| epoch 8 | 1600/ 2727 batches | lr 3.49 | ms/batch 53.53 | loss 4.52 | ppl 91.89\n", + "| epoch 8 | 1800/ 2727 batches | lr 3.49 | ms/batch 53.54 | loss 4.64 | ppl 103.21\n", + "| epoch 8 | 2000/ 2727 batches | lr 3.49 | ms/batch 53.53 | loss 4.63 | ppl 102.02\n", + "| epoch 8 | 2200/ 2727 batches | lr 3.49 | ms/batch 53.54 | loss 4.60 | ppl 99.14\n", + "| epoch 8 | 2400/ 2727 batches | lr 3.49 | ms/batch 53.68 | loss 4.63 | ppl 102.63\n", + "| epoch 8 | 2600/ 2727 batches | lr 3.49 | ms/batch 53.52 | loss 4.64 | ppl 103.81\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 8 | time: 152.84s | valid loss 5.30 | valid ppl 200.97\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 9 | 200/ 2727 batches | lr 3.32 | ms/batch 53.78 | loss 4.66 | ppl 105.35\n", + "| epoch 9 | 400/ 2727 batches | lr 3.32 | ms/batch 53.55 | loss 4.57 | ppl 96.89\n", + "| epoch 9 | 600/ 2727 batches | lr 3.32 | ms/batch 53.55 | loss 4.57 | ppl 96.68\n", + "| epoch 9 | 800/ 2727 batches | lr 3.32 | ms/batch 53.52 | loss 4.58 | ppl 97.08\n", + "| epoch 9 | 1000/ 2727 batches | lr 3.32 | ms/batch 53.54 | loss 4.53 | ppl 92.32\n", + "| epoch 9 | 1200/ 2727 batches | lr 3.32 | ms/batch 53.54 | loss 4.57 | ppl 96.21\n", + "| epoch 9 | 1400/ 2727 batches | lr 3.32 | ms/batch 53.55 | loss 4.52 | ppl 91.97\n", + "| epoch 9 | 1600/ 2727 batches | lr 3.32 | ms/batch 53.54 | loss 4.44 | ppl 84.91\n", + "| epoch 9 | 1800/ 2727 batches | lr 3.32 | ms/batch 53.52 | loss 4.55 | ppl 94.87\n", + "| epoch 9 | 2000/ 2727 batches | lr 3.32 | ms/batch 53.56 | loss 4.55 | ppl 94.49\n", + "| epoch 9 | 2200/ 2727 batches | lr 3.32 | ms/batch 53.54 | loss 4.52 | ppl 91.54\n", + "| epoch 9 | 2400/ 2727 batches | lr 3.32 | ms/batch 53.56 | loss 4.55 | ppl 94.55\n", + "| epoch 9 | 2600/ 2727 batches | lr 3.32 | ms/batch 53.55 | loss 4.55 | ppl 95.05\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 9 | time: 152.85s | valid loss 5.29 | valid ppl 198.51\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 10 | 200/ 2727 batches | lr 3.15 | ms/batch 53.81 | loss 4.58 | ppl 97.48\n", + "| epoch 10 | 400/ 2727 batches | lr 3.15 | ms/batch 53.56 | loss 4.50 | ppl 90.24\n", + "| epoch 10 | 600/ 2727 batches | lr 3.15 | ms/batch 53.55 | loss 4.50 | ppl 90.01\n", + "| epoch 10 | 800/ 2727 batches | lr 3.15 | ms/batch 53.55 | loss 4.49 | ppl 89.46\n", + "| epoch 10 | 1000/ 2727 batches | lr 3.15 | ms/batch 53.54 | loss 4.45 | ppl 85.29\n", + "| epoch 10 | 1200/ 2727 batches | lr 3.15 | ms/batch 53.55 | loss 4.49 | ppl 89.10\n", + "| epoch 10 | 1400/ 2727 batches | lr 3.15 | ms/batch 53.56 | loss 4.45 | ppl 85.22\n", + "| epoch 10 | 1600/ 2727 batches | lr 3.15 | ms/batch 53.52 | loss 4.38 | ppl 79.46\n", + "| epoch 10 | 1800/ 2727 batches | lr 3.15 | ms/batch 53.51 | loss 4.47 | ppl 87.24\n", + "| epoch 10 | 2000/ 2727 batches | lr 3.15 | ms/batch 53.52 | loss 4.47 | ppl 87.13\n", + "| epoch 10 | 2200/ 2727 batches | lr 3.15 | ms/batch 53.53 | loss 4.44 | ppl 85.14\n", + "| epoch 10 | 2400/ 2727 batches | lr 3.15 | ms/batch 53.53 | loss 4.47 | ppl 87.55\n", + "| epoch 10 | 2600/ 2727 batches | lr 3.15 | ms/batch 53.53 | loss 4.47 | ppl 87.64\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 10 | time: 152.82s | valid loss 5.32 | valid ppl 203.65\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 11 | 200/ 2727 batches | lr 2.99 | ms/batch 53.82 | loss 4.50 | ppl 90.42\n", + "| epoch 11 | 400/ 2727 batches | lr 2.99 | ms/batch 53.53 | loss 4.43 | ppl 83.56\n", + "| epoch 11 | 600/ 2727 batches | lr 2.99 | ms/batch 53.54 | loss 4.43 | ppl 83.85\n", + "| epoch 11 | 800/ 2727 batches | lr 2.99 | ms/batch 53.54 | loss 4.42 | ppl 83.09\n", + "| epoch 11 | 1000/ 2727 batches | lr 2.99 | ms/batch 53.54 | loss 4.38 | ppl 79.94\n", + "| epoch 11 | 1200/ 2727 batches | lr 2.99 | ms/batch 53.52 | loss 4.42 | ppl 83.14\n", + "| epoch 11 | 1400/ 2727 batches | lr 2.99 | ms/batch 53.54 | loss 4.37 | ppl 79.22\n", + "| epoch 11 | 1600/ 2727 batches | lr 2.99 | ms/batch 53.52 | loss 4.31 | ppl 74.26\n", + "| epoch 11 | 1800/ 2727 batches | lr 2.99 | ms/batch 53.57 | loss 4.40 | ppl 81.43\n", + "| epoch 11 | 2000/ 2727 batches | lr 2.99 | ms/batch 53.56 | loss 4.40 | ppl 81.52\n", + "| epoch 11 | 2200/ 2727 batches | lr 2.99 | ms/batch 53.54 | loss 4.38 | ppl 79.78\n", + "| epoch 11 | 2400/ 2727 batches | lr 2.99 | ms/batch 53.51 | loss 4.40 | ppl 81.41\n", + "| epoch 11 | 2600/ 2727 batches | lr 2.99 | ms/batch 53.53 | loss 4.41 | ppl 82.35\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 11 | time: 152.82s | valid loss 5.32 | valid ppl 204.69\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 12 | 200/ 2727 batches | lr 2.84 | ms/batch 53.79 | loss 4.43 | ppl 84.16\n", + "| epoch 12 | 400/ 2727 batches | lr 2.84 | ms/batch 53.53 | loss 4.36 | ppl 77.93\n", + "| epoch 12 | 600/ 2727 batches | lr 2.84 | ms/batch 53.55 | loss 4.36 | ppl 78.43\n", + "| epoch 12 | 800/ 2727 batches | lr 2.84 | ms/batch 53.53 | loss 4.36 | ppl 78.11\n", + "| epoch 12 | 1000/ 2727 batches | lr 2.84 | ms/batch 53.53 | loss 4.30 | ppl 73.88\n", + "| epoch 12 | 1200/ 2727 batches | lr 2.84 | ms/batch 53.54 | loss 4.35 | ppl 77.59\n", + "| epoch 12 | 1400/ 2727 batches | lr 2.84 | ms/batch 53.54 | loss 4.31 | ppl 74.45\n", + "| epoch 12 | 1600/ 2727 batches | lr 2.84 | ms/batch 53.49 | loss 4.24 | ppl 69.74\n", + "| epoch 12 | 1800/ 2727 batches | lr 2.84 | ms/batch 53.57 | loss 4.33 | ppl 76.30\n", + "| epoch 12 | 2000/ 2727 batches | lr 2.84 | ms/batch 53.51 | loss 4.33 | ppl 75.93\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "| epoch 12 | 2200/ 2727 batches | lr 2.84 | ms/batch 53.53 | loss 4.31 | ppl 74.43\n", + "| epoch 12 | 2400/ 2727 batches | lr 2.84 | ms/batch 53.52 | loss 4.33 | ppl 76.31\n", + "| epoch 12 | 2600/ 2727 batches | lr 2.84 | ms/batch 53.52 | loss 4.35 | ppl 77.29\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 12 | time: 152.80s | valid loss 5.38 | valid ppl 215.98\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 13 | 200/ 2727 batches | lr 2.70 | ms/batch 53.81 | loss 4.37 | ppl 79.06\n", + "| epoch 13 | 400/ 2727 batches | lr 2.70 | ms/batch 53.56 | loss 4.29 | ppl 72.96\n", + "| epoch 13 | 600/ 2727 batches | lr 2.70 | ms/batch 53.56 | loss 4.30 | ppl 73.51\n", + "| epoch 13 | 800/ 2727 batches | lr 2.70 | ms/batch 53.55 | loss 4.29 | ppl 72.86\n", + "| epoch 13 | 1000/ 2727 batches | lr 2.70 | ms/batch 53.54 | loss 4.25 | ppl 69.97\n", + "| epoch 13 | 1200/ 2727 batches | lr 2.70 | ms/batch 53.53 | loss 4.29 | ppl 73.18\n", + "| epoch 13 | 1400/ 2727 batches | lr 2.70 | ms/batch 53.53 | loss 4.24 | ppl 69.60\n", + "| epoch 13 | 1600/ 2727 batches | lr 2.70 | ms/batch 53.53 | loss 4.18 | ppl 65.65\n", + "| epoch 13 | 1800/ 2727 batches | lr 2.70 | ms/batch 53.51 | loss 4.27 | ppl 71.60\n", + "| epoch 13 | 2000/ 2727 batches | lr 2.70 | ms/batch 53.51 | loss 4.27 | ppl 71.30\n", + "| epoch 13 | 2200/ 2727 batches | lr 2.70 | ms/batch 53.55 | loss 4.25 | ppl 70.30\n", + "| epoch 13 | 2400/ 2727 batches | lr 2.70 | ms/batch 53.52 | loss 4.28 | ppl 71.95\n", + "| epoch 13 | 2600/ 2727 batches | lr 2.70 | ms/batch 53.54 | loss 4.28 | ppl 72.54\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 13 | time: 152.81s | valid loss 5.39 | valid ppl 220.01\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 14 | 200/ 2727 batches | lr 2.57 | ms/batch 53.82 | loss 4.31 | ppl 74.32\n", + "| epoch 14 | 400/ 2727 batches | lr 2.57 | ms/batch 53.53 | loss 4.23 | ppl 69.03\n", + "| epoch 14 | 600/ 2727 batches | lr 2.57 | ms/batch 53.51 | loss 4.24 | ppl 69.41\n", + "| epoch 14 | 800/ 2727 batches | lr 2.57 | ms/batch 53.54 | loss 4.24 | ppl 69.43\n", + "| epoch 14 | 1000/ 2727 batches | lr 2.57 | ms/batch 53.55 | loss 4.19 | ppl 65.95\n", + "| epoch 14 | 1200/ 2727 batches | lr 2.57 | ms/batch 53.56 | loss 4.23 | ppl 69.04\n", + "| epoch 14 | 1400/ 2727 batches | lr 2.57 | ms/batch 53.54 | loss 4.19 | ppl 65.93\n", + "| epoch 14 | 1600/ 2727 batches | lr 2.57 | ms/batch 53.56 | loss 4.14 | ppl 62.51\n", + "| epoch 14 | 1800/ 2727 batches | lr 2.57 | ms/batch 53.51 | loss 4.21 | ppl 67.67\n", + "| epoch 14 | 2000/ 2727 batches | lr 2.57 | ms/batch 53.57 | loss 4.20 | ppl 66.94\n", + "| epoch 14 | 2200/ 2727 batches | lr 2.57 | ms/batch 53.55 | loss 4.19 | ppl 66.28\n", + "| epoch 14 | 2400/ 2727 batches | lr 2.57 | ms/batch 53.56 | loss 4.22 | ppl 67.79\n", + "| epoch 14 | 2600/ 2727 batches | lr 2.57 | ms/batch 53.52 | loss 4.22 | ppl 68.15\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 14 | time: 152.84s | valid loss 5.39 | valid ppl 220.22\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 15 | 200/ 2727 batches | lr 2.44 | ms/batch 53.83 | loss 4.26 | ppl 70.62\n", + "| epoch 15 | 400/ 2727 batches | lr 2.44 | ms/batch 53.56 | loss 4.17 | ppl 64.96\n", + "| epoch 15 | 600/ 2727 batches | lr 2.44 | ms/batch 53.52 | loss 4.18 | ppl 65.63\n", + "| epoch 15 | 800/ 2727 batches | lr 2.44 | ms/batch 53.55 | loss 4.18 | ppl 65.28\n", + "| epoch 15 | 1000/ 2727 batches | lr 2.44 | ms/batch 53.59 | loss 4.14 | ppl 62.56\n", + "| epoch 15 | 1200/ 2727 batches | lr 2.44 | ms/batch 53.55 | loss 4.17 | ppl 64.85\n", + "| epoch 15 | 1400/ 2727 batches | lr 2.44 | ms/batch 53.58 | loss 4.13 | ppl 62.30\n", + "| epoch 15 | 1600/ 2727 batches | lr 2.44 | ms/batch 53.56 | loss 4.08 | ppl 59.29\n", + "| epoch 15 | 1800/ 2727 batches | lr 2.44 | ms/batch 53.56 | loss 4.15 | ppl 63.62\n", + "| epoch 15 | 2000/ 2727 batches | lr 2.44 | ms/batch 53.55 | loss 4.15 | ppl 63.55\n", + "| epoch 15 | 2200/ 2727 batches | lr 2.44 | ms/batch 53.52 | loss 4.14 | ppl 62.85\n", + "| epoch 15 | 2400/ 2727 batches | lr 2.44 | ms/batch 53.55 | loss 4.16 | ppl 64.08\n", + "| epoch 15 | 2600/ 2727 batches | lr 2.44 | ms/batch 53.57 | loss 4.17 | ppl 64.53\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 15 | time: 152.87s | valid loss 5.46 | valid ppl 235.52\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 16 | 200/ 2727 batches | lr 2.32 | ms/batch 53.84 | loss 4.20 | ppl 66.56\n", + "| epoch 16 | 400/ 2727 batches | lr 2.32 | ms/batch 53.54 | loss 4.13 | ppl 61.89\n", + "| epoch 16 | 600/ 2727 batches | lr 2.32 | ms/batch 53.56 | loss 4.13 | ppl 62.00\n", + "| epoch 16 | 800/ 2727 batches | lr 2.32 | ms/batch 53.57 | loss 4.12 | ppl 61.75\n", + "| epoch 16 | 1000/ 2727 batches | lr 2.32 | ms/batch 53.55 | loss 4.08 | ppl 59.16\n", + "| epoch 16 | 1200/ 2727 batches | lr 2.32 | ms/batch 53.56 | loss 4.12 | ppl 61.80\n", + "| epoch 16 | 1400/ 2727 batches | lr 2.32 | ms/batch 53.57 | loss 4.08 | ppl 58.94\n", + "| epoch 16 | 1600/ 2727 batches | lr 2.32 | ms/batch 53.54 | loss 4.03 | ppl 56.54\n", + "| epoch 16 | 1800/ 2727 batches | lr 2.32 | ms/batch 53.52 | loss 4.10 | ppl 60.59\n", + "| epoch 16 | 2000/ 2727 batches | lr 2.32 | ms/batch 53.58 | loss 4.10 | ppl 60.13\n", + "| epoch 16 | 2200/ 2727 batches | lr 2.32 | ms/batch 53.56 | loss 4.09 | ppl 59.79\n", + "| epoch 16 | 2400/ 2727 batches | lr 2.32 | ms/batch 53.60 | loss 4.11 | ppl 60.83\n", + "| epoch 16 | 2600/ 2727 batches | lr 2.32 | ms/batch 53.57 | loss 4.11 | ppl 61.11\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 16 | time: 152.88s | valid loss 5.41 | valid ppl 224.68\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 17 | 200/ 2727 batches | lr 2.20 | ms/batch 53.78 | loss 4.15 | ppl 63.27\n", + "| epoch 17 | 400/ 2727 batches | lr 2.20 | ms/batch 53.57 | loss 4.07 | ppl 58.62\n", + "| epoch 17 | 600/ 2727 batches | lr 2.20 | ms/batch 53.49 | loss 4.08 | ppl 59.06\n", + "| epoch 17 | 800/ 2727 batches | lr 2.20 | ms/batch 53.57 | loss 4.08 | ppl 58.99\n", + "| epoch 17 | 1000/ 2727 batches | lr 2.20 | ms/batch 53.57 | loss 4.03 | ppl 56.47\n", + "| epoch 17 | 1200/ 2727 batches | lr 2.20 | ms/batch 53.53 | loss 4.07 | ppl 58.70\n", + "| epoch 17 | 1400/ 2727 batches | lr 2.20 | ms/batch 53.51 | loss 4.03 | ppl 56.07\n", + "| epoch 17 | 1600/ 2727 batches | lr 2.20 | ms/batch 53.56 | loss 3.99 | ppl 53.92\n", + "| epoch 17 | 1800/ 2727 batches | lr 2.20 | ms/batch 53.57 | loss 4.05 | ppl 57.44\n", + "| epoch 17 | 2000/ 2727 batches | lr 2.20 | ms/batch 53.53 | loss 4.05 | ppl 57.22\n", + "| epoch 17 | 2200/ 2727 batches | lr 2.20 | ms/batch 53.55 | loss 4.05 | ppl 57.23\n", + "| epoch 17 | 2400/ 2727 batches | lr 2.20 | ms/batch 53.55 | loss 4.05 | ppl 57.45\n", + "| epoch 17 | 2600/ 2727 batches | lr 2.20 | ms/batch 53.59 | loss 4.06 | ppl 58.19\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 17 | time: 152.89s | valid loss 5.46 | valid ppl 234.56\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 18 | 200/ 2727 batches | lr 2.09 | ms/batch 53.84 | loss 4.10 | ppl 60.40\n", + "| epoch 18 | 400/ 2727 batches | lr 2.09 | ms/batch 53.60 | loss 4.03 | ppl 56.36\n", + "| epoch 18 | 600/ 2727 batches | lr 2.09 | ms/batch 53.55 | loss 4.03 | ppl 56.52\n", + "| epoch 18 | 800/ 2727 batches | lr 2.09 | ms/batch 53.51 | loss 4.03 | ppl 56.19\n", + "| epoch 18 | 1000/ 2727 batches | lr 2.09 | ms/batch 53.55 | loss 3.99 | ppl 54.11\n", + "| epoch 18 | 1200/ 2727 batches | lr 2.09 | ms/batch 53.52 | loss 4.02 | ppl 55.88\n", + "| epoch 18 | 1400/ 2727 batches | lr 2.09 | ms/batch 53.55 | loss 3.98 | ppl 53.29\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "| epoch 18 | 1600/ 2727 batches | lr 2.09 | ms/batch 53.57 | loss 3.94 | ppl 51.31\n", + "| epoch 18 | 1800/ 2727 batches | lr 2.09 | ms/batch 53.55 | loss 4.00 | ppl 54.68\n", + "| epoch 18 | 2000/ 2727 batches | lr 2.09 | ms/batch 53.56 | loss 3.99 | ppl 54.32\n", + "| epoch 18 | 2200/ 2727 batches | lr 2.09 | ms/batch 53.55 | loss 4.00 | ppl 54.51\n", + "| epoch 18 | 2400/ 2727 batches | lr 2.09 | ms/batch 53.52 | loss 4.01 | ppl 54.88\n", + "| epoch 18 | 2600/ 2727 batches | lr 2.09 | ms/batch 53.54 | loss 4.02 | ppl 55.54\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 18 | time: 152.86s | valid loss 5.52 | valid ppl 249.11\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 19 | 200/ 2727 batches | lr 1.99 | ms/batch 53.84 | loss 4.05 | ppl 57.55\n", + "| epoch 19 | 400/ 2727 batches | lr 1.99 | ms/batch 53.54 | loss 3.98 | ppl 53.78\n", + "| epoch 19 | 600/ 2727 batches | lr 1.99 | ms/batch 53.60 | loss 3.98 | ppl 53.75\n", + "| epoch 19 | 800/ 2727 batches | lr 1.99 | ms/batch 53.55 | loss 3.98 | ppl 53.61\n", + "| epoch 19 | 1000/ 2727 batches | lr 1.99 | ms/batch 53.57 | loss 3.94 | ppl 51.43\n", + "| epoch 19 | 1200/ 2727 batches | lr 1.99 | ms/batch 53.59 | loss 3.98 | ppl 53.41\n", + "| epoch 19 | 1400/ 2727 batches | lr 1.99 | ms/batch 53.60 | loss 3.94 | ppl 51.20\n", + "| epoch 19 | 1600/ 2727 batches | lr 1.99 | ms/batch 53.57 | loss 3.90 | ppl 49.32\n", + "| epoch 19 | 1800/ 2727 batches | lr 1.99 | ms/batch 53.55 | loss 3.96 | ppl 52.57\n", + "| epoch 19 | 2000/ 2727 batches | lr 1.99 | ms/batch 53.52 | loss 3.95 | ppl 52.05\n", + "| epoch 19 | 2200/ 2727 batches | lr 1.99 | ms/batch 53.54 | loss 3.95 | ppl 52.16\n", + "| epoch 19 | 2400/ 2727 batches | lr 1.99 | ms/batch 53.55 | loss 3.96 | ppl 52.57\n", + "| epoch 19 | 2600/ 2727 batches | lr 1.99 | ms/batch 53.56 | loss 3.97 | ppl 53.06\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 19 | time: 152.88s | valid loss 5.50 | valid ppl 244.11\n", "-----------------------------------------------------------------------------------------\n", - "| end of epoch 1 | time: 17.05s | valid loss 6.34 | valid ppl 566.15\n", + "| epoch 20 | 200/ 2727 batches | lr 1.89 | ms/batch 53.86 | loss 4.01 | ppl 55.25\n", + "| epoch 20 | 400/ 2727 batches | lr 1.89 | ms/batch 53.56 | loss 3.94 | ppl 51.37\n", + "| epoch 20 | 600/ 2727 batches | lr 1.89 | ms/batch 53.55 | loss 3.94 | ppl 51.51\n", + "| epoch 20 | 800/ 2727 batches | lr 1.89 | ms/batch 53.54 | loss 3.94 | ppl 51.36\n", + "| epoch 20 | 1000/ 2727 batches | lr 1.89 | ms/batch 53.55 | loss 3.90 | ppl 49.49\n", + "| epoch 20 | 1200/ 2727 batches | lr 1.89 | ms/batch 53.55 | loss 3.94 | ppl 51.19\n", + "| epoch 20 | 1400/ 2727 batches | lr 1.89 | ms/batch 53.53 | loss 3.89 | ppl 49.02\n", + "| epoch 20 | 1600/ 2727 batches | lr 1.89 | ms/batch 53.56 | loss 3.86 | ppl 47.33\n", + "| epoch 20 | 1800/ 2727 batches | lr 1.89 | ms/batch 53.52 | loss 3.91 | ppl 50.00\n", + "| epoch 20 | 2000/ 2727 batches | lr 1.89 | ms/batch 53.55 | loss 3.91 | ppl 49.91\n", + "| epoch 20 | 2200/ 2727 batches | lr 1.89 | ms/batch 53.53 | loss 3.91 | ppl 50.04\n", + "| epoch 20 | 2400/ 2727 batches | lr 1.89 | ms/batch 53.55 | loss 3.92 | ppl 50.39\n", + "| epoch 20 | 2600/ 2727 batches | lr 1.89 | ms/batch 53.53 | loss 3.92 | ppl 50.64\n", "-----------------------------------------------------------------------------------------\n", - "| epoch 2 | 200/ 383 batches | lr 4.75 | ms/batch 19.83 | loss 6.14 | ppl 463.13\n", + "| end of epoch 20 | time: 152.85s | valid loss 5.54 | valid ppl 253.74\n", "-----------------------------------------------------------------------------------------\n", - "| end of epoch 2 | time: 8.38s | valid loss 6.01 | valid ppl 406.56\n", + "| epoch 21 | 200/ 2727 batches | lr 1.79 | ms/batch 53.83 | loss 3.97 | ppl 52.84\n", + "| epoch 21 | 400/ 2727 batches | lr 1.79 | ms/batch 53.58 | loss 3.90 | ppl 49.32\n", + "| epoch 21 | 600/ 2727 batches | lr 1.79 | ms/batch 53.55 | loss 3.90 | ppl 49.43\n", + "| epoch 21 | 800/ 2727 batches | lr 1.79 | ms/batch 53.56 | loss 3.90 | ppl 49.36\n", + "| epoch 21 | 1000/ 2727 batches | lr 1.79 | ms/batch 53.58 | loss 3.86 | ppl 47.32\n", + "| epoch 21 | 1200/ 2727 batches | lr 1.79 | ms/batch 53.55 | loss 3.89 | ppl 49.08\n", + "| epoch 21 | 1400/ 2727 batches | lr 1.79 | ms/batch 53.55 | loss 3.85 | ppl 47.10\n", + "| epoch 21 | 1600/ 2727 batches | lr 1.79 | ms/batch 53.57 | loss 3.82 | ppl 45.54\n", + "| epoch 21 | 1800/ 2727 batches | lr 1.79 | ms/batch 53.55 | loss 3.87 | ppl 48.00\n", + "| epoch 21 | 2000/ 2727 batches | lr 1.79 | ms/batch 53.52 | loss 3.87 | ppl 47.92\n", + "| epoch 21 | 2200/ 2727 batches | lr 1.79 | ms/batch 53.51 | loss 3.87 | ppl 48.13\n", + "| epoch 21 | 2400/ 2727 batches | lr 1.79 | ms/batch 53.59 | loss 3.88 | ppl 48.32\n", + "| epoch 21 | 2600/ 2727 batches | lr 1.79 | ms/batch 53.53 | loss 3.89 | ppl 48.84\n", "-----------------------------------------------------------------------------------------\n", - "| epoch 3 | 200/ 383 batches | lr 4.51 | ms/batch 19.83 | loss 5.61 | ppl 273.67\n", + "| end of epoch 21 | time: 152.88s | valid loss 5.53 | valid ppl 252.72\n", "-----------------------------------------------------------------------------------------\n", - "| end of epoch 3 | time: 8.38s | valid loss 5.95 | valid ppl 383.10\n", + "| epoch 22 | 200/ 2727 batches | lr 1.70 | ms/batch 53.79 | loss 3.93 | ppl 50.71\n", + "| epoch 22 | 400/ 2727 batches | lr 1.70 | ms/batch 53.51 | loss 3.86 | ppl 47.49\n", + "| epoch 22 | 600/ 2727 batches | lr 1.70 | ms/batch 53.52 | loss 3.87 | ppl 47.74\n", + "| epoch 22 | 800/ 2727 batches | lr 1.70 | ms/batch 53.51 | loss 3.86 | ppl 47.44\n", + "| epoch 22 | 1000/ 2727 batches | lr 1.70 | ms/batch 53.47 | loss 3.82 | ppl 45.63\n", + "| epoch 22 | 1200/ 2727 batches | lr 1.70 | ms/batch 53.53 | loss 3.85 | ppl 47.21\n", + "| epoch 22 | 1400/ 2727 batches | lr 1.70 | ms/batch 53.55 | loss 3.82 | ppl 45.39\n", + "| epoch 22 | 1600/ 2727 batches | lr 1.70 | ms/batch 53.54 | loss 3.78 | ppl 43.90\n", + "| epoch 22 | 1800/ 2727 batches | lr 1.70 | ms/batch 53.53 | loss 3.83 | ppl 46.06\n", + "| epoch 22 | 2000/ 2727 batches | lr 1.70 | ms/batch 53.50 | loss 3.82 | ppl 45.82\n", + "| epoch 22 | 2200/ 2727 batches | lr 1.70 | ms/batch 53.56 | loss 3.83 | ppl 46.04\n", + "| epoch 22 | 2400/ 2727 batches | lr 1.70 | ms/batch 53.54 | loss 3.84 | ppl 46.38\n", + "| epoch 22 | 2600/ 2727 batches | lr 1.70 | ms/batch 53.55 | loss 3.85 | ppl 47.16\n", "-----------------------------------------------------------------------------------------\n", - "| epoch 4 | 200/ 383 batches | lr 4.29 | ms/batch 19.89 | loss 5.25 | ppl 190.90\n", + "| end of epoch 22 | time: 152.79s | valid loss 5.54 | valid ppl 254.96\n", "-----------------------------------------------------------------------------------------\n", - "| end of epoch 4 | time: 8.40s | valid loss 5.96 | valid ppl 386.38\n", + "| epoch 23 | 200/ 2727 batches | lr 1.62 | ms/batch 53.83 | loss 3.89 | ppl 48.94\n", + "| epoch 23 | 400/ 2727 batches | lr 1.62 | ms/batch 53.58 | loss 3.82 | ppl 45.63\n", + "| epoch 23 | 600/ 2727 batches | lr 1.62 | ms/batch 53.55 | loss 3.83 | ppl 46.01\n", + "| epoch 23 | 800/ 2727 batches | lr 1.62 | ms/batch 53.56 | loss 3.82 | ppl 45.62\n", + "| epoch 23 | 1000/ 2727 batches | lr 1.62 | ms/batch 53.56 | loss 3.79 | ppl 44.07\n", + "| epoch 23 | 1200/ 2727 batches | lr 1.62 | ms/batch 53.58 | loss 3.82 | ppl 45.52\n", + "| epoch 23 | 1400/ 2727 batches | lr 1.62 | ms/batch 53.58 | loss 3.78 | ppl 43.71\n", + "| epoch 23 | 1600/ 2727 batches | lr 1.62 | ms/batch 53.53 | loss 3.75 | ppl 42.52\n", + "| epoch 23 | 1800/ 2727 batches | lr 1.62 | ms/batch 53.57 | loss 3.80 | ppl 44.67\n", + "| epoch 23 | 2000/ 2727 batches | lr 1.62 | ms/batch 53.57 | loss 3.79 | ppl 44.26\n", + "| epoch 23 | 2200/ 2727 batches | lr 1.62 | ms/batch 53.55 | loss 3.80 | ppl 44.73\n", + "| epoch 23 | 2400/ 2727 batches | lr 1.62 | ms/batch 53.59 | loss 3.80 | ppl 44.89\n", + "| epoch 23 | 2600/ 2727 batches | lr 1.62 | ms/batch 53.58 | loss 3.82 | ppl 45.39\n", "-----------------------------------------------------------------------------------------\n", - "| epoch 5 | 200/ 383 batches | lr 4.07 | ms/batch 19.88 | loss 4.96 | ppl 142.55\n", + "| end of epoch 23 | time: 152.90s | valid loss 5.57 | valid ppl 263.65\n", "-----------------------------------------------------------------------------------------\n", - "| end of epoch 5 | time: 8.40s | valid loss 5.99 | valid ppl 398.76\n", + "| epoch 24 | 200/ 2727 batches | lr 1.54 | ms/batch 53.83 | loss 3.85 | ppl 47.20\n", + "| epoch 24 | 400/ 2727 batches | lr 1.54 | ms/batch 53.56 | loss 3.79 | ppl 44.15\n", + "| epoch 24 | 600/ 2727 batches | lr 1.54 | ms/batch 53.58 | loss 3.79 | ppl 44.42\n", + "| epoch 24 | 800/ 2727 batches | lr 1.54 | ms/batch 53.56 | loss 3.78 | ppl 43.96\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "| epoch 24 | 1000/ 2727 batches | lr 1.54 | ms/batch 53.58 | loss 3.75 | ppl 42.47\n", + "| epoch 24 | 1200/ 2727 batches | lr 1.54 | ms/batch 53.57 | loss 3.78 | ppl 44.00\n", + "| epoch 24 | 1400/ 2727 batches | lr 1.54 | ms/batch 53.59 | loss 3.74 | ppl 42.28\n", + "| epoch 24 | 1600/ 2727 batches | lr 1.54 | ms/batch 53.57 | loss 3.72 | ppl 41.13\n", + "| epoch 24 | 1800/ 2727 batches | lr 1.54 | ms/batch 53.57 | loss 3.76 | ppl 43.08\n", + "| epoch 24 | 2000/ 2727 batches | lr 1.54 | ms/batch 53.54 | loss 3.76 | ppl 42.81\n", + "| epoch 24 | 2200/ 2727 batches | lr 1.54 | ms/batch 53.55 | loss 3.77 | ppl 43.35\n", + "| epoch 24 | 2400/ 2727 batches | lr 1.54 | ms/batch 53.57 | loss 3.77 | ppl 43.25\n", + "| epoch 24 | 2600/ 2727 batches | lr 1.54 | ms/batch 53.56 | loss 3.77 | ppl 43.53\n", "-----------------------------------------------------------------------------------------\n", - "| epoch 6 | 200/ 383 batches | lr 3.87 | ms/batch 19.89 | loss 4.71 | ppl 111.09\n", + "| end of epoch 24 | time: 152.91s | valid loss 5.60 | valid ppl 271.38\n", "-----------------------------------------------------------------------------------------\n", - "| end of epoch 6 | time: 8.40s | valid loss 6.04 | valid ppl 421.64\n", + "| epoch 25 | 200/ 2727 batches | lr 1.46 | ms/batch 53.86 | loss 3.83 | ppl 45.88\n", + "| epoch 25 | 400/ 2727 batches | lr 1.46 | ms/batch 53.56 | loss 3.76 | ppl 42.91\n", + "| epoch 25 | 600/ 2727 batches | lr 1.46 | ms/batch 53.55 | loss 3.76 | ppl 42.82\n", + "| epoch 25 | 800/ 2727 batches | lr 1.46 | ms/batch 53.54 | loss 3.75 | ppl 42.46\n", + "| epoch 25 | 1000/ 2727 batches | lr 1.46 | ms/batch 53.56 | loss 3.72 | ppl 41.09\n", + "| epoch 25 | 1200/ 2727 batches | lr 1.46 | ms/batch 53.53 | loss 3.75 | ppl 42.47\n", + "| epoch 25 | 1400/ 2727 batches | lr 1.46 | ms/batch 53.56 | loss 3.71 | ppl 40.83\n", + "| epoch 25 | 1600/ 2727 batches | lr 1.46 | ms/batch 53.56 | loss 3.69 | ppl 39.91\n", + "| epoch 25 | 1800/ 2727 batches | lr 1.46 | ms/batch 53.55 | loss 3.73 | ppl 41.62\n", + "| epoch 25 | 2000/ 2727 batches | lr 1.46 | ms/batch 53.55 | loss 3.72 | ppl 41.31\n", + "| epoch 25 | 2200/ 2727 batches | lr 1.46 | ms/batch 53.53 | loss 3.73 | ppl 41.76\n", + "| epoch 25 | 2400/ 2727 batches | lr 1.46 | ms/batch 53.52 | loss 3.74 | ppl 41.91\n", + "| epoch 25 | 2600/ 2727 batches | lr 1.46 | ms/batch 53.56 | loss 3.74 | ppl 42.25\n", "-----------------------------------------------------------------------------------------\n", - "| epoch 7 | 200/ 383 batches | lr 3.68 | ms/batch 19.89 | loss 4.49 | ppl 89.44\n", + "| end of epoch 25 | time: 152.86s | valid loss 5.63 | valid ppl 278.62\n", "-----------------------------------------------------------------------------------------\n", - "| end of epoch 7 | time: 8.40s | valid loss 6.11 | valid ppl 452.51\n", + "| epoch 26 | 200/ 2727 batches | lr 1.39 | ms/batch 53.85 | loss 3.79 | ppl 44.36\n", + "| epoch 26 | 400/ 2727 batches | lr 1.39 | ms/batch 53.53 | loss 3.72 | ppl 41.40\n", + "| epoch 26 | 600/ 2727 batches | lr 1.39 | ms/batch 53.55 | loss 3.73 | ppl 41.71\n", + "| epoch 26 | 800/ 2727 batches | lr 1.39 | ms/batch 53.56 | loss 3.72 | ppl 41.34\n", + "| epoch 26 | 1000/ 2727 batches | lr 1.39 | ms/batch 53.51 | loss 3.69 | ppl 39.94\n", + "| epoch 26 | 1200/ 2727 batches | lr 1.39 | ms/batch 53.54 | loss 3.71 | ppl 41.05\n", + "| epoch 26 | 1400/ 2727 batches | lr 1.39 | ms/batch 53.58 | loss 3.68 | ppl 39.64\n", + "| epoch 26 | 1600/ 2727 batches | lr 1.39 | ms/batch 53.57 | loss 3.66 | ppl 38.83\n", + "| epoch 26 | 1800/ 2727 batches | lr 1.39 | ms/batch 53.56 | loss 3.69 | ppl 40.20\n", + "| epoch 26 | 2000/ 2727 batches | lr 1.39 | ms/batch 53.59 | loss 3.69 | ppl 40.13\n", + "| epoch 26 | 2200/ 2727 batches | lr 1.39 | ms/batch 53.53 | loss 3.70 | ppl 40.57\n", + "| epoch 26 | 2400/ 2727 batches | lr 1.39 | ms/batch 53.56 | loss 3.70 | ppl 40.39\n", + "| epoch 26 | 2600/ 2727 batches | lr 1.39 | ms/batch 53.56 | loss 3.72 | ppl 41.18\n", "-----------------------------------------------------------------------------------------\n", - "| epoch 8 | 200/ 383 batches | lr 3.49 | ms/batch 19.92 | loss 4.30 | ppl 73.72\n", + "| end of epoch 26 | time: 152.88s | valid loss 5.68 | valid ppl 291.76\n", "-----------------------------------------------------------------------------------------\n", - "| end of epoch 8 | time: 8.42s | valid loss 6.17 | valid ppl 479.04\n", + "| epoch 27 | 200/ 2727 batches | lr 1.32 | ms/batch 53.83 | loss 3.76 | ppl 43.16\n", + "| epoch 27 | 400/ 2727 batches | lr 1.32 | ms/batch 53.57 | loss 3.69 | ppl 40.11\n", + "| epoch 27 | 600/ 2727 batches | lr 1.32 | ms/batch 53.55 | loss 3.69 | ppl 40.24\n", + "| epoch 27 | 800/ 2727 batches | lr 1.32 | ms/batch 53.55 | loss 3.69 | ppl 40.17\n", + "| epoch 27 | 1000/ 2727 batches | lr 1.32 | ms/batch 53.59 | loss 3.66 | ppl 38.70\n", + "| epoch 27 | 1200/ 2727 batches | lr 1.32 | ms/batch 53.53 | loss 3.69 | ppl 39.91\n", + "| epoch 27 | 1400/ 2727 batches | lr 1.32 | ms/batch 53.53 | loss 3.65 | ppl 38.30\n", + "| epoch 27 | 1600/ 2727 batches | lr 1.32 | ms/batch 53.58 | loss 3.63 | ppl 37.70\n", + "| epoch 27 | 1800/ 2727 batches | lr 1.32 | ms/batch 53.57 | loss 3.67 | ppl 39.13\n", + "| epoch 27 | 2000/ 2727 batches | lr 1.32 | ms/batch 53.53 | loss 3.66 | ppl 38.95\n", + "| epoch 27 | 2200/ 2727 batches | lr 1.32 | ms/batch 53.58 | loss 3.67 | ppl 39.37\n", + "| epoch 27 | 2400/ 2727 batches | lr 1.32 | ms/batch 53.57 | loss 3.68 | ppl 39.50\n", + "| epoch 27 | 2600/ 2727 batches | lr 1.32 | ms/batch 53.55 | loss 3.68 | ppl 39.59\n", "-----------------------------------------------------------------------------------------\n", - "| epoch 9 | 200/ 383 batches | lr 3.32 | ms/batch 19.93 | loss 4.13 | ppl 62.43\n", + "| end of epoch 27 | time: 152.91s | valid loss 5.66 | valid ppl 286.29\n", "-----------------------------------------------------------------------------------------\n", - "| end of epoch 9 | time: 8.42s | valid loss 6.26 | valid ppl 522.27\n", + "| epoch 28 | 200/ 2727 batches | lr 1.25 | ms/batch 53.80 | loss 3.73 | ppl 41.62\n", + "| epoch 28 | 400/ 2727 batches | lr 1.25 | ms/batch 53.58 | loss 3.67 | ppl 39.16\n", + "| epoch 28 | 600/ 2727 batches | lr 1.25 | ms/batch 53.56 | loss 3.67 | ppl 39.34\n", + "| epoch 28 | 800/ 2727 batches | lr 1.25 | ms/batch 53.56 | loss 3.67 | ppl 39.07\n", + "| epoch 28 | 1000/ 2727 batches | lr 1.25 | ms/batch 53.59 | loss 3.63 | ppl 37.65\n", + "| epoch 28 | 1200/ 2727 batches | lr 1.25 | ms/batch 53.60 | loss 3.66 | ppl 38.76\n", + "| epoch 28 | 1400/ 2727 batches | lr 1.25 | ms/batch 53.57 | loss 3.62 | ppl 37.52\n", + "| epoch 28 | 1600/ 2727 batches | lr 1.25 | ms/batch 53.61 | loss 3.60 | ppl 36.77\n", + "| epoch 28 | 1800/ 2727 batches | lr 1.25 | ms/batch 53.54 | loss 3.64 | ppl 38.03\n", + "| epoch 28 | 2000/ 2727 batches | lr 1.25 | ms/batch 53.55 | loss 3.63 | ppl 37.81\n", + "| epoch 28 | 2200/ 2727 batches | lr 1.25 | ms/batch 53.56 | loss 3.65 | ppl 38.44\n", + "| epoch 28 | 2400/ 2727 batches | lr 1.25 | ms/batch 53.54 | loss 3.65 | ppl 38.44\n", + "| epoch 28 | 2600/ 2727 batches | lr 1.25 | ms/batch 53.54 | loss 3.66 | ppl 39.04\n", "-----------------------------------------------------------------------------------------\n", - "| epoch 10 | 200/ 383 batches | lr 3.15 | ms/batch 19.95 | loss 3.99 | ppl 53.96\n", + "| end of epoch 28 | time: 152.89s | valid loss 5.65 | valid ppl 285.63\n", "-----------------------------------------------------------------------------------------\n", - "| end of epoch 10 | time: 8.43s | valid loss 6.31 | valid ppl 548.35\n", + "| epoch 29 | 200/ 2727 batches | lr 1.19 | ms/batch 53.80 | loss 3.70 | ppl 40.64\n", + "| epoch 29 | 400/ 2727 batches | lr 1.19 | ms/batch 53.52 | loss 3.64 | ppl 38.25\n", + "| epoch 29 | 600/ 2727 batches | lr 1.19 | ms/batch 53.56 | loss 3.64 | ppl 38.23\n", + "| epoch 29 | 800/ 2727 batches | lr 1.19 | ms/batch 53.58 | loss 3.63 | ppl 37.88\n", + "| epoch 29 | 1000/ 2727 batches | lr 1.19 | ms/batch 53.50 | loss 3.60 | ppl 36.69\n", + "| epoch 29 | 1200/ 2727 batches | lr 1.19 | ms/batch 53.55 | loss 3.64 | ppl 37.91\n", + "| epoch 29 | 1400/ 2727 batches | lr 1.19 | ms/batch 53.55 | loss 3.60 | ppl 36.52\n", + "| epoch 29 | 1600/ 2727 batches | lr 1.19 | ms/batch 53.54 | loss 3.58 | ppl 35.87\n", + "| epoch 29 | 1800/ 2727 batches | lr 1.19 | ms/batch 53.54 | loss 3.61 | ppl 36.97\n", + "| epoch 29 | 2000/ 2727 batches | lr 1.19 | ms/batch 53.50 | loss 3.61 | ppl 36.95\n", + "| epoch 29 | 2200/ 2727 batches | lr 1.19 | ms/batch 53.53 | loss 3.62 | ppl 37.32\n", + "| epoch 29 | 2400/ 2727 batches | lr 1.19 | ms/batch 53.54 | loss 3.62 | ppl 37.20\n", + "| epoch 29 | 2600/ 2727 batches | lr 1.19 | ms/batch 53.50 | loss 3.64 | ppl 37.98\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 29 | time: 152.82s | valid loss 5.69 | valid ppl 294.94\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 30 | 200/ 2727 batches | lr 1.13 | ms/batch 53.83 | loss 3.68 | ppl 39.81\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "| epoch 30 | 400/ 2727 batches | lr 1.13 | ms/batch 53.55 | loss 3.61 | ppl 37.10\n", + "| epoch 30 | 600/ 2727 batches | lr 1.13 | ms/batch 53.51 | loss 3.62 | ppl 37.45\n", + "| epoch 30 | 800/ 2727 batches | lr 1.13 | ms/batch 53.54 | loss 3.61 | ppl 36.94\n", + "| epoch 30 | 1000/ 2727 batches | lr 1.13 | ms/batch 53.56 | loss 3.58 | ppl 35.89\n", + "| epoch 30 | 1200/ 2727 batches | lr 1.13 | ms/batch 53.56 | loss 3.61 | ppl 36.84\n", + "| epoch 30 | 1400/ 2727 batches | lr 1.13 | ms/batch 53.54 | loss 3.57 | ppl 35.52\n", + "| epoch 30 | 1600/ 2727 batches | lr 1.13 | ms/batch 53.56 | loss 3.56 | ppl 35.13\n", + "| epoch 30 | 1800/ 2727 batches | lr 1.13 | ms/batch 53.55 | loss 3.58 | ppl 36.04\n", + "| epoch 30 | 2000/ 2727 batches | lr 1.13 | ms/batch 53.57 | loss 3.58 | ppl 35.96\n", + "| epoch 30 | 2200/ 2727 batches | lr 1.13 | ms/batch 53.55 | loss 3.60 | ppl 36.52\n", + "| epoch 30 | 2400/ 2727 batches | lr 1.13 | ms/batch 53.55 | loss 3.60 | ppl 36.44\n", + "| epoch 30 | 2600/ 2727 batches | lr 1.13 | ms/batch 53.55 | loss 3.61 | ppl 36.99\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 30 | time: 152.86s | valid loss 5.71 | valid ppl 303.11\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 31 | 200/ 2727 batches | lr 1.07 | ms/batch 53.78 | loss 3.66 | ppl 38.97\n", + "| epoch 31 | 400/ 2727 batches | lr 1.07 | ms/batch 53.54 | loss 3.59 | ppl 36.20\n", + "| epoch 31 | 600/ 2727 batches | lr 1.07 | ms/batch 53.56 | loss 3.60 | ppl 36.42\n", + "| epoch 31 | 800/ 2727 batches | lr 1.07 | ms/batch 53.58 | loss 3.59 | ppl 36.21\n", + "| epoch 31 | 1000/ 2727 batches | lr 1.07 | ms/batch 53.54 | loss 3.56 | ppl 35.05\n", + "| epoch 31 | 1200/ 2727 batches | lr 1.07 | ms/batch 53.55 | loss 3.58 | ppl 35.99\n", + "| epoch 31 | 1400/ 2727 batches | lr 1.07 | ms/batch 53.57 | loss 3.55 | ppl 34.82\n", + "| epoch 31 | 1600/ 2727 batches | lr 1.07 | ms/batch 53.57 | loss 3.54 | ppl 34.43\n", + "| epoch 31 | 1800/ 2727 batches | lr 1.07 | ms/batch 53.54 | loss 3.56 | ppl 35.21\n", + "| epoch 31 | 2000/ 2727 batches | lr 1.07 | ms/batch 53.58 | loss 3.56 | ppl 35.07\n", + "| epoch 31 | 2200/ 2727 batches | lr 1.07 | ms/batch 53.57 | loss 3.57 | ppl 35.66\n", + "| epoch 31 | 2400/ 2727 batches | lr 1.07 | ms/batch 53.54 | loss 3.57 | ppl 35.53\n", + "| epoch 31 | 2600/ 2727 batches | lr 1.07 | ms/batch 53.54 | loss 3.58 | ppl 35.99\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 31 | time: 152.87s | valid loss 5.71 | valid ppl 300.48\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 32 | 200/ 2727 batches | lr 1.02 | ms/batch 53.82 | loss 3.64 | ppl 38.07\n", + "| epoch 32 | 400/ 2727 batches | lr 1.02 | ms/batch 53.55 | loss 3.57 | ppl 35.65\n", + "| epoch 32 | 600/ 2727 batches | lr 1.02 | ms/batch 53.52 | loss 3.57 | ppl 35.54\n", + "| epoch 32 | 800/ 2727 batches | lr 1.02 | ms/batch 53.54 | loss 3.57 | ppl 35.46\n", + "| epoch 32 | 1000/ 2727 batches | lr 1.02 | ms/batch 53.56 | loss 3.53 | ppl 34.28\n", + "| epoch 32 | 1200/ 2727 batches | lr 1.02 | ms/batch 53.60 | loss 3.57 | ppl 35.40\n", + "| epoch 32 | 1400/ 2727 batches | lr 1.02 | ms/batch 53.52 | loss 3.53 | ppl 34.04\n", + "| epoch 32 | 1600/ 2727 batches | lr 1.02 | ms/batch 53.54 | loss 3.52 | ppl 33.70\n", + "| epoch 32 | 1800/ 2727 batches | lr 1.02 | ms/batch 53.53 | loss 3.54 | ppl 34.47\n", + "| epoch 32 | 2000/ 2727 batches | lr 1.02 | ms/batch 53.56 | loss 3.54 | ppl 34.34\n", + "| epoch 32 | 2200/ 2727 batches | lr 1.02 | ms/batch 53.54 | loss 3.55 | ppl 34.93\n", + "| epoch 32 | 2400/ 2727 batches | lr 1.02 | ms/batch 53.56 | loss 3.55 | ppl 34.84\n", + "| epoch 32 | 2600/ 2727 batches | lr 1.02 | ms/batch 53.50 | loss 3.57 | ppl 35.47\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 32 | time: 152.84s | valid loss 5.77 | valid ppl 319.05\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 33 | 200/ 2727 batches | lr 0.97 | ms/batch 53.81 | loss 3.61 | ppl 37.13\n", + "| epoch 33 | 400/ 2727 batches | lr 0.97 | ms/batch 53.56 | loss 3.55 | ppl 34.83\n", + "| epoch 33 | 600/ 2727 batches | lr 0.97 | ms/batch 53.53 | loss 3.55 | ppl 34.87\n", + "| epoch 33 | 800/ 2727 batches | lr 0.97 | ms/batch 53.55 | loss 3.55 | ppl 34.64\n", + "| epoch 33 | 1000/ 2727 batches | lr 0.97 | ms/batch 53.57 | loss 3.51 | ppl 33.61\n", + "| epoch 33 | 1200/ 2727 batches | lr 0.97 | ms/batch 53.59 | loss 3.54 | ppl 34.49\n", + "| epoch 33 | 1400/ 2727 batches | lr 0.97 | ms/batch 53.51 | loss 3.50 | ppl 33.19\n", + "| epoch 33 | 1600/ 2727 batches | lr 0.97 | ms/batch 53.60 | loss 3.50 | ppl 33.13\n", + "| epoch 33 | 1800/ 2727 batches | lr 0.97 | ms/batch 53.56 | loss 3.52 | ppl 33.70\n", + "| epoch 33 | 2000/ 2727 batches | lr 0.97 | ms/batch 53.54 | loss 3.52 | ppl 33.72\n", + "| epoch 33 | 2200/ 2727 batches | lr 0.97 | ms/batch 53.56 | loss 3.54 | ppl 34.31\n", + "| epoch 33 | 2400/ 2727 batches | lr 0.97 | ms/batch 53.56 | loss 3.53 | ppl 34.12\n", + "| epoch 33 | 2600/ 2727 batches | lr 0.97 | ms/batch 53.55 | loss 3.55 | ppl 34.69\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 33 | time: 152.88s | valid loss 5.75 | valid ppl 315.11\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 34 | 200/ 2727 batches | lr 0.92 | ms/batch 53.84 | loss 3.60 | ppl 36.43\n", + "| epoch 34 | 400/ 2727 batches | lr 0.92 | ms/batch 53.55 | loss 3.53 | ppl 34.17\n", + "| epoch 34 | 600/ 2727 batches | lr 0.92 | ms/batch 53.58 | loss 3.53 | ppl 34.24\n", + "| epoch 34 | 800/ 2727 batches | lr 0.92 | ms/batch 53.57 | loss 3.53 | ppl 34.12\n", + "| epoch 34 | 1000/ 2727 batches | lr 0.92 | ms/batch 53.56 | loss 3.49 | ppl 32.90\n", + "| epoch 34 | 1200/ 2727 batches | lr 0.92 | ms/batch 53.57 | loss 3.52 | ppl 33.84\n", + "| epoch 34 | 1400/ 2727 batches | lr 0.92 | ms/batch 53.58 | loss 3.49 | ppl 32.76\n", + "| epoch 34 | 1600/ 2727 batches | lr 0.92 | ms/batch 53.57 | loss 3.48 | ppl 32.41\n", + "| epoch 34 | 1800/ 2727 batches | lr 0.92 | ms/batch 53.58 | loss 3.49 | ppl 32.88\n", + "| epoch 34 | 2000/ 2727 batches | lr 0.92 | ms/batch 53.55 | loss 3.49 | ppl 32.92\n", + "| epoch 34 | 2200/ 2727 batches | lr 0.92 | ms/batch 53.58 | loss 3.51 | ppl 33.45\n", + "| epoch 34 | 2400/ 2727 batches | lr 0.92 | ms/batch 53.57 | loss 3.51 | ppl 33.37\n", + "| epoch 34 | 2600/ 2727 batches | lr 0.92 | ms/batch 53.55 | loss 3.52 | ppl 33.91\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 34 | time: 152.92s | valid loss 5.78 | valid ppl 322.80\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 35 | 200/ 2727 batches | lr 0.87 | ms/batch 53.84 | loss 3.58 | ppl 35.78\n", + "| epoch 35 | 400/ 2727 batches | lr 0.87 | ms/batch 53.56 | loss 3.51 | ppl 33.57\n", + "| epoch 35 | 600/ 2727 batches | lr 0.87 | ms/batch 53.59 | loss 3.51 | ppl 33.51\n", + "| epoch 35 | 800/ 2727 batches | lr 0.87 | ms/batch 53.59 | loss 3.50 | ppl 33.28\n", + "| epoch 35 | 1000/ 2727 batches | lr 0.87 | ms/batch 53.54 | loss 3.47 | ppl 32.18\n", + "| epoch 35 | 1200/ 2727 batches | lr 0.87 | ms/batch 53.59 | loss 3.51 | ppl 33.34\n", + "| epoch 35 | 1400/ 2727 batches | lr 0.87 | ms/batch 53.57 | loss 3.47 | ppl 31.99\n", + "| epoch 35 | 1600/ 2727 batches | lr 0.87 | ms/batch 53.57 | loss 3.46 | ppl 31.87\n", + "| epoch 35 | 1800/ 2727 batches | lr 0.87 | ms/batch 53.58 | loss 3.48 | ppl 32.39\n", + "| epoch 35 | 2000/ 2727 batches | lr 0.87 | ms/batch 53.59 | loss 3.47 | ppl 32.23\n", + "| epoch 35 | 2200/ 2727 batches | lr 0.87 | ms/batch 53.55 | loss 3.50 | ppl 33.00\n", + "| epoch 35 | 2400/ 2727 batches | lr 0.87 | ms/batch 53.56 | loss 3.49 | ppl 32.83\n", + "| epoch 35 | 2600/ 2727 batches | lr 0.87 | ms/batch 53.56 | loss 3.51 | ppl 33.51\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 35 | time: 152.93s | valid loss 5.77 | valid ppl 321.70\n", + "-----------------------------------------------------------------------------------------\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "| epoch 36 | 200/ 2727 batches | lr 0.83 | ms/batch 53.83 | loss 3.56 | ppl 35.18\n", + "| epoch 36 | 400/ 2727 batches | lr 0.83 | ms/batch 53.51 | loss 3.49 | ppl 32.93\n", + "| epoch 36 | 600/ 2727 batches | lr 0.83 | ms/batch 53.53 | loss 3.50 | ppl 32.98\n", + "| epoch 36 | 800/ 2727 batches | lr 0.83 | ms/batch 53.58 | loss 3.49 | ppl 32.95\n", + "| epoch 36 | 1000/ 2727 batches | lr 0.83 | ms/batch 53.58 | loss 3.46 | ppl 31.83\n", + "| epoch 36 | 1200/ 2727 batches | lr 0.83 | ms/batch 53.57 | loss 3.49 | ppl 32.66\n", + "| epoch 36 | 1400/ 2727 batches | lr 0.83 | ms/batch 53.52 | loss 3.46 | ppl 31.67\n", + "| epoch 36 | 1600/ 2727 batches | lr 0.83 | ms/batch 53.55 | loss 3.45 | ppl 31.48\n", + "| epoch 36 | 1800/ 2727 batches | lr 0.83 | ms/batch 53.54 | loss 3.47 | ppl 31.98\n", + "| epoch 36 | 2000/ 2727 batches | lr 0.83 | ms/batch 53.54 | loss 3.46 | ppl 31.85\n", + "| epoch 36 | 2200/ 2727 batches | lr 0.83 | ms/batch 53.55 | loss 3.48 | ppl 32.55\n", + "| epoch 36 | 2400/ 2727 batches | lr 0.83 | ms/batch 53.54 | loss 3.48 | ppl 32.37\n", + "| epoch 36 | 2600/ 2727 batches | lr 0.83 | ms/batch 53.59 | loss 3.50 | ppl 32.99\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 36 | time: 152.86s | valid loss 5.79 | valid ppl 325.64\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 37 | 200/ 2727 batches | lr 0.79 | ms/batch 53.81 | loss 3.55 | ppl 34.66\n", + "| epoch 37 | 400/ 2727 batches | lr 0.79 | ms/batch 53.58 | loss 3.48 | ppl 32.32\n", + "| epoch 37 | 600/ 2727 batches | lr 0.79 | ms/batch 53.57 | loss 3.48 | ppl 32.49\n", + "| epoch 37 | 800/ 2727 batches | lr 0.79 | ms/batch 53.55 | loss 3.48 | ppl 32.35\n", + "| epoch 37 | 1000/ 2727 batches | lr 0.79 | ms/batch 53.53 | loss 3.44 | ppl 31.18\n", + "| epoch 37 | 1200/ 2727 batches | lr 0.79 | ms/batch 53.50 | loss 3.47 | ppl 32.21\n", + "| epoch 37 | 1400/ 2727 batches | lr 0.79 | ms/batch 53.50 | loss 3.44 | ppl 31.11\n", + "| epoch 37 | 1600/ 2727 batches | lr 0.79 | ms/batch 53.54 | loss 3.43 | ppl 30.90\n", + "| epoch 37 | 1800/ 2727 batches | lr 0.79 | ms/batch 53.56 | loss 3.44 | ppl 31.32\n", + "| epoch 37 | 2000/ 2727 batches | lr 0.79 | ms/batch 53.54 | loss 3.44 | ppl 31.32\n", + "| epoch 37 | 2200/ 2727 batches | lr 0.79 | ms/batch 53.57 | loss 3.46 | ppl 31.85\n", + "| epoch 37 | 2400/ 2727 batches | lr 0.79 | ms/batch 53.59 | loss 3.46 | ppl 31.68\n", + "| epoch 37 | 2600/ 2727 batches | lr 0.79 | ms/batch 53.53 | loss 3.48 | ppl 32.34\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 37 | time: 152.85s | valid loss 5.81 | valid ppl 332.87\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 38 | 200/ 2727 batches | lr 0.75 | ms/batch 53.83 | loss 3.53 | ppl 33.96\n", + "| epoch 38 | 400/ 2727 batches | lr 0.75 | ms/batch 53.58 | loss 3.46 | ppl 31.95\n", + "| epoch 38 | 600/ 2727 batches | lr 0.75 | ms/batch 53.57 | loss 3.46 | ppl 31.93\n", + "| epoch 38 | 800/ 2727 batches | lr 0.75 | ms/batch 53.56 | loss 3.46 | ppl 31.78\n", + "| epoch 38 | 1000/ 2727 batches | lr 0.75 | ms/batch 53.56 | loss 3.42 | ppl 30.67\n", + "| epoch 38 | 1200/ 2727 batches | lr 0.75 | ms/batch 53.57 | loss 3.45 | ppl 31.64\n", + "| epoch 38 | 1400/ 2727 batches | lr 0.75 | ms/batch 53.52 | loss 3.42 | ppl 30.59\n", + "| epoch 38 | 1600/ 2727 batches | lr 0.75 | ms/batch 53.58 | loss 3.42 | ppl 30.46\n", + "| epoch 38 | 1800/ 2727 batches | lr 0.75 | ms/batch 53.56 | loss 3.43 | ppl 30.89\n", + "| epoch 38 | 2000/ 2727 batches | lr 0.75 | ms/batch 53.55 | loss 3.43 | ppl 30.79\n", + "| epoch 38 | 2200/ 2727 batches | lr 0.75 | ms/batch 53.57 | loss 3.45 | ppl 31.52\n", + "| epoch 38 | 2400/ 2727 batches | lr 0.75 | ms/batch 53.53 | loss 3.45 | ppl 31.37\n", + "| epoch 38 | 2600/ 2727 batches | lr 0.75 | ms/batch 53.56 | loss 3.46 | ppl 31.90\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 38 | time: 152.91s | valid loss 5.85 | valid ppl 345.69\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 39 | 200/ 2727 batches | lr 0.71 | ms/batch 53.88 | loss 3.51 | ppl 33.48\n", + "| epoch 39 | 400/ 2727 batches | lr 0.71 | ms/batch 53.56 | loss 3.45 | ppl 31.43\n", + "| epoch 39 | 600/ 2727 batches | lr 0.71 | ms/batch 53.56 | loss 3.45 | ppl 31.60\n", + "| epoch 39 | 800/ 2727 batches | lr 0.71 | ms/batch 53.57 | loss 3.44 | ppl 31.28\n", + "| epoch 39 | 1000/ 2727 batches | lr 0.71 | ms/batch 53.57 | loss 3.42 | ppl 30.42\n", + "| epoch 39 | 1200/ 2727 batches | lr 0.71 | ms/batch 53.56 | loss 3.44 | ppl 31.29\n", + "| epoch 39 | 1400/ 2727 batches | lr 0.71 | ms/batch 53.56 | loss 3.41 | ppl 30.15\n", + "| epoch 39 | 1600/ 2727 batches | lr 0.71 | ms/batch 53.57 | loss 3.40 | ppl 30.06\n", + "| epoch 39 | 1800/ 2727 batches | lr 0.71 | ms/batch 53.57 | loss 3.42 | ppl 30.46\n", + "| epoch 39 | 2000/ 2727 batches | lr 0.71 | ms/batch 53.55 | loss 3.41 | ppl 30.38\n", + "| epoch 39 | 2200/ 2727 batches | lr 0.71 | ms/batch 53.57 | loss 3.43 | ppl 31.02\n", + "| epoch 39 | 2400/ 2727 batches | lr 0.71 | ms/batch 53.61 | loss 3.43 | ppl 30.88\n", + "| epoch 39 | 2600/ 2727 batches | lr 0.71 | ms/batch 53.58 | loss 3.44 | ppl 31.33\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 39 | time: 152.92s | valid loss 5.84 | valid ppl 343.47\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 40 | 200/ 2727 batches | lr 0.68 | ms/batch 53.87 | loss 3.50 | ppl 33.00\n", + "| epoch 40 | 400/ 2727 batches | lr 0.68 | ms/batch 53.57 | loss 3.44 | ppl 31.12\n", + "| epoch 40 | 600/ 2727 batches | lr 0.68 | ms/batch 53.54 | loss 3.44 | ppl 31.19\n", + "| epoch 40 | 800/ 2727 batches | lr 0.68 | ms/batch 53.55 | loss 3.43 | ppl 30.90\n", + "| epoch 40 | 1000/ 2727 batches | lr 0.68 | ms/batch 53.55 | loss 3.40 | ppl 29.94\n", + "| epoch 40 | 1200/ 2727 batches | lr 0.68 | ms/batch 53.55 | loss 3.43 | ppl 30.78\n", + "| epoch 40 | 1400/ 2727 batches | lr 0.68 | ms/batch 53.55 | loss 3.40 | ppl 29.83\n", + "| epoch 40 | 1600/ 2727 batches | lr 0.68 | ms/batch 53.52 | loss 3.39 | ppl 29.69\n", + "| epoch 40 | 1800/ 2727 batches | lr 0.68 | ms/batch 53.54 | loss 3.40 | ppl 30.10\n", + "| epoch 40 | 2000/ 2727 batches | lr 0.68 | ms/batch 53.55 | loss 3.40 | ppl 30.00\n", + "| epoch 40 | 2200/ 2727 batches | lr 0.68 | ms/batch 53.57 | loss 3.42 | ppl 30.59\n", + "| epoch 40 | 2400/ 2727 batches | lr 0.68 | ms/batch 53.56 | loss 3.42 | ppl 30.57\n", + "| epoch 40 | 2600/ 2727 batches | lr 0.68 | ms/batch 53.55 | loss 3.43 | ppl 30.90\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 40 | time: 152.88s | valid loss 5.83 | valid ppl 338.70\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 41 | 200/ 2727 batches | lr 0.64 | ms/batch 53.80 | loss 3.48 | ppl 32.57\n", + "| epoch 41 | 400/ 2727 batches | lr 0.64 | ms/batch 53.50 | loss 3.42 | ppl 30.64\n", + "| epoch 41 | 600/ 2727 batches | lr 0.64 | ms/batch 53.54 | loss 3.42 | ppl 30.65\n", + "| epoch 41 | 800/ 2727 batches | lr 0.64 | ms/batch 53.52 | loss 3.42 | ppl 30.49\n", + "| epoch 41 | 1000/ 2727 batches | lr 0.64 | ms/batch 53.56 | loss 3.39 | ppl 29.54\n", + "| epoch 41 | 1200/ 2727 batches | lr 0.64 | ms/batch 53.56 | loss 3.41 | ppl 30.40\n", + "| epoch 41 | 1400/ 2727 batches | lr 0.64 | ms/batch 53.54 | loss 3.38 | ppl 29.44\n", + "| epoch 41 | 1600/ 2727 batches | lr 0.64 | ms/batch 53.54 | loss 3.38 | ppl 29.33\n", + "| epoch 41 | 1800/ 2727 batches | lr 0.64 | ms/batch 53.54 | loss 3.39 | ppl 29.65\n", + "| epoch 41 | 2000/ 2727 batches | lr 0.64 | ms/batch 53.59 | loss 3.39 | ppl 29.57\n", + "| epoch 41 | 2200/ 2727 batches | lr 0.64 | ms/batch 53.55 | loss 3.41 | ppl 30.35\n", + "| epoch 41 | 2400/ 2727 batches | lr 0.64 | ms/batch 53.57 | loss 3.40 | ppl 30.11\n", + "| epoch 41 | 2600/ 2727 batches | lr 0.64 | ms/batch 53.53 | loss 3.42 | ppl 30.61\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 41 | time: 152.85s | valid loss 5.88 | valid ppl 356.84\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 42 | 200/ 2727 batches | lr 0.61 | ms/batch 53.80 | loss 3.47 | ppl 32.18\n", + "| epoch 42 | 400/ 2727 batches | lr 0.61 | ms/batch 53.55 | loss 3.41 | ppl 30.24\n", + "| epoch 42 | 600/ 2727 batches | lr 0.61 | ms/batch 53.57 | loss 3.41 | ppl 30.26\n", + "| epoch 42 | 800/ 2727 batches | lr 0.61 | ms/batch 53.56 | loss 3.41 | ppl 30.22\n", + "| epoch 42 | 1000/ 2727 batches | lr 0.61 | ms/batch 53.52 | loss 3.37 | ppl 29.20\n", + "| epoch 42 | 1200/ 2727 batches | lr 0.61 | ms/batch 53.55 | loss 3.41 | ppl 30.19\n", + "| epoch 42 | 1400/ 2727 batches | lr 0.61 | ms/batch 53.57 | loss 3.37 | ppl 29.18\n", + "| epoch 42 | 1600/ 2727 batches | lr 0.61 | ms/batch 53.55 | loss 3.37 | ppl 29.03\n", + "| epoch 42 | 1800/ 2727 batches | lr 0.61 | ms/batch 53.58 | loss 3.38 | ppl 29.29\n", + "| epoch 42 | 2000/ 2727 batches | lr 0.61 | ms/batch 53.56 | loss 3.37 | ppl 29.19\n", + "| epoch 42 | 2200/ 2727 batches | lr 0.61 | ms/batch 53.56 | loss 3.40 | ppl 30.07\n", + "| epoch 42 | 2400/ 2727 batches | lr 0.61 | ms/batch 53.57 | loss 3.39 | ppl 29.71\n", + "| epoch 42 | 2600/ 2727 batches | lr 0.61 | ms/batch 53.51 | loss 3.41 | ppl 30.28\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 42 | time: 152.86s | valid loss 5.89 | valid ppl 361.10\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 43 | 200/ 2727 batches | lr 0.58 | ms/batch 53.84 | loss 3.46 | ppl 31.79\n", + "| epoch 43 | 400/ 2727 batches | lr 0.58 | ms/batch 53.55 | loss 3.40 | ppl 29.93\n", + "| epoch 43 | 600/ 2727 batches | lr 0.58 | ms/batch 53.54 | loss 3.41 | ppl 30.14\n", + "| epoch 43 | 800/ 2727 batches | lr 0.58 | ms/batch 53.54 | loss 3.40 | ppl 29.82\n", + "| epoch 43 | 1000/ 2727 batches | lr 0.58 | ms/batch 53.55 | loss 3.37 | ppl 28.96\n", + "| epoch 43 | 1200/ 2727 batches | lr 0.58 | ms/batch 53.52 | loss 3.40 | ppl 29.83\n", + "| epoch 43 | 1400/ 2727 batches | lr 0.58 | ms/batch 53.55 | loss 3.36 | ppl 28.81\n", + "| epoch 43 | 1600/ 2727 batches | lr 0.58 | ms/batch 53.53 | loss 3.35 | ppl 28.63\n", + "| epoch 43 | 1800/ 2727 batches | lr 0.58 | ms/batch 53.57 | loss 3.37 | ppl 28.98\n", + "| epoch 43 | 2000/ 2727 batches | lr 0.58 | ms/batch 53.56 | loss 3.36 | ppl 28.86\n", + "| epoch 43 | 2200/ 2727 batches | lr 0.58 | ms/batch 53.57 | loss 3.39 | ppl 29.56\n", + "| epoch 43 | 2400/ 2727 batches | lr 0.58 | ms/batch 53.56 | loss 3.38 | ppl 29.23\n", + "| epoch 43 | 2600/ 2727 batches | lr 0.58 | ms/batch 53.55 | loss 3.39 | ppl 29.80\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 43 | time: 152.86s | valid loss 5.89 | valid ppl 360.64\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 44 | 200/ 2727 batches | lr 0.55 | ms/batch 53.79 | loss 3.45 | ppl 31.40\n", + "| epoch 44 | 400/ 2727 batches | lr 0.55 | ms/batch 53.57 | loss 3.39 | ppl 29.58\n", + "| epoch 44 | 600/ 2727 batches | lr 0.55 | ms/batch 53.51 | loss 3.39 | ppl 29.67\n", + "| epoch 44 | 800/ 2727 batches | lr 0.55 | ms/batch 53.50 | loss 3.39 | ppl 29.62\n", + "| epoch 44 | 1000/ 2727 batches | lr 0.55 | ms/batch 53.52 | loss 3.35 | ppl 28.53\n", + "| epoch 44 | 1200/ 2727 batches | lr 0.55 | ms/batch 53.51 | loss 3.38 | ppl 29.51\n", + "| epoch 44 | 1400/ 2727 batches | lr 0.55 | ms/batch 53.52 | loss 3.34 | ppl 28.34\n", + "| epoch 44 | 1600/ 2727 batches | lr 0.55 | ms/batch 53.54 | loss 3.35 | ppl 28.40\n", + "| epoch 44 | 1800/ 2727 batches | lr 0.55 | ms/batch 53.53 | loss 3.36 | ppl 28.73\n", + "| epoch 44 | 2000/ 2727 batches | lr 0.55 | ms/batch 53.50 | loss 3.35 | ppl 28.62\n", + "| epoch 44 | 2200/ 2727 batches | lr 0.55 | ms/batch 53.53 | loss 3.38 | ppl 29.46\n", + "| epoch 44 | 2400/ 2727 batches | lr 0.55 | ms/batch 53.56 | loss 3.37 | ppl 29.15\n", + "| epoch 44 | 2600/ 2727 batches | lr 0.55 | ms/batch 53.51 | loss 3.39 | ppl 29.55\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 44 | time: 152.80s | valid loss 5.90 | valid ppl 363.28\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 45 | 200/ 2727 batches | lr 0.52 | ms/batch 53.82 | loss 3.44 | ppl 31.17\n", + "| epoch 45 | 400/ 2727 batches | lr 0.52 | ms/batch 53.53 | loss 3.37 | ppl 29.21\n", + "| epoch 45 | 600/ 2727 batches | lr 0.52 | ms/batch 53.54 | loss 3.38 | ppl 29.47\n", + "| epoch 45 | 800/ 2727 batches | lr 0.52 | ms/batch 53.56 | loss 3.37 | ppl 29.18\n", + "| epoch 45 | 1000/ 2727 batches | lr 0.52 | ms/batch 53.56 | loss 3.34 | ppl 28.18\n", + "| epoch 45 | 1200/ 2727 batches | lr 0.52 | ms/batch 53.55 | loss 3.37 | ppl 29.21\n", + "| epoch 45 | 1400/ 2727 batches | lr 0.52 | ms/batch 53.59 | loss 3.34 | ppl 28.17\n", + "| epoch 45 | 1600/ 2727 batches | lr 0.52 | ms/batch 53.60 | loss 3.34 | ppl 28.12\n", + "| epoch 45 | 1800/ 2727 batches | lr 0.52 | ms/batch 53.56 | loss 3.34 | ppl 28.34\n", + "| epoch 45 | 2000/ 2727 batches | lr 0.52 | ms/batch 53.60 | loss 3.34 | ppl 28.15\n", + "| epoch 45 | 2200/ 2727 batches | lr 0.52 | ms/batch 53.53 | loss 3.37 | ppl 29.09\n", + "| epoch 45 | 2400/ 2727 batches | lr 0.52 | ms/batch 53.58 | loss 3.36 | ppl 28.73\n", + "| epoch 45 | 2600/ 2727 batches | lr 0.52 | ms/batch 53.57 | loss 3.38 | ppl 29.24\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 45 | time: 152.91s | valid loss 5.88 | valid ppl 357.62\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 46 | 200/ 2727 batches | lr 0.50 | ms/batch 53.82 | loss 3.43 | ppl 30.82\n", + "| epoch 46 | 400/ 2727 batches | lr 0.50 | ms/batch 53.58 | loss 3.37 | ppl 29.09\n", + "| epoch 46 | 600/ 2727 batches | lr 0.50 | ms/batch 53.60 | loss 3.37 | ppl 29.09\n", + "| epoch 46 | 800/ 2727 batches | lr 0.50 | ms/batch 53.53 | loss 3.37 | ppl 29.05\n", + "| epoch 46 | 1000/ 2727 batches | lr 0.50 | ms/batch 53.49 | loss 3.33 | ppl 27.90\n", + "| epoch 46 | 1200/ 2727 batches | lr 0.50 | ms/batch 53.54 | loss 3.36 | ppl 28.87\n", + "| epoch 46 | 1400/ 2727 batches | lr 0.50 | ms/batch 53.54 | loss 3.33 | ppl 27.82\n", + "| epoch 46 | 1600/ 2727 batches | lr 0.50 | ms/batch 53.51 | loss 3.33 | ppl 28.01\n", + "| epoch 46 | 1800/ 2727 batches | lr 0.50 | ms/batch 53.53 | loss 3.34 | ppl 28.27\n", + "| epoch 46 | 2000/ 2727 batches | lr 0.50 | ms/batch 53.55 | loss 3.33 | ppl 27.98\n", + "| epoch 46 | 2200/ 2727 batches | lr 0.50 | ms/batch 53.55 | loss 3.36 | ppl 28.71\n", + "| epoch 46 | 2400/ 2727 batches | lr 0.50 | ms/batch 53.59 | loss 3.36 | ppl 28.66\n", + "| epoch 46 | 2600/ 2727 batches | lr 0.50 | ms/batch 53.54 | loss 3.37 | ppl 29.06\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 46 | time: 152.86s | valid loss 5.90 | valid ppl 365.76\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 47 | 200/ 2727 batches | lr 0.47 | ms/batch 53.82 | loss 3.42 | ppl 30.45\n", + "| epoch 47 | 400/ 2727 batches | lr 0.47 | ms/batch 53.51 | loss 3.36 | ppl 28.78\n", + "| epoch 47 | 600/ 2727 batches | lr 0.47 | ms/batch 53.48 | loss 3.36 | ppl 28.84\n", + "| epoch 47 | 800/ 2727 batches | lr 0.47 | ms/batch 53.53 | loss 3.36 | ppl 28.80\n", + "| epoch 47 | 1000/ 2727 batches | lr 0.47 | ms/batch 53.54 | loss 3.32 | ppl 27.68\n", + "| epoch 47 | 1200/ 2727 batches | lr 0.47 | ms/batch 53.51 | loss 3.35 | ppl 28.58\n", + "| epoch 47 | 1400/ 2727 batches | lr 0.47 | ms/batch 53.54 | loss 3.32 | ppl 27.63\n", + "| epoch 47 | 1600/ 2727 batches | lr 0.47 | ms/batch 53.53 | loss 3.32 | ppl 27.62\n", + "| epoch 47 | 1800/ 2727 batches | lr 0.47 | ms/batch 53.55 | loss 3.33 | ppl 27.84\n", + "| epoch 47 | 2000/ 2727 batches | lr 0.47 | ms/batch 53.55 | loss 3.33 | ppl 27.85\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "| epoch 47 | 2200/ 2727 batches | lr 0.47 | ms/batch 53.54 | loss 3.35 | ppl 28.47\n", + "| epoch 47 | 2400/ 2727 batches | lr 0.47 | ms/batch 53.51 | loss 3.34 | ppl 28.17\n", + "| epoch 47 | 2600/ 2727 batches | lr 0.47 | ms/batch 53.54 | loss 3.36 | ppl 28.89\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 47 | time: 152.80s | valid loss 5.88 | valid ppl 358.73\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 48 | 200/ 2727 batches | lr 0.45 | ms/batch 53.87 | loss 3.41 | ppl 30.22\n", + "| epoch 48 | 400/ 2727 batches | lr 0.45 | ms/batch 53.55 | loss 3.35 | ppl 28.59\n", + "| epoch 48 | 600/ 2727 batches | lr 0.45 | ms/batch 53.56 | loss 3.36 | ppl 28.69\n", + "| epoch 48 | 800/ 2727 batches | lr 0.45 | ms/batch 53.53 | loss 3.35 | ppl 28.56\n", + "| epoch 48 | 1000/ 2727 batches | lr 0.45 | ms/batch 53.50 | loss 3.31 | ppl 27.35\n", + "| epoch 48 | 1200/ 2727 batches | lr 0.45 | ms/batch 53.55 | loss 3.34 | ppl 28.31\n", + "| epoch 48 | 1400/ 2727 batches | lr 0.45 | ms/batch 53.52 | loss 3.31 | ppl 27.48\n", + "| epoch 48 | 1600/ 2727 batches | lr 0.45 | ms/batch 53.53 | loss 3.31 | ppl 27.50\n", + "| epoch 48 | 1800/ 2727 batches | lr 0.45 | ms/batch 53.51 | loss 3.32 | ppl 27.60\n", + "| epoch 48 | 2000/ 2727 batches | lr 0.45 | ms/batch 53.52 | loss 3.31 | ppl 27.47\n", + "| epoch 48 | 2200/ 2727 batches | lr 0.45 | ms/batch 53.54 | loss 3.34 | ppl 28.19\n", + "| epoch 48 | 2400/ 2727 batches | lr 0.45 | ms/batch 53.53 | loss 3.33 | ppl 27.93\n", + "| epoch 48 | 2600/ 2727 batches | lr 0.45 | ms/batch 53.54 | loss 3.35 | ppl 28.55\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 48 | time: 152.84s | valid loss 5.90 | valid ppl 365.29\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 49 | 200/ 2727 batches | lr 0.43 | ms/batch 53.78 | loss 3.40 | ppl 30.09\n", + "| epoch 49 | 400/ 2727 batches | lr 0.43 | ms/batch 53.53 | loss 3.35 | ppl 28.49\n", + "| epoch 49 | 600/ 2727 batches | lr 0.43 | ms/batch 53.54 | loss 3.35 | ppl 28.53\n", + "| epoch 49 | 800/ 2727 batches | lr 0.43 | ms/batch 53.55 | loss 3.34 | ppl 28.24\n", + "| epoch 49 | 1000/ 2727 batches | lr 0.43 | ms/batch 53.54 | loss 3.31 | ppl 27.33\n", + "| epoch 49 | 1200/ 2727 batches | lr 0.43 | ms/batch 53.58 | loss 3.34 | ppl 28.22\n", + "| epoch 49 | 1400/ 2727 batches | lr 0.43 | ms/batch 53.51 | loss 3.30 | ppl 27.17\n", + "| epoch 49 | 1600/ 2727 batches | lr 0.43 | ms/batch 53.53 | loss 3.30 | ppl 27.18\n", + "| epoch 49 | 1800/ 2727 batches | lr 0.43 | ms/batch 53.53 | loss 3.31 | ppl 27.27\n", + "| epoch 49 | 2000/ 2727 batches | lr 0.43 | ms/batch 53.51 | loss 3.31 | ppl 27.28\n", + "| epoch 49 | 2200/ 2727 batches | lr 0.43 | ms/batch 53.52 | loss 3.33 | ppl 28.07\n", + "| epoch 49 | 2400/ 2727 batches | lr 0.43 | ms/batch 53.49 | loss 3.33 | ppl 27.83\n", + "| epoch 49 | 2600/ 2727 batches | lr 0.43 | ms/batch 53.57 | loss 3.34 | ppl 28.35\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 49 | time: 152.82s | valid loss 5.92 | valid ppl 373.45\n", + "-----------------------------------------------------------------------------------------\n", + "| epoch 50 | 200/ 2727 batches | lr 0.40 | ms/batch 53.81 | loss 3.39 | ppl 29.78\n", + "| epoch 50 | 400/ 2727 batches | lr 0.40 | ms/batch 53.56 | loss 3.34 | ppl 28.10\n", + "| epoch 50 | 600/ 2727 batches | lr 0.40 | ms/batch 53.50 | loss 3.34 | ppl 28.15\n", + "| epoch 50 | 800/ 2727 batches | lr 0.40 | ms/batch 53.54 | loss 3.33 | ppl 27.93\n", + "| epoch 50 | 1000/ 2727 batches | lr 0.40 | ms/batch 53.52 | loss 3.30 | ppl 27.23\n", + "| epoch 50 | 1200/ 2727 batches | lr 0.40 | ms/batch 53.54 | loss 3.33 | ppl 27.84\n", + "| epoch 50 | 1400/ 2727 batches | lr 0.40 | ms/batch 53.56 | loss 3.29 | ppl 26.92\n", + "| epoch 50 | 1600/ 2727 batches | lr 0.40 | ms/batch 53.58 | loss 3.30 | ppl 27.10\n", + "| epoch 50 | 1800/ 2727 batches | lr 0.40 | ms/batch 53.55 | loss 3.30 | ppl 27.16\n", + "| epoch 50 | 2000/ 2727 batches | lr 0.40 | ms/batch 53.55 | loss 3.30 | ppl 27.02\n", + "| epoch 50 | 2200/ 2727 batches | lr 0.40 | ms/batch 53.54 | loss 3.33 | ppl 27.89\n", + "| epoch 50 | 2400/ 2727 batches | lr 0.40 | ms/batch 53.58 | loss 3.32 | ppl 27.57\n", + "| epoch 50 | 2600/ 2727 batches | lr 0.40 | ms/batch 53.57 | loss 3.33 | ppl 27.91\n", + "-----------------------------------------------------------------------------------------\n", + "| end of epoch 50 | time: 152.87s | valid loss 5.92 | valid ppl 371.61\n", "-----------------------------------------------------------------------------------------\n" ] } ], "source": [ "best_val_loss = float('inf')\n", - "epochs = 10\n", + "epochs = 50\n", "best_model = None\n", "\n", "for epoch in range(1, epochs + 1):\n", @@ -766,7 +1563,7 @@ }, { "cell_type": "markdown", - "id": "565b5aa4", + "id": "f0d32419", "metadata": {}, "source": [ "### print info about best model after training" @@ -774,7 +1571,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 44, "id": "12fdd0aa", "metadata": { "scrolled": true @@ -785,7 +1582,7 @@ "output_type": "stream", "text": [ "=========================================================================================\n", - "| End of training | test loss 5.80 | test ppl 329.59\n", + "| End of training | test loss 5.23 | test ppl 186.92\n", "=========================================================================================\n" ] } @@ -801,7 +1598,25 @@ }, { "cell_type": "markdown", - "id": "12031065", + "id": "3abfa421", + "metadata": {}, + "source": [ + "### save trained model to file" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "a747e692", + "metadata": {}, + "outputs": [], + "source": [ + "torch.save(best_model.state_dict(), \"autocomplete_model\")" + ] + }, + { + "cell_type": "markdown", + "id": "09df56cf", "metadata": {}, "source": [ "## Now we can try to predict based on trained model" @@ -817,28 +1632,29 @@ }, { "cell_type": "code", - "execution_count": 300, + "execution_count": 46, "id": "cfb30fe0", "metadata": {}, "outputs": [], "source": [ - "input_batch = [\n", + "sample_batch = [\n", " \"The brain is\",\n", " \"The lung is\"\n", - "]" + "]\n", + "input_batch = sample_batch" ] }, { "cell_type": "markdown", - "id": "054ada71", + "id": "10d51d39", "metadata": {}, "source": [ - "### define source mask for model" + "### define initial source mask for model" ] }, { "cell_type": "code", - "execution_count": 301, + "execution_count": 47, "id": "305853e8", "metadata": {}, "outputs": [], @@ -849,28 +1665,27 @@ }, { "cell_type": "markdown", - "id": "4635a73e", + "id": "fe250072", "metadata": {}, "source": [ - "### define iterator for predict batch and init to generator" + "### define iterator for predict batch " ] }, { "cell_type": "code", - "execution_count": 302, + "execution_count": 48, "id": "afe585d6", "metadata": {}, "outputs": [], "source": [ "def predict_abstract_iter():\n", - " for batch in input_batch:\n", - " yield tokenizer(batch)\n", - "predict_generator = predict_abstract_iter()" + " for batch in sample_batch:\n", + " yield tokenizer(batch)" ] }, { "cell_type": "markdown", - "id": "1c171c8c", + "id": "b043de0a", "metadata": {}, "source": [ "### load data into tensor for model to process" @@ -878,38 +1693,19 @@ }, { "cell_type": "code", - "execution_count": 303, - "id": "0788b045", - "metadata": {}, - "outputs": [], - "source": [ - "data = [torch.tensor(vocab.lookup_indices(item)) for item in predict_generator]" - ] - }, - { - "cell_type": "code", - "execution_count": 308, + "execution_count": 49, "id": "8bfaa8bd", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[tensor([ 3, 555, 16]), tensor([ 3, 76, 16])]" - ] - }, - "execution_count": 308, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "data" + "def toDataTensor():\n", + " predict_generator = predict_abstract_iter()\n", + " return [torch.tensor(vocab.lookup_indices(item)) for item in predict_generator]" ] }, { "cell_type": "markdown", - "id": "99132b3d", + "id": "a800ffea", "metadata": {}, "source": [ "### check device once again (prob not needed)" @@ -917,8 +1713,8 @@ }, { "cell_type": "code", - "execution_count": 309, - "id": "b8c50c8c", + "execution_count": 50, + "id": "6e2c35ba", "metadata": {}, "outputs": [ { @@ -927,7 +1723,7 @@ "device(type='cuda')" ] }, - "execution_count": 309, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } @@ -939,7 +1735,7 @@ }, { "cell_type": "markdown", - "id": "05766f6b", + "id": "dd71bdfc", "metadata": {}, "source": [ "### define predict function" @@ -947,15 +1743,15 @@ }, { "cell_type": "code", - "execution_count": 317, - "id": "0475bcc9", + "execution_count": 51, + "id": "64223e87", "metadata": {}, "outputs": [], "source": [ - "def predict(input_line, n_predictions=3):\n", + "def predict(input_line, mask, n_predictions=3):\n", " print('\\n> %s' % input_line)\n", " with torch.no_grad():\n", - " output = best_model(input_line.to(device), src_mask)\n", + " output = best_model(input_line.to(device), mask)\n", "\n", " # Get top N categories\n", " topv, topi = output.topk(n_predictions, 1, True)\n", @@ -968,67 +1764,93 @@ " value = topv[0][i]\n", " v1, v2 = value.topk(1)\n", " predict_token_index = v2.cpu().detach().numpy()\n", - " print(vocab.lookup_token(predict_token_index))\n", - " #print(category_index)\n", - " #print('(%.2f) %s' % (value, all_categories[category_index]))\n", " predictions.append(vocab.lookup_token(predict_token_index))\n", " return predictions" ] }, { "cell_type": "markdown", - "id": "8ad2f64b", + "id": "5b33b9f3", + "metadata": {}, + "source": [ + "### Execute prediction and display predicted values and choose continuation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c83e3b75", "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "b2895698", + "metadata": {}, + "outputs": [], "source": [ - "### Execute prediction and display predicted values" + "def predict_loop(num_of_pred):\n", + " iteration = 0\n", + " is_terminated = False\n", + " input_batch = sample_batch\n", + " while(not is_terminated):\n", + " # I guess 2*count is need because spaces get counted aswell\n", + " mask_size = bptt+(iteration)\n", + " print(mask_size)\n", + " src_mask = generate_square_subsequent_mask(mask_size).to(device)\n", + " data = toDataTensor()\n", + " for i, d in enumerate(data):\n", + " predictions = predict(d, src_mask, num_of_pred)\n", + " print(\"Current input:\", count)\n", + " print(input_batch[count])\n", + " print(\"Possible continuations:\")\n", + " for j in range(len(predictions)):\n", + " print(j + 1, \": \", predictions[j])\n", + " s_index = input(\"Choose continuation by index:\")\n", + " if(\"e\" in s_index):\n", + " is_terminated = True\n", + " print(\"prediction stopped.\")\n", + " break\n", + "\n", + " print(\"Text is now:\")\n", + " input_batch[i] += (\" \" + predictions[int(s_index) -1])\n", + " print(input_batch[i])\n", + "\n", + " iteration = iteration + 1" ] }, { "cell_type": "code", - "execution_count": 318, - "id": "55b73ea1", + "execution_count": 54, + "id": "13ed9298", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[tensor([ 3, 555, 16]), tensor([ 3, 76, 16])]\n", - "\n", - "> tensor([ 3, 555, 16])\n", - "tumors\n", - "the\n", - "the\n", - "The brain is\n", - "Possible continuations:\n", - "0 : tumors\n", - "0 : the\n", - "0 : the\n", + "3\n", "\n", - "> tensor([ 3, 76, 16])\n", - "cancer\n", - "most\n", - "the\n", - "The lung is\n", - "Possible continuations:\n", - "0 : cancer\n", - "0 : most\n", - "0 : the\n" + "> tensor([ 3, 542, 17])\n" + ] + }, + { + "ename": "NameError", + "evalue": "name 'count' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn [54], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mpredict_loop\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn [52], line 13\u001b[0m, in \u001b[0;36mpredict_loop\u001b[0;34m(num_of_pred)\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, d \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(data):\n\u001b[1;32m 12\u001b[0m predictions \u001b[38;5;241m=\u001b[39m predict(d, src_mask, num_of_pred)\n\u001b[0;32m---> 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCurrent input:\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[43mcount\u001b[49m)\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28mprint\u001b[39m(input_batch[count])\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPossible continuations:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[0;31mNameError\u001b[0m: name 'count' is not defined" ] } ], "source": [ - "print(data)\n", - "count = 0\n", - "num_of_pred = 3\n", - "for d in data:\n", - " predictions = predict(d, num_of_pred)\n", - " print(input_batch[count])\n", - " print(\"Possible continuations:\")\n", - " for j in range(len(predictions)):\n", - " print(i, \": \", predictions[j])\n", - " count = count + 1\n", - " " + "predict_loop(3)" ] } ],