
fix prediction and add doc

dev_neuralnet
Leonard Starke 2 years ago
parent commit 4e62562df3
1 changed file: AutomaticSentenceCompletion.ipynb (290 changes)

@@ -435,12 +435,12 @@
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "da8fb12b",
"cell_type": "markdown",
"id": "3b78cc08",
"metadata": {},
"outputs": [],
"source": []
"source": [
"### define pos encoder"
]
},
{
"cell_type": "code",
@@ -471,6 +471,14 @@
" return self.dropout(x)\n"
]
},
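Only the closing line of the positional-encoding cell (`return self.dropout(x)`) is visible in the hunk above. For reference, a minimal sketch of the standard sinusoidal encoder from the PyTorch `nn.Transformer` language-modeling tutorial, whose naming this notebook follows (the notebook's actual cell may differ in details):

```python
import math
import torch
from torch import nn, Tensor

class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding added on top of the token embeddings."""

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x: Tensor) -> Tensor:
        # x: [seq_len, batch_size, embedding_dim]
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)
```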
{
"cell_type": "markdown",
"id": "0adefcce",
"metadata": {},
"source": [
"### define function to create batches of data and create batches"
]
},
{
"cell_type": "code",
"execution_count": 48,
@@ -509,6 +517,14 @@
"test_data = batchify(test_data, eval_batch_size)"
]
},
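The hunk above ends with the `batchify` call that builds `test_data`; the function body itself falls outside the shown context. A sketch of what `batchify` conventionally does in this setup, assuming `device` is the CUDA/CPU device chosen in an earlier cell:

```python
def batchify(data: Tensor, bsz: int) -> Tensor:
    """Reshape a flat token sequence into bsz parallel columns, dropping the remainder."""
    seq_len = data.size(0) // bsz
    data = data[:seq_len * bsz]                      # trim tokens that do not fit evenly
    data = data.view(bsz, seq_len).t().contiguous()  # shape: [seq_len, batch_size]
    return data.to(device)                           # `device` is defined in an earlier cell
```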
{
"cell_type": "markdown",
"id": "4f407ad0",
"metadata": {},
"source": [
"### define function to get batch"
]
},
{
"cell_type": "code",
"execution_count": 50,
@@ -533,6 +549,14 @@
" return data, target"
]
},
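Likewise, only the `return data, target` line of the `get_batch` cell is visible. The usual implementation slices a window of length up to `bptt` out of the batched data and pairs it with the same window shifted by one token (a sketch; `bptt` is set elsewhere in the notebook):

```python
def get_batch(source: Tensor, i: int):
    """Return (input, target): a bptt-long slice and the same slice shifted by one token."""
    seq_len = min(bptt, len(source) - 1 - i)            # `bptt` comes from an earlier cell
    data = source[i:i + seq_len]                        # shape: [seq_len, batch_size]
    target = source[i + 1:i + 1 + seq_len].reshape(-1)  # flattened for CrossEntropyLoss
    return data, target
```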
{
"cell_type": "markdown",
"id": "7ee28c38",
"metadata": {},
"source": [
"### define parameters and init model"
]
},
{
"cell_type": "code",
"execution_count": 51,
@@ -549,10 +573,18 @@
"model = TransformerModel(ntokens, emsize, nhead, d_hid, nlayers, dropout).to(device)"
]
},
{
"cell_type": "markdown",
"id": "51f2400a",
"metadata": {},
"source": [
"### init optimizer, loss, scheduler etc."
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "50ab3fb6",
"execution_count": null,
"id": "b9a04e07",
"metadata": {},
"outputs": [],
"source": [
@@ -562,7 +594,24 @@
"criterion = nn.CrossEntropyLoss()\n",
"lr = 5.0 # learning rate\n",
"optimizer = torch.optim.SGD(model.parameters(), lr=lr)\n",
"scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)\n",
"scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)"
]
},
{
"cell_type": "markdown",
"id": "07317af8",
"metadata": {},
"source": [
"### define train function"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "50ab3fb6",
"metadata": {},
"outputs": [],
"source": [
"\n",
"def train(model: nn.Module) -> None:\n",
" model.train() # turn on train mode\n",
@@ -595,8 +644,24 @@
" f'lr {lr:02.2f} | ms/batch {ms_per_batch:5.2f} | '\n",
" f'loss {cur_loss:5.2f} | ppl {ppl:8.2f}')\n",
" total_loss = 0\n",
" start_time = time.time()\n",
"\n",
" start_time = time.time()"
]
},
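Most of the train-function body lies outside this hunk; only its signature, `model.train()`, and the logging lines are shown. The visible f-strings match the tutorial's training loop, which looks roughly like the sketch below (`bptt`, `ntokens`, `criterion`, `optimizer`, `scheduler`, `train_data`, `device`, and the surrounding `epoch` variable all come from other cells):

```python
def train(model: nn.Module) -> None:
    model.train()  # turn on train mode
    total_loss = 0.
    log_interval = 200
    start_time = time.time()
    src_mask = generate_square_subsequent_mask(bptt).to(device)

    num_batches = len(train_data) // bptt
    for batch, i in enumerate(range(0, train_data.size(0) - 1, bptt)):
        data, targets = get_batch(train_data, i)
        seq_len = data.size(0)
        if seq_len != bptt:  # the last batch can be shorter than bptt
            src_mask = src_mask[:seq_len, :seq_len]
        output = model(data, src_mask)
        loss = criterion(output.view(-1, ntokens), targets)

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)  # avoid exploding gradients
        optimizer.step()

        total_loss += loss.item()
        if batch % log_interval == 0 and batch > 0:
            lr = scheduler.get_last_lr()[0]
            ms_per_batch = (time.time() - start_time) * 1000 / log_interval
            cur_loss = total_loss / log_interval
            ppl = math.exp(cur_loss)
            print(f'| epoch {epoch:3d} | {batch:5d}/{num_batches:5d} batches | '
                  f'lr {lr:02.2f} | ms/batch {ms_per_batch:5.2f} | '
                  f'loss {cur_loss:5.2f} | ppl {ppl:8.2f}')
            total_loss = 0
            start_time = time.time()
```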
{
"cell_type": "markdown",
"id": "23709949",
"metadata": {},
"source": [
"### define evaluate function"
]
},
{
"cell_type": "code",
"execution_count": 289,
"id": "689bd4ea",
"metadata": {},
"outputs": [],
"source": [
"def evaluate(model: nn.Module, eval_data: Tensor) -> float:\n",
" model.eval() # turn on evaluation mode\n",
" total_loss = 0.\n",
@@ -613,11 +678,21 @@
" return total_loss / (len(eval_data) - 1)"
]
},
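The middle of `evaluate` is cut out of the hunk; under the standard tutorial assumptions it is a no-grad loop over `eval_data` that weights each chunk's loss by its length:

```python
def evaluate(model: nn.Module, eval_data: Tensor) -> float:
    model.eval()  # turn on evaluation mode
    total_loss = 0.
    src_mask = generate_square_subsequent_mask(bptt).to(device)
    with torch.no_grad():
        for i in range(0, eval_data.size(0) - 1, bptt):
            data, targets = get_batch(eval_data, i)
            seq_len = data.size(0)
            if seq_len != bptt:
                src_mask = src_mask[:seq_len, :seq_len]
            output = model(data, src_mask)
            output_flat = output.view(-1, ntokens)
            total_loss += seq_len * criterion(output_flat, targets).item()
    return total_loss / (len(eval_data) - 1)
```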
{
"cell_type": "markdown",
"id": "d7c6a1e0",
"metadata": {},
"source": [
"### now we can start training the model while saving best one"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "09c4d4ce",
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
@@ -689,11 +764,21 @@
" scheduler.step()"
]
},
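Of the epoch loop itself only the trailing `scheduler.step()` is visible here, but since `best_model` is used for prediction further down the diff, the loop presumably keeps a copy of the model with the lowest validation loss, along these lines:

```python
import copy

best_val_loss = float('inf')
epochs = 3          # assumed; the actual epoch count is outside this hunk
best_model = None

for epoch in range(1, epochs + 1):
    epoch_start_time = time.time()
    train(model)
    val_loss = evaluate(model, val_data)
    val_ppl = math.exp(val_loss)
    elapsed = time.time() - epoch_start_time
    print('-' * 89)
    print(f'| end of epoch {epoch:3d} | time: {elapsed:5.2f}s | '
          f'valid loss {val_loss:5.2f} | valid ppl {val_ppl:8.2f}')
    print('-' * 89)

    if val_loss < best_val_loss:            # keep the best model seen so far
        best_val_loss = val_loss
        best_model = copy.deepcopy(model)

    scheduler.step()
```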
{
"cell_type": "markdown",
"id": "565b5aa4",
"metadata": {},
"source": [
"### print info about best model after training"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "12fdd0aa",
"metadata": {},
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
@@ -714,6 +799,14 @@
"print('=' * 89)"
]
},
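The cell that ends with `print('=' * 89)` above presumably evaluates the saved `best_model` on the held-out test split, roughly:

```python
test_loss = evaluate(best_model, test_data)
test_ppl = math.exp(test_loss)
print('=' * 89)
print(f'| End of training | test loss {test_loss:5.2f} | test ppl {test_ppl:8.2f}')
print('=' * 89)
```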
{
"cell_type": "markdown",
"id": "12031065",
"metadata": {},
"source": [
"## Now we can try to predict based on trained model"
]
},
{
"cell_type": "markdown",
"id": "e685d3e1",
@@ -724,7 +817,7 @@
},
{
"cell_type": "code",
"execution_count": 274,
"execution_count": 300,
"id": "cfb30fe0",
"metadata": {},
"outputs": [],
@@ -735,9 +828,17 @@
"]"
]
},
{
"cell_type": "markdown",
"id": "054ada71",
"metadata": {},
"source": [
"### define source mask for model"
]
},
{
"cell_type": "code",
"execution_count": 275,
"execution_count": 301,
"id": "305853e8",
"metadata": {},
"outputs": [],
@@ -746,9 +847,17 @@
"src_mask = generate_square_subsequent_mask(bptt).to(device)"
]
},
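`generate_square_subsequent_mask` is called here but defined in a cell outside the diff; in the tutorial it is a one-liner that builds an upper-triangular matrix of `-inf` so each position can only attend to itself and earlier positions:

```python
def generate_square_subsequent_mask(sz: int) -> Tensor:
    """Upper-triangular -inf mask (zeros on and below the diagonal)."""
    return torch.triu(torch.ones(sz, sz) * float('-inf'), diagonal=1)
```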
{
"cell_type": "markdown",
"id": "4635a73e",
"metadata": {},
"source": [
"### define iterator for predict batch and init to generator"
]
},
{
"cell_type": "code",
"execution_count": 276,
"execution_count": 302,
"id": "afe585d6",
"metadata": {},
"outputs": [],
@@ -760,16 +869,16 @@
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f7ac6188",
"cell_type": "markdown",
"id": "1c171c8c",
"metadata": {},
"outputs": [],
"source": []
"source": [
"### load data into tensor for model to process"
]
},
{
"cell_type": "code",
"execution_count": 278,
"execution_count": 303,
"id": "0788b045",
"metadata": {},
"outputs": [],
@@ -779,7 +888,7 @@
},
{
"cell_type": "code",
"execution_count": 279,
"execution_count": 308,
"id": "8bfaa8bd",
"metadata": {},
"outputs": [
@@ -789,7 +898,7 @@
"[tensor([ 3, 555, 16]), tensor([ 3, 76, 16])]"
]
},
"execution_count": 279,
"execution_count": 308,
"metadata": {},
"output_type": "execute_result"
}
@@ -798,10 +907,18 @@
"data"
]
},
{
"cell_type": "markdown",
"id": "99132b3d",
"metadata": {},
"source": [
"### check device once again (prob not needed)"
]
},
{
"cell_type": "code",
"execution_count": 280,
"id": "dd0e7310",
"execution_count": 309,
"id": "b8c50c8c",
"metadata": {},
"outputs": [
{
@@ -810,7 +927,7 @@
"device(type='cuda')"
]
},
"execution_count": 280,
"execution_count": 309,
"metadata": {},
"output_type": "execute_result"
}
@@ -821,87 +938,21 @@
]
},
{
"cell_type": "code",
"execution_count": 281,
"id": "1728f0fd",
"metadata": {},
"outputs": [],
"source": [
"for d in data:\n",
" d.to(device)"
]
},
{
"cell_type": "code",
"execution_count": 282,
"id": "49d27864",
"metadata": {},
"outputs": [],
"source": [
"best_model.eval()\n",
"for batch in data:\n",
" output = best_model(batch.to(device), src_mask)"
]
},
{
"cell_type": "code",
"execution_count": 283,
"id": "a3404169",
"metadata": {},
"outputs": [],
"source": [
"result_np = []\n",
"pred_np = output.cpu().detach().numpy()\n",
"for el in pred_np:\n",
" result_np.append(el)"
]
},
{
"cell_type": "code",
"execution_count": 284,
"id": "c7064c0c",
"cell_type": "markdown",
"id": "05766f6b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[array([[-0.5258564 , 4.6108465 , 4.8358154 , ..., -0.46871045,\n",
" -0.04386039, -0.13068362],\n",
" [-0.35341978, 8.821181 , 9.57295 , ..., -1.2820313 ,\n",
" -0.989242 , -0.15542248],\n",
" [-0.39717233, 5.7111125 , 6.4295497 , ..., -0.27339834,\n",
" -1.5333815 , 0.16042188]], dtype=float32),\n",
" array([[-0.5291481 , 4.6452312 , 4.7958803 , ..., -0.4642661 ,\n",
" -0.04427804, -0.12225106],\n",
" [-0.35347146, 8.824585 , 9.5098095 , ..., -1.2693769 ,\n",
" -0.97772634, -0.13521233],\n",
" [-0.39733842, 5.693817 , 6.368334 , ..., -0.26423275,\n",
" -1.527182 , 0.16518843]], dtype=float32),\n",
" array([[-0.53349733, 4.6777644 , 4.7953978 , ..., -0.4346264 ,\n",
" -0.03433151, -0.11583059],\n",
" [-0.3695631 , 8.82613 , 9.477964 , ..., -1.2404828 ,\n",
" -0.9594678 , -0.11550146],\n",
" [-0.41203997, 5.699034 , 6.341655 , ..., -0.24370295,\n",
" -1.5179048 , 0.16230991]], dtype=float32)]"
]
},
"execution_count": 284,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result_np"
"### define predict function"
]
},
{
"cell_type": "code",
"execution_count": 285,
"id": "679e2316",
"execution_count": 317,
"id": "0475bcc9",
"metadata": {},
"outputs": [],
"source": [
"def predict(input_line, n_predictions=1):\n",
"def predict(input_line, n_predictions=3):\n",
" print('\\n> %s' % input_line)\n",
" with torch.no_grad():\n",
" output = best_model(input_line.to(device), src_mask)\n",
@@ -920,31 +971,64 @@
" print(vocab.lookup_token(predict_token_index))\n",
" #print(category_index)\n",
" #print('(%.2f) %s' % (value, all_categories[category_index]))\n",
" #predictions.append([value, all_categories[category_index]])"
" predictions.append(vocab.lookup_token(predict_token_index))\n",
" return predictions"
]
},
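Only fragments of the new `predict` function survive in the hunk: the signature, the forward pass through `best_model`, and the `predictions.append(...)` / `return predictions` tail. One plausible reading, consistent with the three tokens printed per prompt below (where the same token can repeat), is a per-position argmax over the output logits; the notebook's actual indexing may differ:

```python
def predict(input_line: Tensor, n_predictions: int = 3):
    """Return candidate continuation tokens for an encoded prompt (a sketch)."""
    print('\n> %s' % input_line)
    predictions = []
    with torch.no_grad():
        # `best_model`, `device`, `src_mask`, and `vocab` come from earlier cells;
        # output is assumed to hold one logit vector over the vocabulary per position.
        output = best_model(input_line.to(device), src_mask)
        for pos in range(n_predictions):
            predict_token_index = output[pos].squeeze().argmax().item()
            print(vocab.lookup_token(predict_token_index))
            predictions.append(vocab.lookup_token(predict_token_index))
    return predictions
```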
{
"cell_type": "markdown",
"id": "8ad2f64b",
"metadata": {},
"source": [
"### Execute prediction and display predicted values"
]
},
{
"cell_type": "code",
"execution_count": 286,
"id": "03389137",
"execution_count": 318,
"id": "55b73ea1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[tensor([ 3, 555, 16]), tensor([ 3, 76, 16])]\n",
"\n",
"> tensor([ 3, 555, 16])\n",
"tumors\n",
"the\n",
"the\n",
"The brain is\n",
"Possible continuations:\n",
"0 : tumors\n",
"0 : the\n",
"0 : the\n",
"\n",
"> tensor([ 3, 76, 16])\n",
"cancer\n"
"cancer\n",
"most\n",
"the\n",
"The lung is\n",
"Possible continuations:\n",
"0 : cancer\n",
"0 : most\n",
"0 : the\n"
]
}
],
"source": [
"print(data)\n",
"count = 0\n",
"num_of_pred = 3\n",
"for d in data:\n",
" predict(d)"
" predictions = predict(d, num_of_pred)\n",
" print(input_batch[count])\n",
" print(\"Possible continuations:\")\n",
" for j in range(len(predictions)):\n",
" print(i, \": \", predictions[j])\n",
" count = count + 1\n",
" "
]
}
],
