|
|
@ -29,7 +29,7 @@ |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 25, |
|
|
|
"execution_count": 1, |
|
|
|
"id": "e444b44c", |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [], |
|
|
@ -41,6 +41,35 @@ |
|
|
|
" from Bio import Entrez, Medline \n" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 2, |
|
|
|
"id": "3209935b", |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [ |
|
|
|
{ |
|
|
|
"name": "stdout", |
|
|
|
"output_type": "stream", |
|
|
|
"text": [ |
|
|
|
"--2023-01-18 14:27:45-- https://cloud.constantin-fuerst.com/s/944x5BpTQM7GjtF/download\n", |
|
|
|
"Loaded CA certificate '/etc/ssl/certs/ca-certificates.crt'\n", |
|
|
|
"Resolving cloud.constantin-fuerst.com (cloud.constantin-fuerst.com)... 95.91.21.14\n", |
|
|
|
"Connecting to cloud.constantin-fuerst.com (cloud.constantin-fuerst.com)|95.91.21.14|:443... connected.\n", |
|
|
|
"HTTP request sent, awaiting response... 200 OK\n", |
|
|
|
"Length: 1100551 (1.0M) [text/plain]\n", |
|
|
|
"Saving to: ‘pubmed-query.txt’\n", |
|
|
|
"\n", |
|
|
|
"pubmed-query.txt 100%[===================>] 1.05M 1.91MB/s in 0.6s \n", |
|
|
|
"\n", |
|
|
|
"2023-01-18 14:27:47 (1.91 MB/s) - ‘pubmed-query.txt’ saved [1100551/1100551]\n", |
|
|
|
"\n" |
|
|
|
] |
|
|
|
} |
|
|
|
], |
|
|
|
"source": [ |
|
|
|
"!wget https://cloud.constantin-fuerst.com/s/944x5BpTQM7GjtF/download -O pubmed-query.txt" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "markdown", |
|
|
|
"id": "7bf15c30", |
|
|
@ -51,18 +80,14 @@ |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 26, |
|
|
|
"execution_count": 5, |
|
|
|
"id": "adfb256a", |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"def getPapers(myQuery, maxPapers, myEmail =\"leonard.starke@mailbox.tu-dresden.de\"):\n", |
|
|
|
" # Get articles from PubMed\n", |
|
|
|
" Entrez.email = myEmail\n", |
|
|
|
" record = Entrez.read(Entrez.esearch(db=\"pubmed\", term=myQuery, retmax=maxPapers))\n", |
|
|
|
" idlist = record[\"IdList\"]\n", |
|
|
|
" print(\"\\nThere are %d records for %s.\"%(len(idlist), myQuery.strip()))\n", |
|
|
|
" records = Medline.parse(Entrez.efetch(db=\"pubmed\", id=idlist, rettype=\"medline\", retmode=\"text\"))\n", |
|
|
|
"def getPapers(filename):\n", |
|
|
|
" pubmed_query = open(filename, encoding='utf-8')\n", |
|
|
|
" records = Medline.parse(pubmed_query)\n", |
|
|
|
" return list(records)" |
|
|
|
] |
|
|
|
}, |
|
|
@ -76,17 +101,7 @@ |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 4, |
|
|
|
"id": "39c3b352", |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
|
"amountOfPapers = 100000" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 27, |
|
|
|
"execution_count": 6, |
|
|
|
"id": "00481ec9", |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [ |
|
|
@ -94,15 +109,13 @@ |
|
|
|
"name": "stdout", |
|
|
|
"output_type": "stream", |
|
|
|
"text": [ |
|
|
|
"\n", |
|
|
|
"There are 9999 records for Blood [tiab].\n" |
|
|
|
"Got 290 records from the query text file\n" |
|
|
|
] |
|
|
|
} |
|
|
|
], |
|
|
|
"source": [ |
|
|
|
"myQuery =\"Blood [tiab]\" #query in title and abstract\n", |
|
|
|
"maxPapers = amountOfPapers\n", |
|
|
|
"records = getPapers(myQuery, maxPapers)" |
|
|
|
"records = getPapers(\"pubmed-query.txt\")\n", |
|
|
|
"print(f\"Got {len(records)} records from the query text file\")" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
@ -115,7 +128,7 @@ |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 28, |
|
|
|
"execution_count": 7, |
|
|
|
"id": "dcf5c217", |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [], |
|
|
@ -177,7 +190,7 @@ |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "markdown", |
|
|
|
"id": "ec1db50b", |
|
|
|
"id": "683ed2fc", |
|
|
|
"metadata": {}, |
|
|
|
"source": [ |
|
|
|
"### import math module" |
|
|
@ -186,7 +199,7 @@ |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 33, |
|
|
|
"id": "eb32bd79", |
|
|
|
"id": "8d2312db", |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [], |
|
|
|
"source": [ |
|
|
@ -1917,7 +1930,7 @@ |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": 77, |
|
|
|
"execution_count": null, |
|
|
|
"id": "b2895698", |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [], |
|
|
@ -1953,7 +1966,7 @@ |
|
|
|
}, |
|
|
|
{ |
|
|
|
"cell_type": "code", |
|
|
|
"execution_count": null, |
|
|
|
"execution_count": 78, |
|
|
|
"id": "13ed9298", |
|
|
|
"metadata": {}, |
|
|
|
"outputs": [ |
|
|
@ -2069,8 +2082,50 @@ |
|
|
|
"Possible continuations:\n", |
|
|
|
"1 : and\n", |
|
|
|
"2 : and\n", |
|
|
|
"3 : the\n", |
|
|
|
"Choose continuation by index:1\n", |
|
|
|
"Text is now:\n", |
|
|
|
"The lung is identified , the and\n", |
|
|
|
"\n", |
|
|
|
"> tensor([ 3, 161, 18, 3, 132, 258, 5])\n", |
|
|
|
"predict token index: [258]\n", |
|
|
|
"predict token index: [1]\n", |
|
|
|
"predict token index: [5]\n", |
|
|
|
"Current input: 0\n", |
|
|
|
"The brain is the most common of\n", |
|
|
|
"Possible continuations:\n", |
|
|
|
"1 : common\n", |
|
|
|
"2 : .\n", |
|
|
|
"3 : of\n", |
|
|
|
"Choose continuation by index:1\n", |
|
|
|
"Text is now:\n", |
|
|
|
"The brain is the most common of common\n", |
|
|
|
"\n", |
|
|
|
"> tensor([ 3, 374, 18, 183, 2, 3, 4])\n", |
|
|
|
"predict token index: [4]\n", |
|
|
|
"predict token index: [4]\n", |
|
|
|
"predict token index: [3]\n", |
|
|
|
"Current input: 1\n", |
|
|
|
"The lung is identified , the and\n", |
|
|
|
"Possible continuations:\n", |
|
|
|
"1 : and\n", |
|
|
|
"2 : and\n", |
|
|
|
"3 : the\n" |
|
|
|
] |
|
|
|
}, |
|
|
|
{ |
|
|
|
"ename": "KeyboardInterrupt", |
|
|
|
"evalue": "Interrupted by user", |
|
|
|
"output_type": "error", |
|
|
|
"traceback": [ |
|
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", |
|
|
|
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", |
|
|
|
"Cell \u001b[0;32mIn [78], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mpredict_loop\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m)\u001b[49m\n", |
|
|
|
"Cell \u001b[0;32mIn [77], line 17\u001b[0m, in \u001b[0;36mpredict_loop\u001b[0;34m(num_of_pred)\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m j \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mlen\u001b[39m(predictions)):\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28mprint\u001b[39m(j \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m: \u001b[39m\u001b[38;5;124m\"\u001b[39m, predictions[j])\n\u001b[0;32m---> 17\u001b[0m s_index \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43minput\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mChoose continuation by index:\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 18\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124me\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m s_index):\n\u001b[1;32m 19\u001b[0m is_terminated \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n", |
|
|
|
"File \u001b[0;32m/usr/lib/python3.10/site-packages/ipykernel/kernelbase.py:1177\u001b[0m, in \u001b[0;36mKernel.raw_input\u001b[0;34m(self, prompt)\u001b[0m\n\u001b[1;32m 1173\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_allow_stdin:\n\u001b[1;32m 1174\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m StdinNotImplementedError(\n\u001b[1;32m 1175\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraw_input was called, but this frontend does not support input requests.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1176\u001b[0m )\n\u001b[0;32m-> 1177\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_input_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1178\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1179\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_parent_ident\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mshell\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1180\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_parent\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mshell\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1181\u001b[0m \u001b[43m \u001b[49m\u001b[43mpassword\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 1182\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", |
|
|
|
"File \u001b[0;32m/usr/lib/python3.10/site-packages/ipykernel/kernelbase.py:1219\u001b[0m, in \u001b[0;36mKernel._input_request\u001b[0;34m(self, prompt, ident, parent, password)\u001b[0m\n\u001b[1;32m 1216\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m 1217\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyboardInterrupt\u001b[39;00m:\n\u001b[1;32m 1218\u001b[0m \u001b[38;5;66;03m# re-raise KeyboardInterrupt, to truncate traceback\u001b[39;00m\n\u001b[0;32m-> 1219\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyboardInterrupt\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInterrupted by user\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28mNone\u001b[39m\n\u001b[1;32m 1220\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n\u001b[1;32m 1221\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlog\u001b[38;5;241m.\u001b[39mwarning(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid Message:\u001b[39m\u001b[38;5;124m\"\u001b[39m, exc_info\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n", |
|
|
|
"\u001b[0;31mKeyboardInterrupt\u001b[0m: Interrupted by user" |
|
|
|
] |
|
|
|
} |
|
|
|
], |
|
|
|
"source": [ |
|
|
|