diff --git a/01_NLP_Annotations.ipynb b/01_NLP_Annotations.ipynb index 1e3e9f9..12a1f73 100644 --- a/01_NLP_Annotations.ipynb +++ b/01_NLP_Annotations.ipynb @@ -97,6 +97,23 @@ "print('Total Positive Pneumonia Documents : {0}'.format(total_positives))" ] }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total Positive Pneumonia Documents : HOLA\n" + ] + } + ], + "source": [ + "print('Total Positive Pneumonia Documents : {0}{1}{2}{3}'.format(\"H\",\"O\",\"L\",\"A\"))" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -126,7 +143,11 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 4, +======= "execution_count": 3, +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 "metadata": { "scrolled": true }, @@ -143,7 +164,11 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 5, +======= "execution_count": 4, +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 "metadata": { "scrolled": false }, @@ -151,7 +176,11 @@ { "data": { "application/vnd.jupyter.widget-view+json": { +<<<<<<< HEAD + "model_id": "4c896d7363204445a395324af7a18c08", +======= "model_id": "4efb9d6973c14715963dbf7bad4a3c14", +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 "version_major": 2, "version_minor": 0 }, diff --git a/04_NLP_Evaluation_Metrics.ipynb b/04_NLP_Evaluation_Metrics.ipynb index f244f60..0e0b1fd 100644 --- a/04_NLP_Evaluation_Metrics.ipynb +++ b/04_NLP_Evaluation_Metrics.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -36,7 +36,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -48,7 +48,7 @@ "" ] }, - "execution_count": 11, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } diff --git a/06_Keyword_Searching_and_ErrorAnalysis.ipynb b/06_Keyword_Searching_and_ErrorAnalysis.ipynb index 0a8a583..cfef71e 100644 --- a/06_Keyword_Searching_and_ErrorAnalysis.ipynb +++ b/06_Keyword_Searching_and_ErrorAnalysis.ipynb @@ -11,7 +11,11 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 2, +======= "execution_count": 3, +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 "metadata": {}, "outputs": [], "source": [ @@ -28,7 +32,11 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 3, +======= "execution_count": 4, +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 "metadata": {}, "outputs": [], "source": [ @@ -54,7 +62,11 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 60, +======= "execution_count": 6, +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 "metadata": {}, "outputs": [], "source": [ @@ -63,8 +75,15 @@ " self.keywords = set()\n", " def predict(self, text):\n", " prediction = 0\n", +<<<<<<< HEAD + " #for word in text.split():\n", + " for keyword in self.keywords:\n", + " if keyword in text:\n", + " return 1\n", +======= " \n", "# your code here\n", +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 " return prediction\n", " \n" ] @@ -73,12 +92,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Test the function you just wrote by adding one keyword to the set: 'pneumonia'" + "Test the function you just wrote by adding one keyword to the set: 'pneumonia'\n" ] }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 61, +======= "execution_count": 7, +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 "metadata": {}, "outputs": [ { @@ -96,6 +119,11 @@ "source": [ "keyword_classifier = KeywordClassifier()\n", "keyword_classifier.keywords.add('pneumonia')\n", + "\n", + "#keyword_classifier.keywords.add('consolidation')\n", + "#keyword_classifier.keywords.add('infiltrate')\n", + "#keyword_classifier.keywords.add('fever')\n", + "#keyword_classifier.keywords.add('cough')\n", "annotated_doc_map = read_doc_annotations('data/training_v2.zip')\n", "print('Total Annotated Documents : {0}'.format(len(annotated_doc_map)))\n", "\n", @@ -124,7 +152,11 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 62, +======= "execution_count": 8, +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 "metadata": {}, "outputs": [], "source": [ @@ -140,7 +172,11 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 63, +======= "execution_count": 9, +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 "metadata": {}, "outputs": [ { @@ -173,13 +209,21 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 34, +======= "execution_count": 10, +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { +<<<<<<< HEAD + "model_id": "990476361b984d86ab280c0b5d512c63", +======= "model_id": "ae0e9f2e3d744cdf9b63048411812470", +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 "version_major": 2, "version_minor": 0 }, @@ -213,7 +257,11 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 64, +======= "execution_count": 11, +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 "metadata": {}, "outputs": [], "source": [ @@ -265,7 +313,11 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 66, +======= "execution_count": 12, +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 "metadata": { "scrolled": false }, @@ -356,7 +408,16 @@ "source": [ "fn=list_false_negatives(annotated_doc_map, keyword_classifier.predict)\n", "docs=list(fn.keys())\n", - "display(HTML(snippets_markup(fn)))" + "display(HTML(snippets_markup(fn)))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "type(fn)" ] }, { @@ -368,19 +429,23 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 57, +======= "execution_count": 13, +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{'pneumonia', 'consolidation'}\n" + "{'pneumonia', 'Pneumonia', 'consolidation', 'infiltrate', 'Consolidation', 'consolidations'}\n" ] } ], "source": [ - "keyword_classifier.keywords = {'pneumonia', 'consolidation'}\n", + "keyword_classifier.keywords = {'pneumonia', 'consolidation','Pneumonia','infiltrate','Consolidation','consolidations'}\n", "print(keyword_classifier.keywords)" ] }, @@ -395,16 +460,20 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 59, +======= "execution_count": 19, +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Precision : 0.6829268292682927\n", - "Recall : 0.8235294117647058\n", - "F1: 0.7466666666666667\n", + "Precision : 0.6444444444444445\n", + "Recall : 0.8529411764705882\n", + "F1: 0.7341772151898734\n", "\n", "Confusion Matrix : \n" ] @@ -442,13 +511,13 @@ " \n", " \n", " 0\n", - " 23\n", - " 13\n", + " 20\n", + " 16\n", " \n", " \n", " 1\n", - " 6\n", - " 28\n", + " 5\n", + " 29\n", " \n", " \n", "\n", @@ -457,8 +526,8 @@ "text/plain": [ "Predicted 0 1\n", "Actual \n", - "0 23 13\n", - "1 6 28" + "0 20 16\n", + "1 5 29" ] }, "metadata": {}, @@ -576,11 +645,20 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 67, +======= "execution_count": 38, +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 "metadata": {}, "outputs": [ { "data": { +<<<<<<< HEAD + "text/html": [ + "Correct!" + ], +======= "application/vnd.jupyter.widget-view+json": { "model_id": "9ab2ea98aac94982bc3ca94b1fe48444", "version_major": 2, @@ -600,8 +678,9 @@ "version_major": 2, "version_minor": 0 }, +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 "text/plain": [ - "Button(description='Submit', style=ButtonStyle())" + "" ] }, "metadata": {}, @@ -615,7 +694,11 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 70, +======= "execution_count": 37, +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 "metadata": {}, "outputs": [ { @@ -632,6 +715,11 @@ }, { "data": { +<<<<<<< HEAD + "text/html": [ + "Correct!" + ], +======= "application/vnd.jupyter.widget-view+json": { "model_id": "b969def6ce564b3186858a959550bcf4", "version_major": 2, @@ -651,8 +739,9 @@ "version_major": 2, "version_minor": 0 }, +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 "text/plain": [ - "Button(description='Submit', style=ButtonStyle())" + "" ] }, "metadata": {}, @@ -666,7 +755,11 @@ }, { "cell_type": "code", +<<<<<<< HEAD + "execution_count": 71, +======= "execution_count": 39, +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 "metadata": {}, "outputs": [ { @@ -686,7 +779,11 @@ { "data": { "text/html": [ +<<<<<<< HEAD + "Correct" +======= "This is a better answer" +>>>>>>> 056fe1e55867222710aeb8ef2c50cc308d58a459 ], "text/plain": [ "" diff --git a/08_NLP_Regex_for_Concept_Extraction.ipynb b/08_NLP_Regex_for_Concept_Extraction.ipynb index 63572fd..769e090 100644 --- a/08_NLP_Regex_for_Concept_Extraction.ipynb +++ b/08_NLP_Regex_for_Concept_Extraction.ipynb @@ -993,9 +993,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "txt=\"cardiovascular: patient has cardiovascular\"\n", "re.search('cardiovascular$', txt)" @@ -1012,10 +1023,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "match: cardiovascular\n", + "span: 0 14\n" + ] + } + ], "source": [ + "import re\n", "matched=re.match('^cardiovascular', txt)\n", "print(matched)\n", "print(\"match:\", matched.group())\n", @@ -1058,9 +1080,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " \n", + "\n", + "retrieved in as a tuple/ full span\n", + "all parts: ('6', '15', '2015') = admission date:6/15-2015 \n", + "\n", + "retrieved as a dictionary\n", + "{'month': '6', 'day': '15', 'year': '2015'} \n", + "\n", + "retrieved in parts\n", + "month: 6 = 6\n", + "day: 15 = 15\n", + "year: 2015 = 2015\n", + "{'month': '6', 'day': '15', 'year': '2015'}\n" + ] + } + ], "source": [ "txt=\"admission date:6/15-2015.\"\n", "fullSpan=re.match(r\"admission date:\\s*(?P\\d{1,2})[-|\\/](?P\\d{1,2})-(?P\\d{2,4})\", txt)\n", diff --git a/09_NLP_pneumonia_pyConText_targets_and_modifiers.ipynb b/09_NLP_pneumonia_pyConText_targets_and_modifiers.ipynb index c0b2f91..52ecb9c 100644 --- a/09_NLP_pneumonia_pyConText_targets_and_modifiers.ipynb +++ b/09_NLP_pneumonia_pyConText_targets_and_modifiers.ipynb @@ -182,13 +182,13 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "NodeDataView({ 99914448661365161132906157324590123994 pneumonia ['evidence_of_pneumonia'] : {'category': 'target'}})" + "NodeDataView({ 249612704042745789883698965661836381159 pneumonia ['evidence_of_pneumonia'] : {'category': 'target'}})" ] }, "metadata": {}, @@ -217,7 +217,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -243,7 +243,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -252,7 +252,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -277,12 +277,13 @@ "source": [ "## We didn't mark up a target for \"pneumonias\" since we only had the singular variant \"pneumonia\"\n", "## We can augment our targets by modifying a yaml file (.yml). A starter yaml file is included in our course resources:\n", - "KB/pneumonia_targets.yml" + "KB/pneumonia_targets.yml\n", + "#/edit/decart_rule_based_nlp/KB/pneumonia_targets.yml" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -349,7 +350,7 @@ "2 infiltrate EVIDENCE_OF_PNEUMONIA" ] }, - "execution_count": 8, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -367,7 +368,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -375,13 +376,13 @@ "output_type": "stream", "text": [ "Loading pneumonia;targets from : \n", - "\t/home/jianlins/work/decart_rule_based_nlp/KB/pneumonia_targets.yml\n" + "\t/home/gastonq/decart_rule_based_nlp/KB/pneumonia_targets.yml\n" ] }, { "data": { "text/plain": [ - "[literal<>; category<<['evidence_of_pneumonia']>>; re<<>>; rule<<>>,\n", + "[literal<>; category<<['evidence_of_pneumonia']>>; re<<>>; rule<<>>,\n", " literal<>; category<<['evidence_of_pneumonia']>>; re<<\\bpneumonia[s]?\\b>>; rule<<>>,\n", " literal<>; category<<['evidence_of_pneumonia']>>; re<<>>; rule<<>>]" ] @@ -400,20 +401,49 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package punkt to /home/gastonq/nltk_data...\n", + "[nltk_data] Unzipping tokenizers/punkt.zip.\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import nltk\n", + "nltk.download('punkt')" + ] + }, + { + "cell_type": "code", + "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Loading pneumonia;targets from : /home/jianlins/work/decart_rule_based_nlp/KB/pneumonia_targets.yml\n" + "Loading pneumonia;targets from : /home/gastonq/decart_rule_based_nlp/KB/pneumonia_targets.yml\n" ] }, { "data": { "text/plain": [ - "NodeDataView({ 100246180146938886484983829586404969434 pneumonia ['evidence_of_pneumonia'] : {'category': 'target'}, 100245844219529826004192432960055544794 infiltrate ['evidence_of_pneumonia'] : {'category': 'target'}})" + "NodeDataView({ 138546912158370067406741393043076680679 pneumonia ['evidence_of_pneumonia'] : {'category': 'target'}, 138546691111796652609239507055455243239 consolidation ['evidence_of_pneumonia'] : {'category': 'target'}, 138546489872263866377822019453821389799 infiltrate ['evidence_of_pneumonia'] : {'category': 'target'}})" ] }, "metadata": {}, @@ -425,6 +455,7 @@ "targets2 = []\n", "modifiers2 = []\n", "\n", + "#pneumonia_targets_file = 'gastonq/edit/decart_rule_based_nlp/KB/pneumonia_targets.yml'\n", "pneumonia_targets_file = 'KB/pneumonia_targets.yml'\n", "\n", "# so now let's set this up with more variants of \"EVIDENCE_OF_PNEUMONIA\"\n", @@ -448,7 +479,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -475,7 +506,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -507,7 +538,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -550,7 +581,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -559,7 +590,7 @@ "'INCORRECT. Please try again. See the documentation above for pyConText itemData ordering'" ] }, - "execution_count": 14, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -614,7 +645,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 33, "metadata": {}, "outputs": [ { @@ -691,9 +722,9 @@ "text": [ "****************\n", "Performance for Classifier 2 : 3 total Targets\n", - "Precision : 0.5185185185185185\n", + "Precision : 0.6666666666666666\n", "Recall : 0.8235294117647058\n", - "F1: 0.6363636363636364\n", + "F1: 0.7368421052631577\n", "\n", "Confusion Matrix : \n" ] @@ -731,8 +762,8 @@ " \n", " \n", " 0\n", - " 10\n", - " 26\n", + " 22\n", + " 14\n", " \n", " \n", " 1\n", @@ -746,7 +777,7 @@ "text/plain": [ "Predicted False True \n", "Actual \n", - "0 10 26\n", + "0 22 14\n", "1 6 28" ] }, @@ -821,7 +852,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -858,7 +889,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 35, "metadata": {}, "outputs": [], "source": [ @@ -868,7 +899,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 36, "metadata": {}, "outputs": [], "source": [ @@ -886,7 +917,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 37, "metadata": {}, "outputs": [ { @@ -919,7 +950,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 38, "metadata": {}, "outputs": [ { @@ -952,7 +983,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 40, "metadata": {}, "outputs": [ { @@ -985,7 +1016,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -1019,7 +1050,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ @@ -1028,7 +1059,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ @@ -1050,7 +1081,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 44, "metadata": {}, "outputs": [ { @@ -1082,7 +1113,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 47, "metadata": {}, "outputs": [ { @@ -1123,22 +1154,22 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'INCORRECT. It is not clear what you passed in. Please see the list of possible answers'" + "'CORRECT. Since this will modifier targets before it, it would properly modify pnuemonia in the sentence : \"Pneumonia was ruled out\"'" ] }, - "execution_count": 28, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "modifier_directionality_quiz('UPDATE_ME')" + "modifier_directionality_quiz('backward')" ] }, { @@ -1150,14 +1181,14 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 83, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Total Modifiers Loaded for pipeline #3 : [8]\n", + "Total Modifiers Loaded for pipeline #3 : [12]\n", "Total Targets Loaded for pipeline #3 : [3]\n" ] } @@ -1183,9 +1214,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 84, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "

PORTABLE CHEST: Comparison made to prior film from X:XX a.m. the same day. The ET tube and nasogastric tube remain in good position. Cardiac and mediastinal contours are stable. No acute changes are seen within the lung parenchyma; specifically, there is no evidence of new infiltrate (skin folds do project over the right lung). No consolidation on either side. IMPRESSION: No evidence of pneumonia.

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# prepare some colors for displaying any markup we might see\n", "colors = {\n", @@ -1208,9 +1252,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 85, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\t\t\t " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "# And use Brat style display:\n", "view_pycontext_output(context3)" @@ -1225,9 +1285,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 86, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Marking up all documents...\n", + "DONE Marking up all documents...\n", + "CPU times: user 872 ms, sys: 28.4 ms, total: 900 ms\n", + "Wall time: 882 ms\n" + ] + } + ], "source": [ "%%time\n", "# NOTE : This is a \"magic\" command to Jupyter to time the execution of this entire cell\n", @@ -1245,7 +1316,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 87, "metadata": {}, "outputs": [], "source": [ @@ -1268,9 +1339,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 88, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "fac3a32dca1b4e7a89de23597c7715be", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(IntSlider(value=0, description='i', max=69), Output()), _dom_classes=('widget-interact',…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "view_pycontext_graph(report_results)" ] diff --git a/10_NLP_DocumentClassification.ipynb b/10_NLP_DocumentClassification.ipynb index 9d7c288..65d0b6c 100644 --- a/10_NLP_DocumentClassification.ipynb +++ b/10_NLP_DocumentClassification.ipynb @@ -55,14 +55,14 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "# Let's just consider the example at the beginning as a document,\n", "# and run pyConText to get markups\n", "\n", - "report = \"Right pleural effusion can be excluded. Likely small left pleural effusion. \"\n", + "report = \"NO Right pleural effusion can be excluded. NO Likely small left pleural effusion. \"\n", "\n", "targets = itemData([\"effusion\", \"EVIDENCE_OF_PNEUMONIA\", r\"effusion[s]?\", \"\"])\n", "\n", @@ -73,14 +73,14 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", - "\t\t\t " ], @@ -94,8 +94,7 @@ ], "source": [ "# To confirm what we get from pyConText\n", - "view_pycontext_output(markups)\n", - " \n" + "view_pycontext_output(markups)\n" ] }, { @@ -164,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 49, "metadata": {}, "outputs": [], "source": [ @@ -188,7 +187,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 50, "metadata": {}, "outputs": [ { @@ -215,7 +214,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 51, "metadata": {}, "outputs": [], "source": [ @@ -225,7 +224,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 52, "metadata": {}, "outputs": [ { @@ -262,8 +261,8 @@ " 0\n", " T0\n", " Target\n", - " 14\n", - " 22\n", + " 17\n", + " 25\n", " effusion\n", " evidence_of_pneumonia\n", " \n", @@ -271,17 +270,17 @@ " 1\n", " T1\n", " Modifier\n", - " 29\n", - " 38\n", - " excluded\n", + " 0\n", + " 2\n", + " NO\n", " definite_negated_existence\n", " \n", " \n", " 2\n", " T2\n", " Target\n", - " 66\n", - " 74\n", + " 72\n", + " 80\n", " effusion\n", " evidence_of_pneumonia\n", " \n", @@ -289,24 +288,24 @@ " 3\n", " T3\n", " Modifier\n", - " 40\n", - " 46\n", - " Likely\n", - " probable_existence\n", + " 43\n", + " 45\n", + " NO\n", + " definite_negated_existence\n", " \n", " \n", "\n", "" ], "text/plain": [ - " markup_id vis_category start end txt type\n", - "0 T0 Target 14 22 effusion evidence_of_pneumonia\n", - "1 T1 Modifier 29 38 excluded definite_negated_existence\n", - "2 T2 Target 66 74 effusion evidence_of_pneumonia\n", - "3 T3 Modifier 40 46 Likely probable_existence" + " markup_id vis_category start end txt type\n", + "0 T0 Target 17 25 effusion evidence_of_pneumonia\n", + "1 T1 Modifier 0 2 NO definite_negated_existence\n", + "2 T2 Target 72 80 effusion evidence_of_pneumonia\n", + "3 T3 Modifier 43 45 NO definite_negated_existence" ] }, - "execution_count": 8, + "execution_count": 52, "metadata": {}, "output_type": "execute_result" } @@ -318,7 +317,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 53, "metadata": { "scrolled": true }, @@ -365,7 +364,7 @@ " \n", " 1\n", " R1\n", - " probable_existence\n", + " definite_negated_existence\n", " Modifier\n", " T3\n", " Target\n", @@ -378,10 +377,10 @@ "text/plain": [ " relation_id type arg1_cate arg1_id arg2_cate arg2_id\n", "0 R0 definite_negated_existence Modifier T1 Target T0\n", - "1 R1 probable_existence Modifier T3 Target T2" + "1 R1 definite_negated_existence Modifier T3 Target T2" ] }, - "execution_count": 9, + "execution_count": 53, "metadata": {}, "output_type": "execute_result" } @@ -400,16 +399,21 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 54, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['pos_evidence', 'pos_evidence', 'pos_evidence', 'pos_evidence']" + "['pos_evidence',\n", + " 'neg_evidence',\n", + " 'pos_evidence',\n", + " 'neg_evidence',\n", + " 'pos_evidence',\n", + " 'pos_evidence']" ] }, - "execution_count": 10, + "execution_count": 54, "metadata": {}, "output_type": "execute_result" } @@ -428,16 +432,16 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'pneumonia_doc_no'" + "'pneumonia_doc_yes'" ] }, - "execution_count": 11, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } @@ -457,9 +461,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 56, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'pneumonia_doc_yes'" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "doc_conclusion = classifier.classify_doc(report)\n", "doc_conclusion" @@ -474,9 +489,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 57, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\t\t\t " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "view_pycontext_output(classifier.get_last_context_doc())" ] diff --git a/11_NLP_ErrorAnalysis2.ipynb b/11_NLP_ErrorAnalysis2.ipynb index 514645f..64a4357 100644 --- a/11_NLP_ErrorAnalysis2.ipynb +++ b/11_NLP_ErrorAnalysis2.ipynb @@ -15,7 +15,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 281, "metadata": {}, "outputs": [], "source": [ @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 357, "metadata": {}, "outputs": [], "source": [ @@ -99,21 +99,32 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 358, + "metadata": {}, + "outputs": [], + "source": [ + "#?DocumentClassifier" + ] + }, + { + "cell_type": "code", + "execution_count": 395, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Reading annotations from file : data/training_v2.zip\n", - "Opening local file : data/training_v2.zip\n" + "Reading annotations from file : data/test_v2.zip\n", + "Opening local file : data/test_v2.zip\n" ] } ], "source": [ "#Read in the training documents and annotations\n", - "annotated_doc_map = read_doc_annotations('data/training_v2.zip')\n", + "annotated_doc_map = read_doc_annotations('data/test_v2.zip')\n", + "#annotated_doc_map = read_doc_annotations('data/training_v2.zip')\n", + "\n", "\n", "#Here we initiate our DocumentClassifier directly through rule files:\n", "#Change the file names if you use different files \n", @@ -130,22 +141,22 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 396, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Precision : 0.5185185185185185\n", - "Recall : 0.8235294117647058\n", - "F1: 0.6363636363636364\n", + "Precision : 0.8125\n", + "Recall : 0.9285714285714286\n", + "F1: 0.8666666666666666\n", "\n", "Confusion Matrix : \n", "Predicted 0 1\n", "Actual \n", - "0 10 26\n", - "1 6 28\n" + "0 13 3\n", + "1 1 13\n" ] } ], @@ -163,7 +174,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 397, "metadata": {}, "outputs": [ { @@ -171,9 +182,9 @@ "output_type": "stream", "text": [ "Start to evaluate against reference standards...\n", - "Precision : 0.519\n", - "Recall : 0.824\n", - "F1: 0.636\n" + "Precision : 0.812\n", + "Recall : 0.929\n", + "F1: 0.867\n" ] }, { @@ -209,13 +220,13 @@ " \n", " \n", " 1\n", - " 28\n", - " 6\n", + " 13\n", + " 1\n", " \n", " \n", " 0\n", - " 26\n", - " 10\n", + " 3\n", + " 13\n", " \n", " \n", "\n", @@ -224,8 +235,8 @@ "text/plain": [ "Predicted 1 0\n", "Actual \n", - "1 28 6\n", - "0 26 10" + "1 13 1\n", + "0 3 13" ] }, "metadata": {}, @@ -256,47 +267,22 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 398, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
document nameSnippets
subject_id_150_hadm_id_12121
es are\n", - " unremarkable.\n", + "
document nameSnippets
subject_id_4276_hadm_id_25705
the carina.\n", " \n", - " IMPRESSION: Small focal opacity in right upper lobe and right paratracheal\n", - " opacity. In the sett
CHEST PA AND LATERAL: The heart size is normal. There is an area of\n", - " increased opacity lateral to the right paratracheal stripe. In the
pacity lateral to the right paratracheal stripe. In the right\n", - " upper lobe, there is a small focal opacity. The lungs are otherwise clear.\n", - " There are no
subject_id_5472_hadm_id_11987
id SVC. \n", - " There is no apparent pneumothorax. A right IJ line, NGT, and ETT are\n", - " unchanged as are the parenchymal changes in the lungs compared to the earlier\n", - " chest x-ray this mor
subject_id_7027_hadm_id_33117
ossibility of free\n", - " intraperitoneal air.\n", - " 2) Left lower lobe atelectasis/consolidation.\n", - " 3) Moderate gastric distention with multiple
stinal and hilar contours are\n", - " unremarkable. There is patchy opacity at the left lower lobe representing\n", - " either atelectasis or consolidation. No definite free air is identified,\n", - " howeve
subject_id_7272_hadm_id_19098
rt failure with bilateral pleural effusions.\n", - " Collapse and/or consolidation at the bases bilaterally.\n", - "\n", - "
rall heart size is difficult to assess. There is dense retrocardiac\n", - " opacity, possibly secondary to collapse and/or consolidation in the left lower\n", - " lobe. There is also a rig
ion in the left lower\n", - " lobe. There is also a right lower lobe and middle lobe opacity consistent\n", - " with collapse and/or consolidation.\n", + " IMPRESSION:\n", " \n", - " IMPRESSION: Persistent left heart fai
subject_id_7525_hadm_id_19141
n distal superior vena cava, unchanged. There is marked\n", - " improvement of the bilateral consolidations, especially on the right. The NG\n", - " tube tip is
subject_id_9082_hadm_id_29395
tient with seizure.\n", + " 1. New bibasilar opacities, which may represent atelectasis or aspiration\n", + " pneumonia.\n", " \n", - " Low lung volumes. Bilateral basilar opacities, considerably larger at the\n", - " left base than at
hyroid.\n", - " \n", - " IMPRESSION: Lung volumes with bilateral basilar opacities.\n", - " \n", - " Question substernal thyroid enlargemen
" + " 2. Right central venous catheter with
FINDINGS: Since prior examination, has been interval development of bibasilar\n", + " opacities, may represent atelectasis or aspiration pneumonia. Right- sided\n", + " subclavian approach central ve
" ], "text/plain": [ "" @@ -323,18 +309,18 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 399, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "22a4ec421ab7440f8f8d0983ff9ccbfd", + "model_id": "7320dc0ba69040138d47d9f27bb21ae6", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "interactive(children=(IntSlider(value=0, description='i', max=25), Output()), _dom_classes=('widget-interact',…" + "interactive(children=(IntSlider(value=0, description='i', max=2), Output()), _dom_classes=('widget-interact',)…" ] }, "metadata": {}, @@ -357,15 +343,22 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 389, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "No documents to view.\n" - ] + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "78987c4514e4405f91796de0c87daed5", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(IntSlider(value=0, description='i', max=0), Output()), _dom_classes=('widget-interact',)…" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ @@ -420,7 +413,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "977fd3fee8544ce0b25582a39cb041f3", + "model_id": "69e84d9095354af2807157c4a488abb4", "version_major": 2, "version_minor": 0 }, @@ -434,7 +427,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "712dd4de423e42f2bbb27df919dd1977", + "model_id": "ac48e5e98f0d41fea5a32afb213ca927", "version_major": 2, "version_minor": 0 }, @@ -478,7 +471,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a37f9a4ff0284ba6a1e52d41db33a43a", + "model_id": "aa29111d8ddc40c7aadfc308eac36671", "version_major": 2, "version_minor": 0 }, @@ -492,7 +485,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ae0d2e99cfac454d91b221e235c83507", + "model_id": "849a2d8fa6b74b42b2021e0f75a38be4", "version_major": 2, "version_minor": 0 }, @@ -536,7 +529,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2391d3c571c348c8a4f10dd59a823260", + "model_id": "ef7def291aff42189a8e9b393e701bf5", "version_major": 2, "version_minor": 0 }, @@ -550,7 +543,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "d04b7760c6964a139de089d3d8f98cfb", + "model_id": "ed474429133f4d3f8df7b8666af5b3cb", "version_major": 2, "version_minor": 0 }, @@ -575,6 +568,13 @@ "Presenters : Dr. Wendy Chapman, Kelly Peterson, Alec Chapman, Jianlin Shi
Acknowledgement: Many thanks to Olga Patterson because part of the materials are adopted from his previous work." ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, diff --git a/24_brat_workspace_setup.ipynb b/24_brat_workspace_setup.ipynb index 0276ad1..1c0c1d3 100644 --- a/24_brat_workspace_setup.ipynb +++ b/24_brat_workspace_setup.ipynb @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -62,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ diff --git a/27_Calculate_Agreement_for_Brat_Annotations.ipynb b/27_Calculate_Agreement_for_Brat_Annotations.ipynb index 99de365..ba0d538 100644 --- a/27_Calculate_Agreement_for_Brat_Annotations.ipynb +++ b/27_Calculate_Agreement_for_Brat_Annotations.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -25,7 +25,7 @@ "Requirement already satisfied: sortedcontainers<3.0,>=2.0 in /opt/conda/lib/python3.7/site-packages (from intervaltree) (2.1.0)\n", "Building wheels for collected packages: intervaltree\n", " Building wheel for intervaltree (setup.py) ... \u001b[?25ldone\n", - "\u001b[?25h Stored in directory: /home/jianlins/.cache/pip/wheels/08/99/c0/5a5942f5b9567c59c14aac76f95a70bf11dccc71240b91ebf5\n", + "\u001b[?25h Stored in directory: /home/gastonq/.cache/pip/wheels/08/99/c0/5a5942f5b9567c59c14aac76f95a70bf11dccc71240b91ebf5\n", "Successfully built intervaltree\n", "Installing collected packages: intervaltree\n", "Successfully installed intervaltree-3.0.2\n" @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -62,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -74,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -103,7 +103,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -123,74 +123,17 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 15, "metadata": { "scrolled": true }, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "EVIDENCE_OF_PNEUMONIA\n", - "0 0 14 None\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
B+B-
A+00.0
A-14NaN
\n", - "
" - ], - "text/plain": [ - " B+ B-\n", - "A+ 0 0.0\n", - "A- 14 NaN" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "name": "stdout", "output_type": "stream", "text": [ "PNEUMONIA_DOC_NO\n", - "0 0 4 None\n" + "4 2 0 None\n" ] }, { @@ -221,12 +164,12 @@ " \n", " \n", " A+\n", - " 0\n", - " 0.0\n", + " 4\n", + " 2.0\n", " \n", " \n", " A-\n", - " 4\n", + " 0\n", " NaN\n", " \n", " \n", @@ -235,8 +178,8 @@ ], "text/plain": [ " B+ B-\n", - "A+ 0 0.0\n", - "A- 4 NaN" + "A+ 4 2.0\n", + "A- 0 NaN" ] }, "metadata": {}, @@ -247,7 +190,7 @@ "output_type": "stream", "text": [ "PNEUMONIA_DOC_YES\n", - "0 0 6 None\n" + "4 0 7 None\n" ] }, { @@ -278,12 +221,12 @@ " \n", " \n", " A+\n", - " 0\n", + " 4\n", " 0.0\n", " \n", " \n", " A-\n", - " 6\n", + " 7\n", " NaN\n", " \n", " \n", @@ -292,8 +235,8 @@ ], "text/plain": [ " B+ B-\n", - "A+ 0 0.0\n", - "A- 6 NaN" + "A+ 4 0.0\n", + "A- 7 NaN" ] }, "metadata": {}, @@ -336,7 +279,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -346,15 +289,72 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 11, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CONSOLIDATION\n", + "1 3 1 None\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
B+B-
A+13.0
A-1NaN
\n", + "
" + ], + "text/plain": [ + " B+ B-\n", + "A+ 1 3.0\n", + "A- 1 NaN" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stdout", "output_type": "stream", "text": [ "EVIDENCE_OF_PNEUMONIA\n", - "0 0 14 None\n" + "0 0 13 None\n" ] }, { @@ -390,7 +390,7 @@ " \n", " \n", " A-\n", - " 14\n", + " 13\n", " NaN\n", " \n", " \n", @@ -400,7 +400,7 @@ "text/plain": [ " B+ B-\n", "A+ 0 0.0\n", - "A- 14 NaN" + "A- 13 NaN" ] }, "metadata": {}, @@ -410,8 +410,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "PNEUMONIA_DOC_NO\n", - "0 0 4 None\n" + "LOCAL_INFILTRATE\n", + "0 1 1 None\n" ] }, { @@ -443,11 +443,125 @@ " \n", " A+\n", " 0\n", - " 0.0\n", + " 1.0\n", + " \n", + " \n", + " A-\n", + " 1\n", + " NaN\n", + " \n", + " \n", + "\n", + "" + ], + "text/plain": [ + " B+ B-\n", + "A+ 0 1.0\n", + "A- 1 NaN" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PNEUMONIA\n", + "0 9 0 None\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", + "
B+B-
A+09.0
A-0NaN
\n", + "
" + ], + "text/plain": [ + " B+ B-\n", + "A+ 0 9.0\n", + "A- 0 NaN" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PNEUMONIA_DOC_NO\n", + "4 2 0 None\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -456,8 +570,8 @@ ], "text/plain": [ " B+ B-\n", - "A+ 0 0.0\n", - "A- 4 NaN" + "A+ 4 2.0\n", + "A- 0 NaN" ] }, "metadata": {}, @@ -468,7 +582,7 @@ "output_type": "stream", "text": [ "PNEUMONIA_DOC_YES\n", - "0 0 6 None\n" + "4 0 7 None\n" ] }, { @@ -499,12 +613,12 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -513,8 +627,8 @@ ], "text/plain": [ " B+ B-\n", - "A+ 0 0.0\n", - "A- 6 NaN" + "A+ 4 0.0\n", + "A- 7 NaN" ] }, "metadata": {}, @@ -547,7 +661,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -556,11 +670,26 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.36363636363636365\n" + ] + } + ], "source": [ - "# your code goes here:\n" + "# your code goes here: observed agreement\n", + "d = 11-(a+b+c)\n", + "#a b\n", + "#c d\n", + "\n", + "obs = (a+d)/(a+b+d+c)\n", + "print(obs)\n", + "\n" ] }, { @@ -581,7 +710,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -589,10 +718,29 @@ "output_type": "stream", "text": [ "PNEUMONIA_DOC_NO\n", - "(0, 0, 4, None)\n", - "\tNo documents to display.\n", + "(4, 2, 0, None)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6a12da18b6ff4464ac4434d0bf5c2ffa", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HTML(value='
B+B-
A+42.0
A-0NaN
A+040.0
A-67NaN