|
4 | 4 | "cell_type": "markdown",
|
5 | 5 | "metadata": {},
|
6 | 6 | "source": [
|
7 |
| - "# Missing data" |
| 7 | + "# Requirements" |
8 | 8 | ]
|
9 | 9 | },
|
10 | 10 | {
|
11 | 11 | "cell_type": "code",
|
12 |
| - "execution_count": 9, |
| 12 | + "execution_count": 2, |
13 | 13 | "metadata": {},
|
14 | 14 | "outputs": [],
|
15 | 15 | "source": [
|
|
28 | 28 | "cell_type": "markdown",
|
29 | 29 | "metadata": {},
|
30 | 30 | "source": [
|
31 |
| - "## Representing missing values" |
| 31 | + "# Representing missing values" |
32 | 32 | ]
|
33 | 33 | },
|
34 | 34 | {
|
|
40 | 40 | },
|
41 | 41 | {
|
42 | 42 | "cell_type": "code",
|
43 |
| - "execution_count": 32, |
| 43 | + "execution_count": 3, |
44 | 44 | "metadata": {},
|
45 | 45 | "outputs": [],
|
46 | 46 | "source": [
|
|
49 | 49 | },
|
50 | 50 | {
|
51 | 51 | "cell_type": "code",
|
52 |
| - "execution_count": 16, |
| 52 | + "execution_count": 4, |
53 | 53 | "metadata": {},
|
54 | 54 | "outputs": [
|
55 | 55 | {
|
|
90 | 90 | },
|
91 | 91 | {
|
92 | 92 | "cell_type": "code",
|
93 |
| - "execution_count": 24, |
| 93 | + "execution_count": 5, |
94 | 94 | "metadata": {},
|
95 | 95 | "outputs": [],
|
96 | 96 | "source": [
|
|
103 | 103 | },
|
104 | 104 | {
|
105 | 105 | "cell_type": "code",
|
106 |
| - "execution_count": 25, |
| 106 | + "execution_count": 6, |
107 | 107 | "metadata": {},
|
108 | 108 | "outputs": [
|
109 | 109 | {
|
|
130 | 130 | },
|
131 | 131 | {
|
132 | 132 | "cell_type": "code",
|
133 |
| - "execution_count": 26, |
| 133 | + "execution_count": 7, |
134 | 134 | "metadata": {},
|
135 | 135 | "outputs": [
|
136 | 136 | {
|
|
249 | 249 | "9 37 7.7 B <NA>"
|
250 | 250 | ]
|
251 | 251 | },
|
252 |
| - "execution_count": 26, |
| 252 | + "execution_count": 7, |
253 | 253 | "metadata": {},
|
254 | 254 | "output_type": "execute_result"
|
255 | 255 | }
|
|
269 | 269 | "cell_type": "markdown",
|
270 | 270 | "metadata": {},
|
271 | 271 | "source": [
|
272 |
| - "## Computing and missing values" |
| 272 | + "# Computing and missing values" |
273 | 273 | ]
|
274 | 274 | },
|
275 | 275 | {
|
|
281 | 281 | },
|
282 | 282 | {
|
283 | 283 | "cell_type": "code",
|
284 |
| - "execution_count": 27, |
| 284 | + "execution_count": 8, |
285 | 285 | "metadata": {},
|
286 | 286 | "outputs": [
|
287 | 287 | {
|
|
290 | 290 | "165"
|
291 | 291 | ]
|
292 | 292 | },
|
293 |
| - "execution_count": 27, |
| 293 | + "execution_count": 8, |
294 | 294 | "metadata": {},
|
295 | 295 | "output_type": "execute_result"
|
296 | 296 | }
|
|
301 | 301 | },
|
302 | 302 | {
|
303 | 303 | "cell_type": "code",
|
304 |
| - "execution_count": 28, |
| 304 | + "execution_count": 9, |
305 | 305 | "metadata": {},
|
306 | 306 | "outputs": [
|
307 | 307 | {
|
|
310 | 310 | "49.3"
|
311 | 311 | ]
|
312 | 312 | },
|
313 |
| - "execution_count": 28, |
| 313 | + "execution_count": 9, |
314 | 314 | "metadata": {},
|
315 | 315 | "output_type": "execute_result"
|
316 | 316 | }
|
|
321 | 321 | },
|
322 | 322 | {
|
323 | 323 | "cell_type": "code",
|
324 |
| - "execution_count": 29, |
| 324 | + "execution_count": 10, |
325 | 325 | "metadata": {},
|
326 | 326 | "outputs": [
|
327 | 327 | {
|
|
406 | 406 | "max 37.000000 7.700000"
|
407 | 407 | ]
|
408 | 408 | },
|
409 |
| - "execution_count": 29, |
| 409 | + "execution_count": 10, |
410 | 410 | "metadata": {},
|
411 | 411 | "output_type": "execute_result"
|
412 | 412 | }
|
|
424 | 424 | },
|
425 | 425 | {
|
426 | 426 | "cell_type": "code",
|
427 |
| - "execution_count": 31, |
| 427 | + "execution_count": 11, |
428 | 428 | "metadata": {},
|
429 | 429 | "outputs": [
|
430 | 430 | {
|
|
466 | 466 | " <tr>\n",
|
467 | 467 | " <th>top</th>\n",
|
468 | 468 | " <td>A</td>\n",
|
469 |
| - " <td>str1_str1</td>\n", |
| 469 | + " <td>str1</td>\n", |
470 | 470 | " </tr>\n",
|
471 | 471 | " <tr>\n",
|
472 | 472 | " <th>freq</th>\n",
|
|
481 | 481 | " category_data string_data\n",
|
482 | 482 | "count 9 9\n",
|
483 | 483 | "unique 2 9\n",
|
484 |
| - "top A str1_str1\n", |
| 484 | + "top A str1\n", |
485 | 485 | "freq 5 1"
|
486 | 486 | ]
|
487 | 487 | },
|
488 |
| - "execution_count": 31, |
| 488 | + "execution_count": 11, |
489 | 489 | "metadata": {},
|
490 | 490 | "output_type": "execute_result"
|
491 | 491 | }
|
|
496 | 496 | },
|
497 | 497 | {
|
498 | 498 | "cell_type": "code",
|
499 |
| - "execution_count": 39, |
| 499 | + "execution_count": 12, |
500 | 500 | "metadata": {},
|
501 | 501 | "outputs": [
|
502 | 502 | {
|
|
547 | 547 | "B 3"
|
548 | 548 | ]
|
549 | 549 | },
|
550 |
| - "execution_count": 39, |
| 550 | + "execution_count": 12, |
551 | 551 | "metadata": {},
|
552 | 552 | "output_type": "execute_result"
|
553 | 553 | }
|
|
558 | 558 | " .count()"
|
559 | 559 | ]
|
560 | 560 | },
|
| 561 | + { |
| 562 | + "cell_type": "markdown", |
| 563 | + "metadata": {}, |
| 564 | + "source": [ |
| 565 | + "# Selecting rows with missing data" |
| 566 | + ] |
| 567 | + }, |
561 | 568 | {
|
562 | 569 | "cell_type": "code",
|
563 |
| - "execution_count": null, |
| 570 | + "execution_count": 16, |
564 | 571 | "metadata": {},
|
565 |
| - "outputs": [], |
566 |
| - "source": [] |
| 572 | + "outputs": [ |
| 573 | + { |
| 574 | + "data": { |
| 575 | + "text/html": [ |
| 576 | + "<div>\n", |
| 577 | + "<style scoped>\n", |
| 578 | + " .dataframe tbody tr th:only-of-type {\n", |
| 579 | + " vertical-align: middle;\n", |
| 580 | + " }\n", |
| 581 | + "\n", |
| 582 | + " .dataframe tbody tr th {\n", |
| 583 | + " vertical-align: top;\n", |
| 584 | + " }\n", |
| 585 | + "\n", |
| 586 | + " .dataframe thead th {\n", |
| 587 | + " text-align: right;\n", |
| 588 | + " }\n", |
| 589 | + "</style>\n", |
| 590 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 591 | + " <thead>\n", |
| 592 | + " <tr style=\"text-align: right;\">\n", |
| 593 | + " <th></th>\n", |
| 594 | + " <th>int_data</th>\n", |
| 595 | + " <th>float_data</th>\n", |
| 596 | + " <th>category_data</th>\n", |
| 597 | + " <th>string_data</th>\n", |
| 598 | + " </tr>\n", |
| 599 | + " </thead>\n", |
| 600 | + " <tbody>\n", |
| 601 | + " <tr>\n", |
| 602 | + " <th>6</th>\n", |
| 603 | + " <td><NA></td>\n", |
| 604 | + " <td>5.5</td>\n", |
| 605 | + " <td>A</td>\n", |
| 606 | + " <td>str3</td>\n", |
| 607 | + " </tr>\n", |
| 608 | + " <tr>\n", |
| 609 | + " <th>7</th>\n", |
| 610 | + " <td>29</td>\n", |
| 611 | + " <td>NaN</td>\n", |
| 612 | + " <td>B</td>\n", |
| 613 | + " <td>str3_str1</td>\n", |
| 614 | + " </tr>\n", |
| 615 | + " <tr>\n", |
| 616 | + " <th>8</th>\n", |
| 617 | + " <td>31</td>\n", |
| 618 | + " <td>3.3</td>\n", |
| 619 | + " <td>NaN</td>\n", |
| 620 | + " <td>str2_str3</td>\n", |
| 621 | + " </tr>\n", |
| 622 | + " <tr>\n", |
| 623 | + " <th>9</th>\n", |
| 624 | + " <td>37</td>\n", |
| 625 | + " <td>7.7</td>\n", |
| 626 | + " <td>B</td>\n", |
| 627 | + " <td><NA></td>\n", |
| 628 | + " </tr>\n", |
| 629 | + " </tbody>\n", |
| 630 | + "</table>\n", |
| 631 | + "</div>" |
| 632 | + ], |
| 633 | + "text/plain": [ |
| 634 | + " int_data float_data category_data string_data\n", |
| 635 | + "6 <NA> 5.5 A str3\n", |
| 636 | + "7 29 NaN B str3_str1\n", |
| 637 | + "8 31 3.3 NaN str2_str3\n", |
| 638 | + "9 37 7.7 B <NA>" |
| 639 | + ] |
| 640 | + }, |
| 641 | + "execution_count": 16, |
| 642 | + "metadata": {}, |
| 643 | + "output_type": "execute_result" |
| 644 | + } |
| 645 | + ], |
| 646 | + "source": [ |
| 647 | + "data2[data2.isnull().any(axis=1)]" |
| 648 | + ] |
567 | 649 | }
|
568 | 650 | ],
|
569 | 651 | "metadata": {
|
|
582 | 664 | "name": "python",
|
583 | 665 | "nbconvert_exporter": "python",
|
584 | 666 | "pygments_lexer": "ipython3",
|
585 |
| - "version": "3.7.6" |
586 |
| - } |
| 667 | + "version": "3.7.7" |
| 668 | + }, |
| 669 | + "toc-autonumbering": true |
587 | 670 | },
|
588 | 671 | "nbformat": 4,
|
589 | 672 | "nbformat_minor": 4
|
|
0 commit comments