Skip to content

Commit 5ef881a

Browse files
committed
ok
1 parent 598944b commit 5ef881a

File tree

3 files changed

+233
-9
lines changed

3 files changed

+233
-9
lines changed

README.md

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
# python-numpy-pandas-evaluation
22

3+
### Programming
4+
5+
You have an `assessment.py` file and a `testing.py` file. Create a `src` directory and a `test` directory, each containing an `__init__.py` file. Use `pytest` to make sure your code is working.
6+
7+
Complete the functions in `assessment.py` and use the code in `testing.py` to make sure your code is correct.
38

49
### Titanic Modeling
510

6-
The sinking of the RMS Titanic is one of the most infamous shipwrecks in history. On April 15, 1912, during her maiden voyage, the Titanic sank after colliding with an iceberg, killing 1502 out of 2224 passengers and crew.
11+
The sinking of the RMS Titanic is one of the most infamous shipwrecks in history. On April 15, 1912, during her maiden voyage, the Titanic sank after colliding with an iceberg, killing 1502 out of 2224 passengers and crew.
712

813
One of the reasons that the shipwreck led to such loss of life was that there were not enough lifeboats for the passengers and crew. Although there was some element of luck involved in surviving the sinking, some groups of people were more likely to survive than others, such as women, children, and the upper-class.
914

10-
You are to complete the analysis of what sorts of people were likely to survive.
15+
You are to complete the analysis of what sorts of people were likely to survive.
1116

1217
Use `train.csv` as the data file.
1318

@@ -16,13 +21,13 @@ Data Dictionary
1621
Variable Definition Key
1722
survival Survival 0 = No, 1 = Yes
1823
pclass Ticket class 1 = 1st, 2 = 2nd, 3 = 3rd
19-
sex Sex
20-
Age Age in years
21-
sibsp # of siblings / spouses aboard the Titanic
22-
parch # of parents / children aboard the Titanic
23-
ticket Ticket number
24-
fare Passenger fare
25-
cabin Cabin number
24+
sex Sex
25+
Age Age in years
26+
sibsp # of siblings / spouses aboard the Titanic
27+
parch # of parents / children aboard the Titanic
28+
ticket Ticket number
29+
fare Passenger fare
30+
cabin Cabin number
2631
embarked Port of Embarkation C = Cherbourg, Q = Queenstown, S = Southampton
2732
Variable Notes
2833
pclass: A proxy for socio-economic status (SES)

assessment.py

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
import numpy as np
2+
import pandas as pd
3+
4+
5+
# PYTHON SECTION
6+
7+
def count_characters(string):
    '''
    INPUT: STRING
    OUTPUT: DICT (with counts of each character in input string)

    Return a dictionary which contains a count of the number of times
    each character appears in the string.
    Characters with a count of 0 are not included in the output
    dictionary, so an empty string yields an empty dictionary.
    '''
    counts = {}
    for char in string:
        # dict.get supplies 0 the first time a character is seen.
        counts[char] = counts.get(char, 0) + 1
    return counts
18+
19+
20+
def invert_dictionary(d):
    '''
    INPUT: DICT
    OUTPUT: DICT (of sets of input keys indexing the same input values
                  indexed by the input values)

    Given a dictionary d, return a new dictionary with d's values
    as keys and the value for a given key being
    the set of d's keys which shared the same value.
    e.g. {'a': 2, 'b': 4, 'c': 2} => {2: {'a', 'c'}, 4: {'b'}}

    The values of d must be hashable, since they become keys of the
    result. The input dictionary is not modified.
    '''
    inverted = {}
    for key, value in d.items():
        # setdefault creates the set the first time a value is seen.
        inverted.setdefault(value, set()).add(key)
    return inverted
32+
33+
34+
def word_count(filename):
    '''
    INPUT: STRING
    OUTPUT: INT, INT, INT (a tuple with line, word,
                           and character count of named INPUT file)

    The INPUT filename is the name of a text file.
    The OUTPUT is a tuple containing (in order)
    the following stats for the text file:
      1. number of lines
      2. number of words (broken by whitespace)
      3. number of characters

    The file is read in text mode as UTF-8, so characters are counted
    after decoding and newline characters are included in the count.
    A final line without a trailing newline still counts as a line.
    '''
    lines = words = chars = 0
    # Stream line by line so large files are never held in memory at once.
    with open(filename, encoding="utf-8") as f:
        for line in f:
            lines += 1
            words += len(line.split())
            chars += len(line)
    return lines, words, chars
48+
49+
50+
def matrix_multiplication(A, B):
    '''
    INPUT: LIST (of length n) OF LIST (of length n) OF INTEGERS,
           LIST (of length n) OF LIST (of length n) OF INTEGERS
    OUTPUT: LIST OF LIST OF INTEGERS
            (storing the product of a matrix multiplication operation)

    Return the matrix which is the product of matrix A and matrix B
    where A and B will be (a) integer valued (b) square matrices
    (c) of size n-by-n (d) encoded as lists of lists.

    For example:
    A = [[2, 3, 4], [6, 4, 2], [-1, 2, 0]] corresponds to the matrix

        | 2  3  4 |
        | 6  4  2 |
        |-1  2  0 |

    Implemented in straight python (no numpy). Neither input is modified.
    '''
    # Transpose B once so each of its columns can be paired with a row of A.
    columns_of_b = list(zip(*B))
    return [
        [sum(x * y for x, y in zip(row, column)) for column in columns_of_b]
        for row in A
    ]
71+
72+
73+
# NumPy SECTION
74+
75+
76+
def array_work(rows, cols, scalar, matrixA):
    '''
    INPUT: INT, INT, INT, NUMPY ARRAY
    OUTPUT: NUMPY ARRAY
            (matrix product of matrixA and a rows-by-cols matrix
             filled with "scalar")

    Create matrix of size (rows, cols) with elements initialized to the scalar
    value. Right multiply that matrix with the passed matrixA (i.e. AB, not
    BA, where A is matrixA and B is the new matrix). Return the result of the
    multiplication. You needn't check for matrix compatibility, but you
    should accomplish this in a single line.

    E.g., array_work(2, 3, 5, [[3, 4], [5, 6], [7, 8]])
           [[3, 4],      [[5, 5, 5],
            [5, 6],   *   [5, 5, 5]]
            [7, 8]]
    '''
    return np.dot(matrixA, np.full((rows, cols), scalar))
93+
94+
95+
def boolean_indexing(arr, minimum):
    '''
    INPUT: NUMPY ARRAY, INT
    OUTPUT: NUMPY ARRAY
            (of just elements in "arr" greater or equal to "minimum")

    Return a one-dimensional array of only the elements of "arr" that are
    greater than or equal to "minimum", in row-major order.

    Ex:
    In [1]: boolean_indexing([[3, 4, 5], [6, 7, 8]], 7)
    Out[1]: array([7, 8])
    '''
    # The example above passes a nested list, so coerce before masking.
    values = np.asarray(arr)
    return values[values >= minimum]
109+
110+
111+
# Pandas SECTION
112+
113+
def make_series(start, length, index):
    '''
    INPUTS: INT, INT, LIST (of length "length")
    OUTPUT: PANDAS SERIES (of "length" sequential integers
            beginning with "start" and with index "index")

    Create a pandas Series of length "length" with index "index"
    and with elements that are sequential integers starting from "start".
    You may assume the length of index will be "length".

    E.g.,
    In [1]: make_series(5, 3, ['a', 'b', 'c'])
    Out[1]:
    a    5
    b    6
    c    7
    dtype: int64
    '''
    return pd.Series(range(start, start + length), index=index)
132+
133+
134+
def data_frame_work(df, colA, colB, colC):
    '''
    INPUT: DATAFRAME, STR, STR, STR
    OUTPUT: None

    Insert a column (colC) into the dataframe that is the sum of colA and colB.
    Assume that df contains columns colA and colB and that these are numeric.

    The dataframe is modified in place; nothing is returned.
    '''
    df[colC] = df[colA] + df[colB]

testing.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
def test_count_characters(self):
    '''count_characters tallies every distinct character of the input.'''
    text = "abafdcggfaabe"
    expected = {"a": 4, "b": 2, "c": 1, "d": 1, "e": 1, "f": 2, "g": 2}
    self.assertEqual(a.count_characters(text), expected)
6+
7+
8+
def test_invert_dictionary(self):
    '''invert_dictionary groups the input keys by their shared value.'''
    source = {"a": 4, "b": 2, "c": 1, "d": 1, "e": 1, "f": 2, "g": 2}
    expected = {4: {'a'}, 2: {'b', 'f', 'g'}, 1: {'c', 'd', 'e'}}
    inverted = a.invert_dictionary(source)
    self.assertEqual(inverted, expected)
12+
13+
14+
def test_word_count(self):
    '''word_count returns (lines, words, characters) for data/alice.txt.'''
    expected_stats = (17, 1615, 8449)
    stats = a.word_count('data/alice.txt')
    self.assertEqual(stats, expected_stats)
16+
17+
18+
def test_matrix_multiplication(self):
    '''matrix_multiplication returns the product of two 3-by-3 matrices.'''
    left = [[2, 3, 4], [6, 4, 2], [-1, 2, 0]]
    right = [[8, -3, 1], [-7, 3, 2], [0, 3, 3]]
    expected = [[-5, 15, 20], [20, 0, 20], [-22, 9, 3]]
    product = a.matrix_multiplication(left, right)
    self.assertEqual(product, expected)
23+
24+
25+
def test_array_work(self):
    '''array_work returns matrixA times a (rows, cols) matrix of the scalar.'''
    matrixA = np.array([[-4, -2],
                        [0, -3],
                        [-4, -1],
                        [-1, 1],
                        [-3, 0]])
    answer1 = np.array([[-24, -24, -24],
                        [-12, -12, -12],
                        [-20, -20, -20],
                        [0, 0, 0],
                        [-12, -12, -12]])
    result1 = a.array_work(2, 3, 4, matrixA)
    # array_equal also compares shapes; np.all(x == y) would broadcast and
    # could accept a result of the wrong shape.
    self.assertTrue(np.array_equal(answer1, result1))

    answer2 = np.array([[-36, -36],
                        [-18, -18],
                        [-30, -30],
                        [0, 0],
                        [-18, -18]])
    result2 = a.array_work(2, 2, 6, matrixA)
    self.assertTrue(np.array_equal(answer2, result2))
46+
47+
48+
def test_make_series(self):
    '''make_series returns a Series of sequential ints under the given index.'''
    result = a.make_series(7, 4, ['a', 'b', 'c', 'd'])
    # assertIsInstance reports the actual type when the check fails.
    self.assertIsInstance(result, pd.Series)
    self.assertEqual(result['a'], 7)
    self.assertEqual(result['d'], 10)

    result = a.make_series(22, 5, ['a', 'b', 'c', 'd', 'hi'])
    self.assertEqual(result['a'], 22)
    self.assertEqual(result['d'], 25)
    self.assertEqual(result['hi'], 26)
58+
59+
60+
def test_data_frame_work(self):
    '''data_frame_work adds column colC holding colA + colB, in place.'''
    df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
    colA, colB, colC = ('a', 'b', 'c')
    a.data_frame_work(df, colA, colB, colC)
    # assertIn lists the existing columns when the new one is missing.
    self.assertIn(colC, df.columns.tolist())
    self.assertEqual(df[colC].tolist(), [5, 7, 9])
66+
67+
68+
def test_boolean_indexing(self):
    '''boolean_indexing keeps only the elements >= minimum.'''
    arr = np.array([[-4, -4, -3],
                    [-1, 16, -4],
                    [-3, 6, 4]])
    # array_equal also compares shapes; np.all(x == y) would broadcast, so a
    # bare scalar 16 would have matched array([16]).
    result1 = a.boolean_indexing(arr, 0)
    answer1 = np.array([16, 6, 4])
    self.assertTrue(np.array_equal(result1, answer1))
    result2 = a.boolean_indexing(arr, 10)
    answer2 = np.array([16])
    self.assertTrue(np.array_equal(result2, answer2))
    # Boundary case: an element equal to the minimum must be kept.
    result3 = a.boolean_indexing(arr, 4)
    answer3 = np.array([16, 6, 4])
    self.assertTrue(np.array_equal(result3, answer3))

0 commit comments

Comments
 (0)