ok

chyld · chyld · commit 5ef881a0bcef · 2018-07-19T21:39:27.000-07:00
diff --git a/README.md b/README.md
@@ -1,13 +1,18 @@
 # python-numpy-pandas-evaluation
 
+### Programming
+
+You are given two files, `assessment.py` and `testing.py`. You should create a `src` directory and a `test` directory for them, each containing an `__init__.py` file. You should use `pytest` to make sure your code is working.
+
+Complete the functions in `assessment.py` and use the code in `testing.py` to make sure your code is correct.
 
 ### Titanic Modeling
 
-The sinking of the RMS Titanic is one of the most infamous shipwrecks in history.  On April 15, 1912, during her maiden voyage, the Titanic sank after colliding with an iceberg, killing 1502 out of 2224 passengers and crew.
+The sinking of the RMS Titanic is one of the most infamous shipwrecks in history. On April 15, 1912, during her maiden voyage, the Titanic sank after colliding with an iceberg, killing 1502 out of 2224 passengers and crew.
 
 One of the reasons that the shipwreck led to such loss of life was that there were not enough lifeboats for the passengers and crew. Although there was some element of luck involved in surviving the sinking, some groups of people were more likely to survive than others, such as women, children, and the upper-class.
 
-You are to complete the analysis of what sorts of people were likely to survive. 
+You are to complete the analysis of what sorts of people were likely to survive.
 
 Use `train.csv` as the data file.
 
@@ -16,13 +21,13 @@ Data Dictionary
 Variable	Definition	Key
 survival	Survival	0 = No, 1 = Yes
 pclass	Ticket class	1 = 1st, 2 = 2nd, 3 = 3rd
-sex	Sex	
-Age	Age in years	
-sibsp	# of siblings / spouses aboard the Titanic	
-parch	# of parents / children aboard the Titanic	
-ticket	Ticket number	
-fare	Passenger fare	
-cabin	Cabin number	
+sex	Sex
+Age	Age in years
+sibsp	# of siblings / spouses aboard the Titanic
+parch	# of parents / children aboard the Titanic
+ticket	Ticket number
+fare	Passenger fare
+cabin	Cabin number
 embarked	Port of Embarkation	C = Cherbourg, Q = Queenstown, S = Southampton
 Variable Notes
 pclass: A proxy for socio-economic status (SES)
diff --git a/assessment.py b/assessment.py
@@ -0,0 +1,142 @@
+import numpy as np
+import pandas as pd
+
+
+# PYTHON SECTION
+
+def count_characters(string):
+    '''
+    INPUT: STRING
+    OUTPUT: DICT (with counts of each character in input string)
+
+    Return a dictionary which contains
+    a count of the number of times each character appears in the string.
+    Characters with a count of 0 should not be included in the
+    output dictionary.
+    '''
+    pass
+
+
+def invert_dictionary(d):
+    '''
+    INPUT: DICT
+    OUTPUT: DICT (of sets of input keys indexing the same input values
+                  indexed by the input values)
+
+    Given a dictionary d, return a new dictionary with d's values
+    as keys and the value for a given key being
+    the set of d's keys which shared the same value.
+    e.g. {'a': 2, 'b': 4, 'c': 2} => {2: {'a', 'c'}, 4: {'b'}}
+    '''
+    pass
+
+
+def word_count(filename):
+    '''
+    INPUT: STRING
+    OUTPUT: INT, INT, INT (a tuple with line, word,
+                           and character count of named INPUT file)
+
+    The INPUT filename is the name of a text file.
+    The OUTPUT is a tuple containing (in order)
+    the following stats for the text file:
+      1. number of lines
+      2. number of words (broken by whitespace)
+      3. number of characters
+    '''
+    pass
+
+
+def matrix_multiplication(A, B):
+    '''
+    INPUT: LIST (of length n) OF LIST (of length n) OF INTEGERS,
+            LIST (of length n) OF LIST (of length n) OF INTEGERS
+    OUTPUT: LIST OF LIST OF INTEGERS
+            (storing the product of a matrix multiplication operation)
+
+    Return the matrix which is the product of matrix A and matrix B
+    where A and B will be (a) integer valued (b) square matrices
+    (c) of size n-by-n (d) encoded as lists of lists.
+
+    For example:
+    A = [[2, 3, 4], [6, 4, 2], [-1, 2, 0]] corresponds to the matrix
+
+        | 2  3  4 |
+        | 6  4  2 |
+        |-1  2  0 |
+
+    Please do not use numpy. Write your solution in straight python.
+    '''
+    pass
+
+
+# NumPy SECTION
+
+
+def array_work(rows, cols, scalar, matrixA):
+    '''
+    INPUT: INT, INT, INT, NUMPY ARRAY
+    OUTPUT: NUMPY ARRAY
+    (of matrix product of matrixA times the r-by-c matrix of "scalar"s)
+
+    Create matrix of size (rows, cols) with elements initialized to the scalar
+    value. Right multiply that matrix with the passed matrixA (i.e. AB, not
+    BA).  Return the result of the multiplication.  You needn't check for
+    matrix compatibility, but you should accomplish this in a single line.
+
+    E.g., array_work(2, 3, 5, [[3, 4], [5, 6], [7, 8]])
+           [[3, 4],      [[5, 5, 5],
+            [5, 6],   *   [5, 5, 5]]
+            [7, 8]]
+    '''
+    pass
+
+
+def boolean_indexing(arr, minimum):
+    '''
+    INPUT: NUMPY ARRAY, INT
+    OUTPUT: NUMPY ARRAY
+    (of just elements in "arr" greater than or equal to "minimum")
+
+    Return an array of only the elements of "arr" that are greater than or
+    equal to "minimum"
+
+    Ex:
+    In [1]: boolean_indexing(np.array([[3, 4, 5], [6, 7, 8]]), 7)
+    Out[1]: array([7, 8])
+    '''
+    pass
+
+
+# Pandas SECTION
+
+def make_series(start, length, index):
+    '''
+    INPUTS: INT, INT, LIST (of length "length")
+    OUTPUT: PANDAS SERIES (of "length" sequential integers
+             beginning with "start" and with index "index")
+
+    Create a pandas Series of length "length" with index "index"
+    and with elements that are sequential integers starting from "start".
+    You may assume the length of index will be "length".
+
+    E.g.,
+    In [1]: make_series(5, 3, ['a', 'b', 'c'])
+    Out[1]:
+    a    5
+    b    6
+    c    7
+    dtype: int64
+    '''
+    pass
+
+
+def data_frame_work(df, colA, colB, colC):
+    '''
+    INPUT: DATAFRAME, STR, STR, STR
+    OUTPUT: None
+
+    Insert a column (colC) into the dataframe that is the sum of colA and colB.
+    Assume that df contains columns colA and colB and that these are numeric.
+    '''
+    pass
diff --git a/testing.py b/testing.py
@@ -0,0 +1,77 @@
+def test_count_characters(self):
+    string = "abafdcggfaabe"
+    answer = {"a": 4, "b": 2, "c": 1, "d": 1, "e": 1, "f": 2, "g": 2}
+    result = a.count_characters(string)
+    self.assertEqual(result, answer)
+
+
+def test_invert_dictionary(self):
+    d = {"a": 4, "b": 2, "c": 1, "d": 1, "e": 1, "f": 2, "g": 2}
+    result = {4: {'a'}, 2: {'b', 'f', 'g'}, 1: {'c', 'd', 'e'}}
+    self.assertEqual(a.invert_dictionary(d), result)
+
+
+def test_word_count(self):
+    self.assertEqual(a.word_count('data/alice.txt'), (17, 1615, 8449))
+
+
+def test_matrix_multiplication(self):
+    A = [[2, 3, 4], [6, 4, 2], [-1, 2, 0]]
+    B = [[8, -3, 1], [-7, 3, 2], [0, 3, 3]]
+    answer = [[-5, 15, 20], [20, 0, 20], [-22, 9, 3]]
+    self.assertEqual(a.matrix_multiplication(A, B), answer)
+
+
+def test_array_work(self):
+    matrixA = np.array([[-4, -2],
+                        [0, -3],
+                        [-4, -1],
+                        [-1, 1],
+                        [-3, 0]])
+    answer1 = np.array([[-24, -24, -24],
+                        [-12, -12, -12],
+                        [-20, -20, -20],
+                        [0, 0, 0],
+                        [-12, -12, -12]])
+    result1 = a.array_work(2, 3, 4, matrixA)
+    self.assertTrue(np.all(answer1 == result1))
+
+    answer2 = np.array([[-36, -36],
+                        [-18, -18],
+                        [-30, -30],
+                        [0, 0],
+                        [-18, -18]])
+    result2 = a.array_work(2, 2, 6, matrixA)
+    self.assertTrue(np.all(answer2 == result2))
+
+
+def test_make_series(self):
+    result = a.make_series(7, 4, ['a', 'b', 'c', 'd'])
+    self.assertTrue(isinstance(result, pd.Series))
+    self.assertEqual(result['a'], 7)
+    self.assertEqual(result['d'], 10)
+
+    result = a.make_series(22, 5, ['a', 'b', 'c', 'd', 'hi'])
+    self.assertEqual(result['a'], 22)
+    self.assertEqual(result['d'], 25)
+    self.assertEqual(result['hi'], 26)
+
+
+def test_data_frame_work(self):
+    df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
+    colA, colB, colC = ('a', 'b', 'c')
+    a.data_frame_work(df, colA, colB, colC)
+    self.assertTrue(colC in df.columns.tolist())
+    self.assertEqual(df[colC].tolist(), [5, 7, 9])
+
+
+def test_boolean_indexing(self):
+    arr = np.array([[-4, -4, -3],
+                    [-1, 16, -4],
+                    [-3, 6, 4]])
+    result1 = a.boolean_indexing(arr, 0)
+    answer1 = np.array([16, 6, 4])
+    self.assertTrue(np.all(result1 == answer1))
+    result2 = a.boolean_indexing(arr, 10)
+    answer2 = np.array([16])
+    self.assertTrue(np.all(result2 == answer2))