diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..445853b12 --- /dev/null +++ b/.gitignore @@ -0,0 +1,132 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# custom +solution_guidance/models/test-* \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..4b000a018 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,26 @@ + +# Use an official Python runtime as a parent image +FROM python:3.7.5-stretch + +RUN apt-get update && apt-get install -y \ +python3-dev \ +build-essential + +# Set the working directory to /app +WORKDIR /app + +# Copy the current directory contents into the container at /app +ADD . /app + +# Install any needed packages specified in requirements.txt +RUN pip install --upgrade pip +RUN pip install --no-cache-dir -r requirements.txt + +# Make port 80 available to the world outside this container +EXPOSE 80 + +# Define environment variable +ENV NAME World + +# Run app.py when the container launches +CMD ["python", "app.py"] diff --git a/EDA.ipynb b/EDA.ipynb new file mode 100644 index 000000000..85e81ddc0 --- /dev/null +++ b/EDA.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.6.6","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"colab":{"name":"EDA.ipynb","provenance":[],"collapsed_sections":[]}},"cells":[{"cell_type":"code","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","trusted":true,"id":"zxPIh_mSies9","colab_type":"code","outputId":"351daaac-10e9-4831-fe97-a7ec25946f73","executionInfo":{"status":"ok","timestamp":1584058231910,"user_tz":300,"elapsed":524,"user":{"displayName":"lei liu","photoUrl":"","userId":"14671864135837887649"}},"colab":{"base_uri":"https://localhost:8080/","height":35}},"source":["import numpy as np # linear algebra\n","import time\n","import cv2\n","import os\n","from matplotlib import pyplot as plt\n","from tqdm import tqdm_notebook\n","from glob import glob\n","import multiprocessing\n","from google.colab import drive\n","drive.mount('/content/gdrive/')\n","os.chdir('/content/gdrive/My Drive/github/ai-workflow-capstone')\n","from IPython.display import Markdown, display\n","import pandas as pd\n","from src.data_collection import fetch_data"],"execution_count":10,"outputs":[{"output_type":"stream","text":["Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount(\"/content/gdrive/\", force_remount=True).\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"zx-onyW6T18l","colab_type":"code","colab":{}},"source":["prod_data = './cs-production'\n","train_data = './cs-train'"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"0UgG7qQGUQmY","colab_type":"code","colab":{}},"source":["df = fetch_data(train_data,False)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3oMUid70VNZP","colab_type":"code","outputId":"592ecbd3-d1bd-472f-e706-26d29733c173","executionInfo":{"status":"ok","timestamp":1584058277983,"user_tz":300,"elapsed":418,"user":{"displayName":"lei liu","photoUrl":"","userId":"14671864135837887649"}},"colab":{"base_uri":"https://localhost:8080/","height":198}},"source":["df.head()"],"execution_count":12,"outputs":[{"output_type":"execute_result","data":{"text/html":["
| \n"," | country | \n","customer_id | \n","invoice | \n","price | \n","stream_id | \n","times_viewed | \n","year | \n","month | \n","day | \n","
|---|---|---|---|---|---|---|---|---|---|
| 0 | \n","United Kingdom | \n","13085.0 | \n","489434 | \n","6.95 | \n","85048 | \n","12 | \n","2017 | \n","11 | \n","28 | \n","
| 1 | \n","United Kingdom | \n","NaN | \n","489597 | \n","8.65 | \n","22130 | \n","1 | \n","2017 | \n","11 | \n","28 | \n","
| 2 | \n","United Kingdom | \n","NaN | \n","489597 | \n","1.70 | \n","22132 | \n","6 | \n","2017 | \n","11 | \n","28 | \n","
| 3 | \n","United Kingdom | \n","NaN | \n","489597 | \n","1.70 | \n","22133 | \n","4 | \n","2017 | \n","11 | \n","28 | \n","
| 4 | \n","United Kingdom | \n","NaN | \n","489597 | \n","0.87 | \n","22134 | \n","1 | \n","2017 | \n","11 | \n","28 | \n","
`s get reset. However, we also reset the\n// bottom margin to use `rem` units instead of `em`.\np {\n margin-top: 0;\n margin-bottom: $paragraph-margin-bottom;\n}\n\n// Abbreviations\n//\n// 1. Remove the bottom border in Firefox 39-.\n// 2. Add the correct text decoration in Chrome, Edge, IE, Opera, and Safari.\n// 3. Add explicit cursor to indicate changed behavior.\n// 4. Duplicate behavior to the data-* attribute for our tooltip plugin\n\nabbr[title],\nabbr[data-original-title] { // 4\n text-decoration: underline; // 2\n text-decoration: underline dotted; // 2\n cursor: help; // 3\n border-bottom: 0; // 1\n}\n\naddress {\n margin-bottom: 1rem;\n font-style: normal;\n line-height: inherit;\n}\n\nol,\nul,\ndl {\n margin-top: 0;\n margin-bottom: 1rem;\n}\n\nol ol,\nul ul,\nol ul,\nul ol {\n margin-bottom: 0;\n}\n\ndt {\n font-weight: $dt-font-weight;\n}\n\ndd {\n margin-bottom: .5rem;\n margin-left: 0; // Undo browser default\n}\n\nblockquote {\n margin: 0 0 1rem;\n}\n\ndfn {\n font-style: italic; // Add the correct font style in Android 4.3-\n}\n\n// stylelint-disable font-weight-notation\nb,\nstrong {\n font-weight: bolder; // Add the correct font weight in Chrome, Edge, and Safari\n}\n// stylelint-enable font-weight-notation\n\nsmall {\n font-size: 80%; // Add the correct font size in all browsers\n}\n\n//\n// Prevent `sub` and `sup` elements from affecting the line height in\n// all browsers.\n//\n\nsub,\nsup {\n position: relative;\n font-size: 75%;\n line-height: 0;\n vertical-align: baseline;\n}\n\nsub { bottom: -.25em; }\nsup { top: -.5em; }\n\n\n//\n// Links\n//\n\na {\n color: $link-color;\n text-decoration: $link-decoration;\n background-color: transparent; // Remove the gray background on active links in IE 10.\n -webkit-text-decoration-skip: objects; // Remove gaps in links underline in iOS 8+ and Safari 8+.\n\n @include hover {\n color: $link-hover-color;\n text-decoration: $link-hover-decoration;\n }\n}\n\n// And undo these styles for placeholder links/named anchors (without href)\n// which have not been made explicitly keyboard-focusable (without tabindex).\n// It would be more straightforward to just use a[href] in previous block, but that\n// causes specificity issues in many other styles that are too complex to fix.\n// See https://github.com/twbs/bootstrap/issues/19402\n\na:not([href]):not([tabindex]) {\n color: inherit;\n text-decoration: none;\n\n @include hover-focus {\n color: inherit;\n text-decoration: none;\n }\n\n &:focus {\n outline: 0;\n }\n}\n\n\n//\n// Code\n//\n\n// stylelint-disable font-family-no-duplicate-names\npre,\ncode,\nkbd,\nsamp {\n font-family: monospace, monospace; // Correct the inheritance and scaling of font size in all browsers.\n font-size: 1em; // Correct the odd `em` font sizing in all browsers.\n}\n// stylelint-enable font-family-no-duplicate-names\n\npre {\n // Remove browser default top margin\n margin-top: 0;\n // Reset browser default of `1em` to use `rem`s\n margin-bottom: 1rem;\n // Don't allow content to break outside\n overflow: auto;\n // We have @viewport set which causes scrollbars to overlap content in IE11 and Edge, so\n // we force a non-overlapping, non-auto-hiding scrollbar to counteract.\n -ms-overflow-style: scrollbar;\n}\n\n\n//\n// Figures\n//\n\nfigure {\n // Apply a consistent margin strategy (matches our type styles).\n margin: 0 0 1rem;\n}\n\n\n//\n// Images and content\n//\n\nimg {\n vertical-align: middle;\n border-style: none; // Remove the border on images inside links in IE 10-.\n}\n\nsvg:not(:root) {\n overflow: hidden; // Hide the overflow in IE\n}\n\n\n//\n// Tables\n//\n\ntable {\n border-collapse: collapse; // Prevent double borders\n}\n\ncaption {\n padding-top: $table-cell-padding;\n padding-bottom: $table-cell-padding;\n color: $text-muted;\n text-align: left;\n caption-side: bottom;\n}\n\nth {\n // Matches default `