-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalysis.py
More file actions
122 lines (103 loc) · 5.34 KB
/
analysis.py
File metadata and controls
122 lines (103 loc) · 5.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
def analysis(makeGraphs=False):
    """Summarise corporate-account spending from the hackathon dataset.

    Reads ``./Pandas Hackathon Files/HackathonDataset.csv`` (relative to the
    current working directory) and, for each of the five known corporate
    rewards accounts, prints the average order amount and the average
    discount over the orders that actually had a discount. Per-month average
    spending is also computed for each company.

    Args:
        makeGraphs: when true, additionally show the summary bar chart and
            pie chart with matplotlib.

    Returns:
        None. Results are reported through ``print`` (and the charts).
    """
    # rewards numbers used to locate each company's rows in the dataset;
    # insertion order is also the order the companies are reported in
    accountNumbers = {
        "Google": "600-613-00",
        "Apple": "499-130-00",
        "Microsoft": "312-003-50",
        "Amazon": "434-502-00",
        "Toyota": "707-074-44",
    }
    totalCompanies = list(accountNumbers)

    starterDataFrame = pd.read_csv("./Pandas Hackathon Files/HackathonDataset.csv")
    # missing cells become the string "0" so every cell can be parsed below
    starterDataFrame = starterDataFrame.fillna("0")

    def toNumber(value):
        """Convert a money cell (e.g. '$12.50' or a plain number) to float."""
        if isinstance(value, str):
            value = value.replace('$', '').replace(',', '').strip()
        return float(value)

    def gatherAccountData(company):
        """Return the positional row indices belonging to ``company``.

        An unknown company name yields an empty list.
        """
        account = accountNumbers.get(company)
        if account is None:
            return []
        matches = (starterDataFrame['rewards_number'] == account).to_numpy()
        # positions (not labels) because the callers index with .iloc
        return np.flatnonzero(matches).tolist()

    def analyzeSpending(company, data):
        """Return the average ``order_amt`` over the rows listed in ``data``.

        ``company`` is accepted for signature parity with the other helpers.
        Returns 0.0 when ``data`` is empty instead of dividing by zero.
        """
        if not data:
            return 0.0
        amounts = starterDataFrame.iloc[data]['order_amt']
        return sum(toNumber(cell) for cell in amounts) / len(data)

    def analyzeDiscounts(company, data):
        """Return the average ``discount_amt`` over discounted orders only.

        Orders with a zero (or missing) discount are left out so they do not
        drag the average down. Returns 0.0 when no order had a discount.
        """
        if not data:
            return 0.0
        cells = starterDataFrame.iloc[data]['discount_amt']
        applied = [value for value in map(toNumber, cells) if value != 0]
        if not applied:
            return 0.0
        return sum(applied) / len(applied)

    def analyzeFrequency(company, data):
        """Return ``(month_labels, monthly_average_spending)`` for ``data``.

        Months without any transaction get an average of 0.0.
        Assumes ``transaction_date`` is '-' separated with the month as the
        second field (e.g. YYYY-MM-DD) -- TODO confirm against the dataset.
        """
        x = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
        totals = [0.0] * 12
        counts = [0] * 12
        if data:
            rows = starterDataFrame.iloc[data]
            for rawDate, rawAmount in zip(rows['transaction_date'], rows['order_amt']):
                parts = str(rawDate).split('-')
                try:
                    month = int(parts[1])
                except (IndexError, ValueError):
                    continue  # missing or malformed date: skip this row
                if 1 <= month <= 12:
                    # pair each amount with its own date
                    totals[month - 1] += toNumber(rawAmount)
                    counts[month - 1] += 1
        y = [total / count if count else 0.0 for total, count in zip(totals, counts)]
        # NOTE(review): the per-company monthly plot was disabled in the
        # original; the data is returned so it can be re-enabled by a caller.
        return x, y

    revenuePerCompany = []
    discountsPerCompany = []

    # main executable
    for company in totalCompanies:
        accountData = gatherAccountData(company)

        avgSpending = analyzeSpending(company, accountData)
        print("The Average spending for", company, "is: $", avgSpending)
        revenuePerCompany.append(avgSpending)

        avgDiscounts = analyzeDiscounts(company, accountData)
        print("The average discounts applied for", company, "is: $", avgDiscounts)
        discountsPerCompany.append(avgDiscounts)

        analyzeFrequency(company, accountData)

    if makeGraphs:
        plt.bar(totalCompanies, revenuePerCompany)
        plt.title("All companies Yearly Spending")
        plt.xlabel("Companies")
        plt.ylabel("Spending ($)")
        plt.show()

        # NOTE(review): these shares are hard-coded and discountsPerCompany is
        # never plotted -- confirm what this chart is meant to show.
        sizes = [100, 0]
        labels = ["Non Corporations", "Corporations"]
        plt.pie(sizes, labels=labels, autopct="%1.2f%%")
        plt.title("All Yearly Discounts")
        plt.legend(labels, loc="lower right")
        plt.show()
# run the full report, including the summary charts, when the file is loaded
analysis(makeGraphs=True)