Skip to content

Commit 2ef8a78

Browse files
authored
Add files via upload
Create MongoDB and Cassandra for data
1 parent c58a7ed commit 2ef8a78

5 files changed

+459
-0
lines changed

C4_Cassandra.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# download cassandra-driver
2+
#python -m pip install cassandra-driver
3+
4+
from cassandra.cluster import Cluster
5+
# use local Cluster
6+
# Connect to the Cassandra node running on this machine and open a session.
cluster = Cluster(['localhost'])
session = cluster.connect()

# Create the keyspace only when it is missing so the script can be re-run.
session.execute(
    "CREATE KEYSPACE IF NOT EXISTS FIT5137_MASL "
    "WITH replication = {'class':'SimpleStrategy', 'replication_factor':1}"
)

# Every following statement runs inside this keyspace.
session.execute("USE FIT5137_MASL")

# User-defined type describing a single sighting observation.
session.execute(
    "CREATE TYPE IF NOT EXISTS sightingObs_type(duration text,text text,summary text)"
)
17+
18+
# One row per sighting, with the weather observation flattened into
# weatherObs_* columns. Rows are partitioned by (month, state) and clustered by
# day, city, countyName, hour and year.
session.execute("""
    CREATE TABLE IF NOT EXISTS ufos(
        id text,
        state text,
        day int,
        month int,
        year int,
        hour int,
        shape text,
        city text,
        weatherObs_windchill double,
        weatherObs_wdire text,
        weatherObs_wspd double,
        weatherObs_pressure double,
        weatherObs_temp double,
        weatherObs_hail int,
        weatherObs_rain int,
        weatherObs_vis double,
        weatherObs_dewpt double,
        weatherObs_thunder int,
        weatherObs_fog int,
        weatherObs_tornado int,
        weatherObs_hum double,
        weatherObs_snow int,
        weatherObs_conds text,
        countyName text,
        sightingObs list<frozen<sightingObs_type>>,
        PRIMARY KEY((month,state),day,city,countyName,hour,year))
""")
47+
48+
# Columns of the ufos table in the order values must be bound at execute() time.
ufo_columns = (
    "id", "state", "day", "month", "year", "hour", "shape", "city",
    "weatherObs_windchill", "weatherObs_wdire", "weatherObs_wspd",
    "weatherObs_pressure", "weatherObs_temp", "weatherObs_hail",
    "weatherObs_rain", "weatherObs_vis", "weatherObs_dewpt",
    "weatherObs_thunder", "weatherObs_fog", "weatherObs_tornado",
    "weatherObs_hum", "weatherObs_snow", "weatherObs_conds",
    "countyName", "sightingObs",
)

# Prepared INSERT with one `?` bind marker per column. The driver method is
# Session.prepare(); the original called a non-existent `prepared` and left the
# VALUES list empty, which is not valid CQL.
prepared = session.prepare(
    "INSERT INTO ufos ({}) VALUES ({})".format(
        ",".join(ufo_columns),
        ",".join("?" * len(ufo_columns)),
    )
)
53+
54+

C4_MongoDB.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# connect to mongodb,
2+
# Reference: https://www.patricia-anong.com/blog/pycharm,https://www.mongodb.com/blog/post/getting-started-with-python-and-mongodb
3+
_author = 'PEIYU LIU'
4+
import datetime
5+
6+
from pymongo import MongoClient
7+
8+
client = MongoClient('localhost', 27017)
database = client['FIT5137MASL']
ufo = database['ufo']

# Aggregation expression that builds one date value out of the separate
# year / month / day / hour fields of a sighting.
date_from_parts = {
    "$dateFromParts": {"year": "$year", "month": "$month", "day": "$day", "hour": "$hour"}
}

# (i) combine the date data (from the fields for year, month, day and hour)
ufo.aggregate([{"$project": {"dateTime": date_from_parts, "_id": 0}}])

# (ii) store the combined date data in a new field called dateTime;
# $merge writes the augmented documents back into the ufo collection.
ufo.aggregate([
    {"$addFields": {"dateTime": date_from_parts}},
    {"$merge": {"into": "ufo"}},
])

# (iii) remove the fields for year, month, day and hour
ufo.update_many({}, {"$unset": {"year": 1, "month": 1, "day": 1, "hour": 1}})

# (iv) add another field with the group name
ufo.update_many({}, {"$set": {"groupName": "Group Suki&Albee"}})

# (v) store the updated collection in a new collection called ufoDates
ufo.aggregate([
    {"$match": {}},
    {"$out": "ufoDates"},
])

# All later queries work on the new collection.
ufoDates = database['ufoDates']
38+
39+
# C.1.4. Add the sighting with the following data to the ufoDates collection
40+
# Reference (dateTime handling): https://www.analyticsvidhya.com/blog/2020/02/mongodb-in-python-tutorial-for-beginners-using-pymongo/
41+
# C.1.4: add one new sighting to ufoDates.
ufoDates.insert_one({
    "state": "MA",
    "city": "BOSTON",
    # The county key is `countyName` everywhere else in this project; the
    # original `countryName` typo left this document without a county field.
    "countyName": "SUFFOLK",
    "dateTime": datetime.datetime(1998, 7, 14, 23, 0),
    "shape": "sphere",
    # sightingObs is an array holding one observation sub-document.
    "sightingObs": [{
        "duration": "40 min",
        "text": "I was going to my work on my night shift at the St Albin’s hospital and saw an unearthly ray of shooting lights which could be none other than a UFO!",
        "summary": "Unearthly ray of shooting lights",
    }],
})
51+
52+
# C.1.5. Keeping the location, weather observations and dateTime information, find and remove the sighting observation record which has a duration of "2 1/2 minutes". To find the duration information please look closely at all of the fields of the documents in the ufoDates collection.
53+
# Select every document that has an observation lasting "2 1/2 minutes" and
# drop its sightingObs field; all other fields are left untouched.
# NOTE(review): $unset removes the whole sightingObs array of a matching
# document; if only the matching element should go, $pull may be intended - confirm.
duration_filter = {"sightingObs.duration": "2 1/2 minutes"}
drop_sighting_obs = {"$unset": {"sightingObs": ""}}
ufoDates.update_many(duration_filter, drop_sighting_obs)
54+
55+
# C.1.6. Use the aggregation pipeline to answer the following queries:
56+
# (i) What was the total number of sightings observed from the year 1990 to 2000 in the city of ‘SAN FRANCISCO’, in the state of ‘CA’. Your output can be in any format as long as it displays the required information.
57+
# result1_6 = ufoDates.aggregate([
58+
# {"$match": {"city": "SAN FRANCISCO", "state": "CA",
59+
# "dateTime": {"$gte": datetime.datetime(1990, 1, 1), "$lt": datetime.datetime(2001, 1, 1)}}},
60+
# {"$group": {"_id": "$state", "totalNumOfSObs": {"$sum": 1}}}
61+
# ]);
62+
#
63+
# print(list(result1_6))
64+
65+
# (ii) Using one MongoDB Shell query, find the average temperature, humidity, pressure and rainfall observed for all fireball shaped UFO sightings. The output should also display the average values rounded to 3 decimal places.
66+
# Output field name -> weather observation that is averaged into it.
fireball_averages = {
    "avgTemp": "$weatherObs.temp",
    "avgHumidity": "$weatherObs.hum",
    "avgPressure": "$weatherObs.pressure",
    "avgRainfallObs": "$weatherObs.rain",
}

# $group collapses all fireball sightings into a single document of averages.
fireball_group = {"_id": "fireball"}
fireball_group.update(
    {name: {"$avg": path} for name, path in fireball_averages.items()}
)

# $project rounds each average to 3 decimal places.
fireball_project = {"_id": "fireball"}
fireball_project.update(
    {name: {"$round": ["$" + name, 3]} for name in fireball_averages}
)

result1_6_2 = ufoDates.aggregate([
    {"$match": {"shape": "fireball"}},
    {"$group": fireball_group},
    {"$project": fireball_project},
])
print(list(result1_6_2))
80+
# (iii) What was the month with the highest UFO sightings?
81+
# Month names indexed so that position + 1 equals the value returned by $month.
month_names = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
]

# One $switch branch per month: month number of dateTime -> month name.
month_branches = [
    {"case": {"$eq": [{"$month": "$dateTime"}, number]}, "then": name}
    for number, name in enumerate(month_names, start=1)
]

# Count sightings per month name and keep only the month with the most.
result1_6_3 = ufoDates.aggregate([
    {"$project": {"monthName": {"$switch": {"branches": month_branches,
                                            "default": "NO"}}}},
    {"$group": {"_id": "$monthName", "highest number of UFO sightings": {"$sum": 1}}},
    {"$sort": {"highest number of UFO sightings": -1}},
    {"$limit": 1},
])
print(list(result1_6_3))

FIT5137_A1Report.pdf

7.69 MB
Binary file not shown.

FIT5137_Ass1_MongoDB.js

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
// switch to the FIT5137MASL database
2+
show dbs
3+
use FIT5137MASL
4+
show collections
5+
6+
//C.1 MongoDB shell
7+
//(i) combine the date data (from the fields for year, month, day and hour) to proper MongoDB date data types.
8+
// Use $dateFromParts to combine year, month, day and hour. Reference: https://docs.mongodb.com/manual/reference/operator/aggregation/dateFromParts/
9+
// Expression that builds one date out of the separate year/month/day/hour fields.
var dateFromParts = {$dateFromParts:{"year":"$year","month":"$month","day":"$day","hour":"$hour"}};

// (i) preview the combined date for every document.
db.ufo.aggregate([
    {$project:{"dateTime":dateFromParts,_id:0}}
]);

// (ii) store the combined date in a new field called dateTime.
// $addFields adds the field and $merge writes the documents back into ufo.
// Reference: https://docs.mongodb.com/manual/reference/operator/aggregation/addFields/
db.ufo.aggregate([
    {$addFields:{"dateTime":dateFromParts}},
    {$merge:{into:"ufo"}}
]);

// (iii) remove the fields for year, month, day and hour with $unset.
db.ufo.updateMany({},{$unset:{"year":1,"month":1,"day":1,"hour":1}});

// (iv) add another field with the group name.
db.ufo.updateMany({},{$set:{"groupName":"Group Suki&Albee"}});

// (v) store the updated collection in a new collection called ufoDates.
// $out writes the pipeline result into the named collection.
db.ufo.aggregate([
    {$match:{}},
    {$out:"ufoDates"}
]);
35+
36+
37+
//C.1.4. Add data to the ufoDates collection
38+
// add a new document to the collection; dateTime uses new Date(), and sightingObs is an array holding one object with three string values.
39+
db.ufoDates.insertOne({
    state:"MA",
    city:"BOSTON",
    // The county key is countyName (it is the $lookup join key in C.1.7);
    // the original countryName typo left this document without a county field.
    countyName:"SUFFOLK",
    dateTime:new Date("1998-07-14T23:00:00.000+00:00"),
    shape:"sphere",
    sightingObs:[{
        duration:"40 min",
        text:"I was going to my work on my night shift at the St Albin’s hospital and saw an unearthly ray of shooting lights which could be none other than a UFO!",
        summary:"Unearthly ray of shooting lights"
    }]
});
50+
51+
//C.1.5.
52+
// use update to modify data, unsure rows so use updateMany
53+
// condition is duration time, delete sightingObs field after find matching data.
54+
// Match documents that have an observation lasting "2 1/2 minutes" and drop
// their sightingObs field; every other field is kept.
// NOTE(review): $unset removes the whole sightingObs array; $pull would remove
// only the matching element - confirm which is intended.
var durationFilter = {"sightingObs.duration":"2 1/2 minutes"};
db.ufoDates.updateMany(durationFilter, {$unset:{"sightingObs":""}});
55+
56+
//C.1.6
57+
//(i)
58+
// conditions are state, city and time; group the matching data by state, then count the documents per state
59+
// Count San Francisco (CA) sightings dated from 1990-01-01 up to, but not
// including, 2001-01-01, i.e. the years 1990 through 2000.
db.ufoDates.aggregate([
    {$match:{
        "city":"SAN FRANCISCO",
        "state":"CA",
        "dateTime":{$gte:ISODate("1990-01-01"),$lt:ISODate("2001-01-01")}}},
    {$group:{_id:"$state", totalNumOfSObs:{$sum:1}}}
]);
63+
64+
65+
//(ii)
66+
//condition is fireball shape, group temp,hum,pre,rain to calculate average value and use $round to keep 3 decimal places for results
67+
//Reference:https://docs.mongodb.com/manual/reference/operator/aggregation/round/
68+
// Average temperature, humidity, pressure and rain over all fireball
// sightings, each rounded to 3 decimal places.
db.ufoDates.aggregate([
    {$match:{"shape":"fireball"}},
    {$group:{
        _id:"fireball",
        avgTemp:{$avg:"$weatherObs.temp"},
        avgHumidity:{$avg:"$weatherObs.hum"},
        avgPressure:{$avg:"$weatherObs.pressure"},
        avgRainfallObs:{$avg:"$weatherObs.rain"}}},
    {$project:{
        _id:"fireball",
        avgTemp:{$round:["$avgTemp",3]},
        avgHumidity:{$round:["$avgHumidity",3]},
        avgPressure:{$round:["$avgPressure",3]},
        avgRainfallObs:{$round:["$avgRainfallObs",3]}}}
]);
81+
82+
83+
//(iii)
84+
// use $switch to change month numbers to month names. group by month and calculate numbers of each month, sort result as descending order and limit 1 output.
85+
//Reference:https://docs.mongodb.com/manual/reference/operator/aggregation/switch/
86+
// Month names ordered so that index + 1 equals the value returned by $month.
var monthNames = ["January","February","March","April","May","June",
                  "July","August","September","October","November","December"];

// One $switch branch per month: month number of dateTime -> month name.
var monthBranches = monthNames.map(function (name, index) {
    return {case:{$eq:[{$month:"$dateTime"}, index + 1]}, then:name};
});

// Count sightings per month name, sort descending and keep the top month.
db.ufoDates.aggregate([
    {$project:{"monthName":{$switch:{branches:monthBranches, default:"..."}}}},
    {$group:{_id:"$monthName", "highest number of UFO sightings":{$sum:1}}},
    {$sort:{"highest number of UFO sightings":-1}},
    {$limit:1}
]);
105+
106+
//(iv)
107+
// $max and $min function can pick maximum and minimum value from fields. filter null value colour and leave 3 decimal places.
108+
// Reference:https://docs.mongodb.com/manual/reference/operator/aggregation/toUpper/
109+
// Highest and lowest temperature per colour. Colours are upper-cased before
// grouping so differently cased spellings fall into the same group; the group
// whose key is the empty string is dropped.
db.ufoDates.aggregate([
    {$group:{
        _id:{$toUpper:"$colour"},
        maxTemp:{$max:"$weatherObs.temp"},
        minTemp:{$min:"$weatherObs.temp"}}},
    {$match:{_id:{$ne:""}}},
    {$project:{_id:1,maxTemp:{$round:["$maxTemp",3]},minTemp:{$round:["$minTemp",3]}}}
]);
114+
115+
116+
//(v)
117+
// condition is oval shape, group by direction and summarise number, sort by descending order and pick 1 row.
118+
// Wind direction with the most oval-shaped sightings: count per direction,
// sort descending and keep the first row.
db.ufoDates.aggregate([
    {$match:{shape:"oval"}},
    {$group:{_id:{Direction:"$weatherObs.windCond.wdire"},ObsRecord:{$sum:1}}},
    {$sort:{ObsRecord:-1}},
    {$limit:1}
]);
124+
125+
126+
//(vi)
127+
// create text index for text and summary, search contents which contain light or LIGHT.
128+
//Reference:https://docs.mongodb.com/manual/reference/operator/query/text/
129+
// $text needs a text index; cover both the observation text and its summary.
db.ufoDates.createIndex({"sightingObs.text":"text","sightingObs.summary":"text"});

// Count the documents whose indexed text matches "light", ignoring case.
db.ufoDates.aggregate([
    {$match:{$text:{$search:"light", $caseSensitive: false}}},
    {$count:"sumOfLight"}
]);
134+
135+
//C.1.7. lat,long:doubles
136+
// city,state,countyname
137+
// join two collections using $lookup. different state might have same city names,so use $let to generate new fields,
138+
// use pipeline to aggregate multiple conditions. match state,city and county, then save location as one list
139+
// $unwind to split the new list field, then save latitude and longitude as separate fields.
140+
// output new collection
141+
//Reference:https://docs.mongodb.com/manual/reference/operator/aggregation/lookup/
142+
// Join every sighting with the states document that has the same city,
// countyName and state, copy its lat/lng into geoLat/geoLng, and write the
// result to ufoStates.
db.ufoDates.aggregate([
    {$lookup:{
        from:"states",
        // Expose the sighting's own fields to the sub-pipeline as $$ variables.
        let:{"post_city":"$city","post_countyName":"$countyName","post_state":"$state"},
        pipeline:[
            {$match:{$expr:{$and:[
                {$eq:["$$post_city","$city"]},
                {$eq:["$$post_countyName","$countyName"]},
                {$eq:["$$post_state","$state"]}
            ]}}},
            // Drop the join keys and _id from the joined document.
            {$project:{_id:0,countyName:0,city:0,state:0}}
        ],
        as:"geoDocs"}},
    // One output document per matched states entry; sightings with no match
    // are dropped because $unwind skips empty arrays by default.
    {$unwind:"$geoDocs"},
    {$addFields:{"geoLat":"$geoDocs.lat","geoLng":"$geoDocs.lng"}},
    {$project:{"geoDocs":0}},
    {$out:"ufoStates"}
]);
159+
160+
//C.1.8 convert the latitude and longitude [coordinate:[long,lat]
161+
// https://docs.mongodb.com/manual/reference/operator/aggregation/let/
162+
// use let to defined new attribute
163+
// https://docs.mongodb.com/manual/geospatial-queries/
164+
// geo location
165+
// Replace the separate geoLat/geoLng fields with a GeoJSON Point stored in
// `location` (coordinates are [longitude, latitude]) and write the result to
// ufoStatesGeojson.
db.ufoStates.aggregate([
    {$addFields:{location:{type:"Point", coordinates:["$geoLng","$geoLat"]}}},
    {$project:{"geoLat":0,"geoLng":0}},
    {$out:"ufoStatesGeojson"}
]);
170+
171+
//1.9
172+
// find which city has most records, use location info of this city
173+
// create index on coordinates[lng,lat]
174+
// $geoNear to search points near my setting range[10000,100000]
175+
// avoid same city names, so group by city and state
176+
//https://blog.mlab.com/2014/08/a-primer-on-geospatial-data-and-mongodb/
177+
//https://stackoverflow.com/questions/22374312/meteor-js-mongodb-near-geometry-geojson-point-coordinates-longitude-limit?rq=1
178+
// Step 1: find the city with the most records.
db.ufoStatesGeojson.aggregate([
    {$group:{_id:"$city", sumOfCities:{$sum:1}}},
    {$sort:{"sumOfCities":-1}},
    {$limit:1}
]);

// Step 2: look up the coordinates stored for that city.
// NOTE(review): "PHOENIX" and the coordinates used in step 4 are hard-coded,
// presumably from the output of steps 1 and 2 - confirm against the data.
db.ufoStatesGeojson.find({city:{$eq:"PHOENIX"}},{"location.coordinates":1,"_id":0});

// Step 3: $geoNear needs a geospatial index on the field named by `key`.
db.ufoStatesGeojson.createIndex({"location.coordinates":"2dsphere"});

// Step 4: list the distinct city/county/state combinations that lie between
// minDistance and maxDistance of the chosen point.
db.ufoStatesGeojson.aggregate([
    {$geoNear:{
        near:{ type: "Point", coordinates: [-112.0891, 33.5722] },
        spherical:true,
        key:"location.coordinates",
        distanceField:"distance",
        minDistance:10000,
        maxDistance:100000}},
    // Group on city, county and state so equally named cities stay distinct.
    {$group:{_id:{"city":"$city","countyName":"$countyName","state":"$state"}}}
]);
193+
194+
//1.10 output ufo.csv file
195+
// compass action

0 commit comments

Comments
 (0)