diff --git a/PlotTest.R b/PlotTest.R
new file mode 100644
index 0000000..273874d
--- /dev/null
+++ b/PlotTest.R
@@ -0,0 +1,49 @@
+# Bar charts of the INR and GBP rates against EUR (base value 1) for January 2019.
+# Input: a CSV with the columns day, inr and gbp (see the sample output below).
+# NOTE(review): the sample output below shows two-digit days, while the part file
+# read here may store full dates - confirm which input file is intended.
+library(ggplot2)
+
+setwd("/home/rohan/Git/2020-interns")
+d1 <- read.csv("processedJsonData/part-00000-29e2168e-0e86-498f-99c0-251d77d6c1a6-c000.csv")
+
+print(d1)
+#day inr gbp
+# 30 81.3510 0.87341
+
+# Round the rates so the value labels drawn on the bars stay short.
+d1$inr <- round(d1$inr, 1)
+
+print(d1)
+#day inr gbp
+#30 81.4 0.87341
+#10 81.2 0.90423
+
+d1$gbp <- round(d1$gbp, 2)
+
+print(d1)
+#day inr gbp
+#30 81.4 0.87
+
+# Build the bar chart of one rate column per day, labelling each bar with its value.
+#   data     - data frame holding a day column and the rate column
+#   column   - name of the rate column to plot, e.g. "inr"
+#   currency - currency code used in the y-axis title
+#   tag      - optional plot tag; when given it is placed at c(0.8, 1)
+rate_plot <- function(data, column, currency, tag = NULL) {
+  p <- ggplot(data, aes(day, .data[[column]], label = .data[[column]])) +
+    geom_bar(stat = "identity") +
+    geom_text(size = 3, vjust = 2, color = "white") +
+    theme_gray() +
+    ggtitle("Base value of EUR is 1") +
+    labs(x = 'Days of Jan 2019', y = paste('Valuation of', currency))
+  if (!is.null(tag)) {
+    p <- p + labs(tag = tag) + theme(plot.tag.position = c(0.8, 1))
+  }
+  p
+}
+
+# print() is needed because source() does not auto-print top-level values,
+# so without it no plot would be drawn when the script is sourced.
+print(rate_plot(d1, "inr", "INR"))
+print(rate_plot(d1, "gbp", "GBP"))
+print(rate_plot(d1, "inr", "INR", tag = "Latest_INR:75.21"))
+print(rate_plot(d1, "gbp", "GBP", tag = "Latest_GBP:0.79"))
diff --git a/WinsoftIntern2020/src/test/scala/Test.scala b/WinsoftIntern2020/src/test/scala/Test.scala
new file mode 100644
index 0000000..7bc307b
--- /dev/null
+++ b/WinsoftIntern2020/src/test/scala/Test.scala
@@ -0,0 +1,58 @@
+import org.apache.log4j.{Level, Logger}
+import org.apache.spark.sql.{DataFrame, Row, SparkSession}
+
+/** Reads the exchange-rate JSON, turns the per-day fields of `rates` into rows
+  * and shows the INR and GBP values for 2019-01-02 through 2019-01-31.
+  */
+object Test
+{
+  /** Runs the job on a local Spark session.
+    *
+    * @param args optional; args(0) overrides the default input JSON path
+    */
+  def main(args: Array[String]): Unit =
+  {
+    Logger.getLogger("org").setLevel(Level.ERROR)
+
+    val inputPath = args.headOption.getOrElse("/home/rohan/Git/2020-interns/data.json")
+    // One definition of the date column name, so the alias, the filter and the
+    // projections no longer depend on case-insensitive column resolution.
+    val dateCol = "yyyy-mm-dd"
+
+    val spa=SparkSession.builder.master("local[*]").getOrCreate()
+
+    /* val spa: SparkSession = SparkSession.builder().master("local[*]")
+      .appName("TestSuite")
+      //.config("spark.sql.shuffle.partitions", "2")
+      .getOrCreate()*/
+
+    // try/finally so the session is stopped even when reading or querying fails.
+    try
+    {
+      import spa.implicits._
+      val dataFrame = spa.read.option("multiLine", true).json(inputPath)
+
+      dataFrame.show(false)
+      dataFrame.printSchema()
+      val nestedDays = dataFrame.selectExpr("rates.*")
+
+      // Argument list for stack(): a ('<column name>', <column>) pair per column of rates.
+      val dayColumn = nestedDays.columns.map(c => s"'$c', `$c`").mkString(", ")
+
+      val filteredData = nestedDays.selectExpr(s"stack(${nestedDays.columns.length}, $dayColumn) as (`${dateCol}`, value)")
+        .filter(s"`${dateCol}` between '2019-01-02' and '2019-01-31'")
+        .selectExpr(s"`${dateCol}`", "value.*")
+
+      filteredData.printSchema()
+      filteredData.show()
+
+      filteredData.select(dateCol,"INR","GBP").show(false)
+
+      //filteredData.select("yyyy-mm-dd","INR","GBP").write.format("csv").save("/home/rohan/Git/2020-interns/processedJsonData")
+    }
+    finally
+    {
+      spa.stop()
+    }
+  }
+}
diff --git a/graphs/Task_1.png b/graphs/Task_1.png
new file mode 100644
index 0000000..f0f14ba
Binary files /dev/null and b/graphs/Task_1.png differ
diff --git a/graphs/Task_2.png b/graphs/Task_2.png
new file mode 100644
index 0000000..da2d20e
Binary files /dev/null and b/graphs/Task_2.png differ
diff --git a/graphs/Task_3_for_GBP.png b/graphs/Task_3_for_GBP.png
new file mode 100644
index 0000000..66d423b
Binary files /dev/null and b/graphs/Task_3_for_GBP.png differ
diff --git a/graphs/Task_3_for_INR.png b/graphs/Task_3_for_INR.png
new file mode 100644
index 0000000..9cda17e
Binary files /dev/null and b/graphs/Task_3_for_INR.png differ
diff --git a/processedData.csv b/processedData.csv
new file mode 100644
index 0000000..13e7f47
--- /dev/null
+++ b/processedData.csv
@@ -0,0 +1,24 @@
+day,inr,gbp
+30,81.351,0.87341
+10,81.2005,0.90423
+04,79.4315,0.89988
+09,80.6365,0.89913
+16,81.0055,0.8859
+28,81.232,0.86888
+22,81.046,0.88
+25,80.634,0.8658
+15,81.231,0.89025
+14,81.2195,0.89263
+08,80.245,0.89743
+21,80.9335,0.88303
+24,80.656,0.87085
+11,81.3475,0.90015
+02,79.9855,0.90165
+18,81.0875,0.88125
+31,81.686,0.87578
+07,79.6895,0.8972
+23,81.0535,0.87213
+29,81.306,0.86735
+03,79.608,0.90312
+17,80.9765,0.8826
+
diff --git a/processedJsonData/._SUCCESS.crc b/processedJsonData/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and b/processedJsonData/._SUCCESS.crc differ
diff --git a/processedJsonData/.part-00000-29e2168e-0e86-498f-99c0-251d77d6c1a6-c000.csv.crc b/processedJsonData/.part-00000-29e2168e-0e86-498f-99c0-251d77d6c1a6-c000.csv.crc
new file mode 100644
index 0000000..364fb6d
Binary files /dev/null and b/processedJsonData/.part-00000-29e2168e-0e86-498f-99c0-251d77d6c1a6-c000.csv.crc differ
diff --git a/processedJsonData/_SUCCESS b/processedJsonData/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git a/processedJsonData/part-00000-29e2168e-0e86-498f-99c0-251d77d6c1a6-c000.csv b/processedJsonData/part-00000-29e2168e-0e86-498f-99c0-251d77d6c1a6-c000.csv
new file mode 100644
index 0000000..04213e8
--- /dev/null
+++ b/processedJsonData/part-00000-29e2168e-0e86-498f-99c0-251d77d6c1a6-c000.csv
@@ -0,0 +1,23 @@
+day,inr,gbp
+2019-01-02,79.9855,0.90165
+2019-01-03,79.608,0.90312
+2019-01-04,79.4315,0.89988
+2019-01-07,79.6895,0.8972
+2019-01-08,80.245,0.89743
+2019-01-09,80.6365,0.89913
+2019-01-10,81.2005,0.90423
+2019-01-11,81.3475,0.90015
+2019-01-14,81.2195,0.89263
+2019-01-15,81.231,0.89025
+2019-01-16,81.0055,0.8859
+2019-01-17,80.9765,0.8826
+2019-01-18,81.0875,0.88125
+2019-01-21,80.9335,0.88303
+2019-01-22,81.046,0.88
+2019-01-23,81.0535,0.87213
+2019-01-24,80.656,0.87085
+2019-01-25,80.634,0.8658
+2019-01-28,81.232,0.86888
+2019-01-29,81.306,0.86735
+2019-01-30,81.351,0.87341
+2019-01-31,81.686,0.87578