Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
sudo apt-get install -y sbt

- name: Run tests
run: sbt -J--add-opens=java.base/sun.nio.ch=ALL-UNNAMED clean test
run: make test

- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v3
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ format:
sbt scalafmt

test:
sbt -J--add-opens=java.base/sun.nio.ch=ALL-UNNAMED test
sbt -J--add-opens=java.base/sun.nio.ch=ALL-UNNAMED coverage test coverageReport
@make clean
4 changes: 2 additions & 2 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ThisBuild / version := "1.5.0"
ThisBuild / version := "1.6.0"

ThisBuild / scalaVersion := "2.12.20"

Expand All @@ -24,7 +24,7 @@ libraryDependencies ++= Seq(
"io.circe" %% "circe-yaml" % "1.15.0",
"io.circe" %% "circe-core" % circeVersion,
"io.circe" %% "circe-parser" % circeVersion,
"com.swoop" %% "spark-alchemy" % "1.2.1"
"com.swoop" %% "spark-alchemy" % "1.2.1",
)

dependencyOverrides += "org.scala-lang.modules" %% "scala-xml" % "2.4.0"
1 change: 1 addition & 0 deletions project/plugins.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
addSbtPlugin("org.scoverage" % "sbt-scoverage" % "2.3.1")
33 changes: 16 additions & 17 deletions src/main/scala/org/mitchelllisle/analysers/MerkleTree.scala
Original file line number Diff line number Diff line change
@@ -1,29 +1,28 @@
package org.mitchelllisle.analysers

import org.apache.spark.sql.{DataFrame, functions => F}
import org.apache.spark.sql.types._
import java.security.MessageDigest
import java.time.Instant
import scala.annotation.tailrec

/** Cryptographic summary of a dataset at a point in time, used for tamper-evident audit trails.
  *
  * Marked `final`: case classes should not be extended (equals/hashCode/copy would break).
  *
  * @param rootHash    Merkle root hash computed over `leafHashes`
  * @param recordCount number of records covered by this proof
  * @param leafHashes  per-record hashes forming the leaves of the Merkle tree
  * @param timestamp   creation time of the proof; defaults to the current instant
  */
final case class MerkleProof(
    rootHash: String,
    recordCount: Long,
    leafHashes: Seq[String],
    timestamp: Instant = Instant.now()
)

/** Pairs before/after Merkle proofs to attest that specific records were deleted.
  *
  * Marked `final`: case classes should not be extended (equals/hashCode/copy would break).
  *
  * @param beforeProof         proof of the dataset state prior to deletion
  * @param afterProof          proof of the dataset state after deletion
  * @param deletedRecordHashes leaf hashes of the records that were removed
  * @param merklePathProofs    path proofs linking deleted leaves to the root
  */
final case class DeletionProof(
    beforeProof: MerkleProof,
    afterProof: MerkleProof,
    deletedRecordHashes: Seq[String],
    merklePathProofs: Seq[String]
)

/** MerkleTreeAnalyser provides cryptographic proof capabilities for data retention and deletion verification.
* This complements the KHyperLogLogAnalyser by adding tamper-evident audit trails.
*/
object MerkleTree {

case class MerkleProof(
rootHash: String,
recordCount: Long,
leafHashes: Seq[String],
timestamp: Long = System.currentTimeMillis()
)

case class DeletionProof(
beforeProof: MerkleProof,
afterProof: MerkleProof,
deletedRecordHashes: Seq[String],
merklePathProofs: Seq[String]
)

/** Main entry point for creating a Merkle proof. This is the standard way to use MerkleTree.
*
* @param data The DataFrame to create proof for
Expand Down Expand Up @@ -98,7 +97,7 @@ object MerkleTree {
rootHash = rootHash,
recordCount = recordCount,
leafHashes = leafHashes,
timestamp = System.currentTimeMillis()
timestamp = Instant.now()
)
}

Expand Down
14 changes: 13 additions & 1 deletion src/test/scala/MerkleTreeTest.scala
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import org.apache.spark.sql.DataFrame
import org.mitchelllisle.analysers.MerkleTree
import org.scalatest.flatspec.AnyFlatSpec
import java.time.Instant

class MerkleTreeTest extends AnyFlatSpec with SparkFunSuite {
import spark.implicits._
Expand All @@ -21,7 +22,8 @@ class MerkleTreeTest extends AnyFlatSpec with SparkFunSuite {
assert(proof.rootHash.nonEmpty)
assert(proof.recordCount == 4)
assert(proof.leafHashes.length == 4)
assert(proof.timestamp > 0)
assert(proof.timestamp.isInstanceOf[Instant])
assert(proof.timestamp.isBefore(Instant.now().plusSeconds(1)))
}

"apply" should "produce same result as createMerkleProof" in {
Expand Down Expand Up @@ -183,4 +185,14 @@ class MerkleTreeTest extends AnyFlatSpec with SparkFunSuite {

assert(hashes1.sameElements(hashes2))
}

"timestamp" should "use proper Instant type and be recent" in {
  // Bracket proof creation with a one-second tolerance window on each side,
  // then verify the proof's timestamp falls inside it.
  val windowStart = Instant.now().minusSeconds(1)
  val proof = MerkleTree.createMerkleProof(testData, columns, idColumn)
  val windowEnd = Instant.now().plusSeconds(1)

  assert(proof.timestamp.isInstanceOf[Instant])
  assert(proof.timestamp.isAfter(windowStart))
  assert(proof.timestamp.isBefore(windowEnd))
}
}