From b8c8d868fc16bcd648b92cca0aa735bc93d6b016 Mon Sep 17 00:00:00 2001 From: pedromxavier Date: Wed, 6 Dec 2023 00:26:42 -0500 Subject: [PATCH] Add tests --- Project.toml | 1 + src/QUBOInstances.jl | 18 +- src/curation/build.jl | 6 - src/curation/curate.jl | 227 ------------- src/curation/deploy.jl | 29 -- src/curation/hash.jl | 11 - src/curation/index.jl | 308 +++++++++++++++++- src/curation/tag.jl | 37 --- src/public/archive.jl | 16 + src/public/database.jl | 4 +- src/public/list.jl | 9 +- src/public/load.jl | 17 +- .../collectionX/data/problemx.qubo | 0 .../collectionX/data/problemx2.qubo | 0 test/collections/collectionX/metadata.json | 0 .../collectionY/data/problemy.json | 0 .../collectionY/data/problemy2.json | 0 test/collections/collectionY/metadata.json | 0 test/curation.jl | 13 + test/runtests.jl | 6 +- 20 files changed, 365 insertions(+), 337 deletions(-) delete mode 100644 src/curation/build.jl delete mode 100644 src/curation/curate.jl delete mode 100644 src/curation/deploy.jl delete mode 100644 src/curation/hash.jl delete mode 100644 src/curation/tag.jl create mode 100644 src/public/archive.jl rename src/public/library.jl => test/collections/collectionX/data/problemx.qubo (100%) create mode 100644 test/collections/collectionX/data/problemx2.qubo create mode 100644 test/collections/collectionX/metadata.json create mode 100644 test/collections/collectionY/data/problemy.json create mode 100644 test/collections/collectionY/data/problemy2.json create mode 100644 test/collections/collectionY/metadata.json create mode 100644 test/curation.jl diff --git a/Project.toml b/Project.toml index 6fc9ce8..9fe3a3c 100644 --- a/Project.toml +++ b/Project.toml @@ -15,6 +15,7 @@ Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" QUBOTools = "60eb5b62-0a39-4ddc-84c5-97d2adff9319" SQLite = "0aa819cd-b072-5ff4-a722-6bc24af294d9" +TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76" Tar = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" UUIDs = 
"cf7118a7-6976-5b1a-9a39-7adc72f591a4" diff --git a/src/QUBOInstances.jl b/src/QUBOInstances.jl index c3bf4b9..b36585b 100644 --- a/src/QUBOInstances.jl +++ b/src/QUBOInstances.jl @@ -9,28 +9,30 @@ using LaTeXStrings using SQLite using DataFrames using Tar +using TOML using Pkg using UUIDs using QUBOTools using ProgressMeter +const __PROJECT__ = abspath(@__DIR__, "..") +const __VERSION__ = VersionNumber(TOML.parsefile(joinpath(__PROJECT__, "Project.toml"))["version"]) + export load_instance, list_collections, list_instances, select +function data_path()::AbstractString + # return abspath(artifact"qubolib") + return @__DIR__ +end + # Public API include("public/interface.jl") -include("public/library.jl") include("public/load.jl") include("public/list.jl") +include("public/archive.jl") include("public/database.jl") # Data curation methods include("curation/index.jl") -include("curation/curate.jl") -include("curation/deploy.jl") -# include("curation/metadata.jl") -# include("curation/hash.jl") -# include("curation/tag.jl") -# include("curation/document.jl") -# include("curation/build.jl") end # module QUBOInstances diff --git a/src/curation/build.jl b/src/curation/build.jl deleted file mode 100644 index d36763a..0000000 --- a/src/curation/build.jl +++ /dev/null @@ -1,6 +0,0 @@ -function _build!(path::AbstractString; verbose::Bool = false) - - - - return nothing -end \ No newline at end of file diff --git a/src/curation/curate.jl b/src/curation/curate.jl deleted file mode 100644 index e0897af..0000000 --- a/src/curation/curate.jl +++ /dev/null @@ -1,227 +0,0 @@ -if !isdefined(LaTeXStrings, :latexescape) - const LATEX_ESCAPE_SUB_TABLE = Pair{String,String}[ - raw"\\"=>raw"\textbackslash{}", - raw"&"=>raw"\&", - raw"%"=>raw"\%", - raw"$"=>raw"\$", - raw"#"=>raw"\#", - raw"_"=>raw"\_", - raw"{"=>raw"\{", - raw"}"=>raw"\}", - raw"~"=>raw"\textasciitilde{}", - raw"^"=>raw"\^{}", - raw"<"=>raw"\textless{}", - raw">"=>raw"\textgreater{}", - ] - - function 
latexescape(s::AbstractString) - return replace(s, LATEX_ESCAPE_SUB_TABLE...) - end -end - -if !isdefined(LaTeXStrings, :bibtexescape) - const BIBTEX_ESCAPE_SUB_TABLE = Pair{String,String}[ - raw"\\"=>raw"\textbackslash{}", - raw"&"=>raw"\&", - raw"%"=>raw"\%", - raw"$"=>raw"\$", - raw"#"=>raw"\#", - raw"_"=>raw"\_", - raw"~"=>raw"\textasciitilde{}", - raw"^"=>raw"\^{}", - raw"<"=>raw"\textless{}", - raw">"=>raw"\textgreater{}", - ] - - function bibtexescape(s::AbstractString) - return replace(s, BIBTEX_ESCAPE_SUB_TABLE...) - end -end - -function _bibtex_entry(data::Dict{String,Any}; indent=2) - # Replace list with author names by them joined together - data["author"] = join(pop!(data, "author", []), " and ") - - # The document type / media type defaults to @misc - doctype = pop!(data, "type", "misc") - - # Citekey: use '?' as placeholder if none is given - citekey = pop!(data, "citekey", "?") - - # Get the size of longest key to align them - keysize = maximum(length.(keys(data))) - - entries = join( - [ - (" "^indent) * "$(rpad(k, keysize)) = {$(bibtexescape(string(v)))}" - for (k, v) in data - ], - "\n", - ) - - return """ - @$doctype{$citekey, - $entries - }""" -end - -function _problem_name(problem::AbstractString) - return _problem_name(artifact"collections", problem) -end - -function _problem_name(path::AbstractString, collection::AbstractString) - db = database(path::AbstractString) - - df = DBInterface.execute( - db, - "SELECT problems.name - FROM problems - INNER JOIN collections ON problems.problem=collections.problem - WHERE collections.collection = ?", - [collection] - ) |> DataFrame - - try - return only(df[!, :name]) - catch e - @show problem - @show df - rethrow(e) - end -end - -function _collection_size(collection::AbstractString) - return _collection_size(artifact"collections", collection::AbstractString) -end - -function _collection_size(path::AbstractString, collection::AbstractString) - db = database(path) - - df = DBInterface.execute( - db, - 
"SELECT COUNT(*) FROM instances WHERE collection = ?;", - [collection] - ) |> DataFrame - - return only(df[!, begin]) -end - -function _collection_size_range(collection::AbstractString) - return _collection_size_range(artifact"collections", collection::AbstractString) -end - -function _collection_size_range(path::AbstractString, collection::AbstractString) - db = database(path) - - df = DBInterface.execute( - db, - "SELECT MIN(size), MAX(size) FROM instances WHERE collection = ?;", - [collection] - ) |> DataFrame - - return (only(df[!, 1]), only(df[!, 2])) -end - -function curate(root_path::AbstractString, dist_path::AbstractString=abspath(root_path, "dist"); on_read_error::Function=msg -> @warn(msg)) - index = create_index(root_path, dist_path) - - curate!(index; on_read_error) - - return index -end - -function curate!(index::InstanceIndex; on_read_error::Function=msg -> @warn(msg)) - # curate collections - for collection in _list_collections(index) - # extract collection metadata - coll_metadata = _get_metadata(index, collection) - - problem = get(coll_metadata, "problem", "QUBO") - - DBInterface.execute( - index.db, - """ - INSERT INTO collections (collection, problem, size) - VALUES - (?, ?, 0); - """, - [collection, problem] - ) - - # add collection to HDF5 file - HDF5.create_group(index.fp["collections"], collection) - - @showprogress desc = "Reading instances @ '$collection'" for instance in _list_instances(index, collection) - - # Add instance to HDF5 file - HDF5.create_group(index.fp["collections"][collection], instance) - - let model = _get_instance_model(index, collection, instance; on_read_error) - isnothing(model) && continue - - dimension = QUBOTools.dimension(model) - density = QUBOTools.density(model) - linear_density = QUBOTools.linear_density(model) - quadratic_density = QUBOTools.quadratic_density(model) - - linear_min, linear_max = extrema(last, QUBOTools.linear_terms(model)) - quadratic_min, quadratic_max = extrema(last, 
QUBOTools.quadratic_terms(model))
-
-                _min = min(linear_min, quadratic_min)
-                _max = max(linear_max, quadratic_max)
-
-                DBInterface.execute(
-                    index.db,
-                    """
-                    INSERT INTO instances
-                        (
-                            instance,
-                            dimension,
-                            collection,
-                            min,
-                            max,
-                            linear_min,
-                            linear_max,
-                            quadratic_min,
-                            quadratic_max,
-                            density,
-                            linear_density,
-                            quadratic_density
-                        )
-                    VALUES
-                        (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
-                    """,
-                    [
-                        instance,
-                        dimension,
-                        collection,
-                        _min,
-                        _max,
-                        linear_min,
-                        linear_max,
-                        quadratic_min,
-                        quadratic_max,
-                        density,
-                        linear_density,
-                        quadratic_density,
-                    ]
-                )
-
-                # add instance to HDF5 file
-                QUBOTools.write_model(index.fp["collections"][collection][instance], model, QUBOTools.QUBin())
-            end
-        end
-
-        DBInterface.execute(
-            index.db,
-            """
-            UPDATE collections
-            SET size = (SELECT COUNT(*) FROM instances WHERE collection == ?)
-            WHERE collection = ?;
-            """,
-            [collection, collection]
-        )
-    end
-
-    return nothing
-end
diff --git a/src/curation/deploy.jl b/src/curation/deploy.jl
deleted file mode 100644
index a116aa2..0000000
--- a/src/curation/deploy.jl
+++ /dev/null
@@ -1,29 +0,0 @@
-function deploy(index::InstanceIndex; curate_data::Bool = false, on_read_error::Function=msg -> @warn(msg))
-    if curate_data
-        curate!(index; on_read_error)
-    end
-
-    deploy(index.dist_path)
-
-    return nothing
-end
-
-function deploy(dist_path::AbstractString)
-
-    # Build tarball
-    temp_path = abspath(Tar.create(dist_path))
-
-    # Compress tarball
-    run(`gzip -9 $temp_path`)
-
-    # Move tarball
-    file_path = mkpath(abspath(dist_path, "collections.tar.gz"))
-
-    cp("$temp_path.gz", file_path; force = true)
-
-    # Remove temporary files
-    rm(temp_path; force = true)
-    rm("$temp_path.gz"; force = true)
-
-    return nothing
-end
diff --git a/src/curation/hash.jl b/src/curation/hash.jl
deleted file mode 100644
index b270714..0000000
--- a/src/curation/hash.jl
+++ /dev/null
@@ -1,11 +0,0 @@
-function _hash!(path::AbstractString; verbose::Bool = false)
-    verbose && @info "Computing Tree Hash"
-
-    hash_path = abspath(joinpath(path, "..", "tree.hash"))
-
-    write(hash_path, bytes2hex(Pkg.GitTools.tree_hash(path)))
-
-    verbose && @info "Hash written to '$hash_path'"
-
-    return nothing
-end
\ No newline at end of file
diff --git a/src/curation/index.jl b/src/curation/index.jl
index 1d193f9..020bb28 100644
--- a/src/curation/index.jl
+++ b/src/curation/index.jl
@@ -84,6 +84,8 @@ struct InstanceIndex
     root_path::String
     dist_path::String
     list_path::String
+    tree_hash::Ref{String}
+    next_tag::Ref{String}
 end
 
 function create_index(
@@ -99,7 +101,7 @@ function create_index(
 
     @assert isdir(list_path) "'$list_path' is not a directory"
 
-    return InstanceIndex(db, fp, abspath(root_path), abspath(dist_path), list_path)
+    return InstanceIndex(db, fp, abspath(root_path), abspath(dist_path), list_path, Ref{String}(), Ref{String}())
 end
 
 function _list_collections(path::AbstractString)
@@ -163,3 +165,335 @@ end
 function _get_instance_model(index::InstanceIndex, collection::AbstractString, instance::AbstractString; on_read_error::Function=msg -> @warn(msg))
     return _get_instance_model(index.list_path, collection, instance; on_read_error)
 end
+
+"""
+    hash!(index::InstanceIndex)
+
+Store the hex-encoded git tree hash of `index.list_path` in `index.tree_hash`.
+"""
+function hash!(index::InstanceIndex)
+    index.tree_hash[] = bytes2hex(Pkg.GitTools.tree_hash(index.list_path))
+
+    return nothing
+end
+
+"""
+    deploy(index::InstanceIndex; curate_data=false, on_read_error=msg -> @warn(msg))
+    deploy(dist_path::AbstractString)
+
+Pack the contents of `dist_path` (or `index.dist_path`) into the gzip-compressed
+tarball `qubolib.tar.gz` inside that same directory. With `curate_data = true` the
+index is curated through `curate!` first. Requires the `gzip` executable.
+"""
+function deploy(index::InstanceIndex; curate_data::Bool = false, on_read_error::Function=msg -> @warn(msg))
+    if curate_data
+        curate!(index; on_read_error)
+    end
+
+    deploy(index.dist_path)
+
+    return nothing
+end
+
+function deploy(dist_path::AbstractString)
+    file_path = abspath(dist_path, "qubolib.tar.gz")
+
+    # Remove a previous archive first so that it is not packed into the new one
+    rm(file_path; force = true, recursive = true)
+
+    # Build tarball
+    temp_path = abspath(Tar.create(dist_path))
+
+    # Compress tarball
+    run(`gzip -9 $temp_path`)
+
+    # Move tarball
+    cp("$temp_path.gz", file_path; force = true)
+
+    # Remove temporary files
+    rm(temp_path; force = true)
+    rm("$temp_path.gz"; force = true)
+
+    return nothing
+end
+
+"""
+    tag(path::AbstractString)
+
+Read the last release tag from `<path>/last.tag` (a line such as `tag: v1.2.3`) and
+return the next one, with the patch number incremented, as a string like `"v1.2.4"`.
+
+Throws an error when the file is missing or contains no tag.
+"""
+function tag(path::AbstractString)
+    last_tag_path = abspath(path, "last.tag")
+
+    # Raise instead of calling `exit`, so that a failure does not kill the session
+    isfile(last_tag_path) || error("File '$last_tag_path' not found")
+
+    m = match(r"tag:\s*v(.*)", read(last_tag_path, String))
+
+    isnothing(m) && error("Tag not found in '$last_tag_path'")
+
+    # `.*` also captures trailing blanks (e.g. '\r'), which `parse` would reject
+    last_tag = parse(VersionNumber, strip(m[1]))
+
+    next_tag = VersionNumber(
+        last_tag.major,
+        last_tag.minor,
+        last_tag.patch + 1,
+        last_tag.prerelease,
+        last_tag.build,
+    )
+
+    return "v$next_tag"
+end
+
+"""
+    tag!(index::InstanceIndex)
+
+Compute the next release tag from `index.root_path` and store it in `index.next_tag`.
+"""
+function tag!(index::InstanceIndex)
+    # `next_tag` is a `Ref`: write through it, as fields of the struct cannot be reassigned
+    index.next_tag[] = tag(index.root_path)
+
+    return nothing
+end
+
+# Fallback definition, used only if LaTeXStrings does not provide `latexescape`
+if !isdefined(LaTeXStrings, :latexescape)
+    function latexescape(s::AbstractString)
+        return replace(
+            s,
+            raw"\\" => raw"\textbackslash{}",
+            raw"&" => raw"\&",
+            raw"%" => raw"\%",
+            raw"$" => raw"\$",
+            raw"#" => raw"\#",
+            raw"_" => raw"\_",
+            raw"{" => raw"\{",
+            raw"}" => raw"\}",
+            raw"~" => raw"\textasciitilde{}",
+            raw"^" => raw"\^{}",
+            raw"<" => raw"\textless{}",
+            raw">" => raw"\textgreater{}",
+        )
+    end
+end
+
+# Same as `latexescape`, except that braces are left untouched
+if !isdefined(LaTeXStrings, :bibtexescape)
+    function bibtexescape(s::AbstractString)
+        return replace(s,
+            raw"\\" => raw"\textbackslash{}",
+            raw"&" => raw"\&",
+            raw"%" => raw"\%",
+            raw"$" => raw"\$",
+            raw"#" => raw"\#",
+            raw"_" => raw"\_",
+            raw"~" => raw"\textasciitilde{}",
+            raw"^" => raw"\^{}",
+            raw"<" => raw"\textless{}",
+            raw">" => raw"\textgreater{}",
+        )
+    end
+end
+
+# Render `data` as a BibTeX entry. NOTE: consumes the "type" and "citekey" keys
+# of `data` and replaces its "author" list by a single joined string (mutates `data`).
+function _bibtex_entry(data::Dict{String,Any}; indent=2)
+    # Replace list with author names by them joined together
+    data["author"] = join(pop!(data, "author", []), " and ")
+
+    # The document type / media type defaults to @misc
+    doctype = pop!(data, "type", "misc")
+
+    # Citekey: use '?' as placeholder if none is given
+    citekey = pop!(data, "citekey", "?")
+
+    # Get the size of longest key to align them
+    keysize = maximum(length.(keys(data)))
+
+    entries = join(
+        [
+            (" "^indent) * "$(rpad(k, keysize)) = {$(bibtexescape(string(v)))}"
+            for (k, v) in data
+        ],
+        "\n",
+    )
+
+    return """
+    @$doctype{$citekey,
+    $entries
+    }"""
+end
+
+function _problem_name(problem::AbstractString)
+    return _problem_name(data_path(), problem)
+end
+
+# Name of the problem class that `collection` belongs to, looked up in the index database
+function _problem_name(path::AbstractString, collection::AbstractString)
+    db = database(path::AbstractString)
+
+    df = DBInterface.execute(
+        db,
+        "SELECT problems.name
+        FROM problems
+        INNER JOIN collections ON problems.problem=collections.problem
+        WHERE collections.collection = ?",
+        [collection]
+    ) |> DataFrame
+
+    try
+        return only(df[!, :name])
+    catch
+        # Show the offending query argument and its result before propagating
+        @show collection
+        @show df
+        rethrow()
+    end
+end
+
+function _collection_size(collection::AbstractString)
+    return _collection_size(data_path(), collection::AbstractString)
+end
+
+# Number of instances registered under `collection`
+function _collection_size(path::AbstractString, collection::AbstractString)
+    db = database(path)
+
+    df = DBInterface.execute(
+        db,
+        "SELECT COUNT(*) FROM instances WHERE collection = ?;",
+        [collection]
+    ) |> DataFrame
+
+    return only(df[!, begin])
+end
+
+function _collection_size_range(collection::AbstractString)
+    return _collection_size_range(data_path(), collection::AbstractString)
+end
+
+# Smallest and largest instance size within `collection`
+# NOTE(review): `curate!` below fills a `dimension` column and no `size` column;
+# confirm against the table schema that `instances.size` exists
+function _collection_size_range(path::AbstractString, collection::AbstractString)
+    db = database(path)
+
+    df = DBInterface.execute(
+        db,
+        "SELECT MIN(size), MAX(size) FROM instances WHERE collection = ?;",
+        [collection]
+    ) |> DataFrame
+
+    return (only(df[!, 1]), only(df[!, 2]))
+end
+
+# Build a fresh index for `root_path` and curate it; returns the index
+function curate(root_path::AbstractString, dist_path::AbstractString=abspath(root_path, "dist"); on_read_error::Function=msg -> @warn(msg))
+    index = create_index(root_path, dist_path)
+
+    curate!(index; on_read_error)
+
+    return index
+end
+
+# Register every collection and instance of `index` in its database and HDF5 archive
+function curate!(index::InstanceIndex; on_read_error::Function=msg -> @warn(msg))
+    # curate collections
+    for collection in _list_collections(index)
+        # extract collection metadata
+        coll_metadata = _get_metadata(index, collection)
+
+        problem = get(coll_metadata, "problem", "QUBO")
+
+        DBInterface.execute(
+            index.db,
+            """
+            INSERT INTO collections (collection, problem, size)
+            VALUES
+                (?, ?, 0);
+            """,
+            [collection, problem]
+        )
+
+        # add collection to HDF5 file
+        HDF5.create_group(index.fp["collections"], collection)
+
+        @showprogress desc = "Reading instances @ '$collection'" for instance in _list_instances(index, collection)
+
+            # Add instance to HDF5 file
+            HDF5.create_group(index.fp["collections"][collection], instance)
+
+            let model = _get_instance_model(index, collection, instance; on_read_error)
+                isnothing(model) && continue
+
+                dimension = QUBOTools.dimension(model)
+                density = QUBOTools.density(model)
+                linear_density = QUBOTools.linear_density(model)
+                quadratic_density = QUBOTools.quadratic_density(model)
+
+                linear_min, linear_max = extrema(last, QUBOTools.linear_terms(model))
+                quadratic_min, quadratic_max = extrema(last, QUBOTools.quadratic_terms(model))
+
+                _min = min(linear_min, quadratic_min)
+                _max = max(linear_max, quadratic_max)
+
+                DBInterface.execute(
+                    index.db,
+                    """
+                    INSERT INTO instances
+                        (
+                            instance,
+                            dimension,
+                            collection,
+                            min,
+                            max,
+                            linear_min,
+                            linear_max,
+                            quadratic_min,
+                            quadratic_max,
+                            density,
+                            linear_density,
+                            quadratic_density
+                        )
+                    VALUES
+                        (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
+                    """,
+                    [
+                        instance,
+                        dimension,
+                        collection,
+                        _min,
+                        _max,
+                        linear_min,
+                        linear_max,
+                        quadratic_min,
+                        quadratic_max,
+                        density,
+                        linear_density,
+                        quadratic_density,
+                    ]
+                )
+
+                # add instance to HDF5 file
+                QUBOTools.write_model(index.fp["collections"][collection][instance], model, QUBOTools.QUBin())
+            end
+        end
+
+        DBInterface.execute(
+            index.db,
+            """
+            UPDATE collections
+            SET size = (SELECT COUNT(*) FROM instances WHERE collection == ?)
+            WHERE collection = ?;
+            """,
+            [collection, collection]
+        )
+    end
+
+    return nothing
+end
diff --git a/src/curation/tag.jl b/src/curation/tag.jl
deleted file mode 100644
index 69e53e1..0000000
--- a/src/curation/tag.jl
+++ /dev/null
@@ -1,37 +0,0 @@
-function _tag!(path::AbstractString; verbose::Bool = false)
-    file_path = joinpath(path, "last.txt")
-
-    if isfile(file_path)
-        text = read(file_path, String)
-
-        m = match(r"tag:\s*v(.*)", text)
-
-        if isnothing(m)
-            @error("Tag not found in 'last.txt'")
-
-            exit(1)
-        end
-
-        last_tag = parse(VersionNumber, m[1])
-
-        verbose && @info "Last tag: $last_tag"
-
-        next_tag_path = joinpath(path, "next.tag")
-
-        next_tag = VersionNumber(
-            last_tag.major,
-            last_tag.minor,
-            last_tag.patch + 1,
-            last_tag.prerelease,
-            last_tag.build,
-        )
-
-        verbose && @info "Next tag: $next_tag"
-
-        write(next_tag_path, "v$next_tag")
-    else
-        error("File 'last.txt' not found")
-    end
-
-    return nothing
-end
diff --git a/src/public/archive.jl b/src/public/archive.jl
new file mode 100644
index 0000000..804c0e0
--- /dev/null
+++ b/src/public/archive.jl
@@ -0,0 +1,22 @@
+"""
+    archive([callback::Function], [path::AbstractString])
+
+Open `<path>/archive.h5` read-only with HDF5. `path` defaults to `data_path()`.
+When `callback` is given (e.g. through a `do` block) it is passed on to `HDF5.h5open`;
+otherwise the result of `HDF5.h5open` is returned.
+"""
+function archive(callback::Function, path::AbstractString)
+    return HDF5.h5open(callback, abspath(path, "archive.h5"), "r")
+end
+
+function archive(path::AbstractString)
+    return HDF5.h5open(abspath(path, "archive.h5"), "r")
+end
+
+function archive(callback::Function)
+    return archive(callback, data_path())
+end
+
+function archive()
+    return archive(data_path())
+end
diff --git a/src/public/database.jl b/src/public/database.jl
index 6861f3a..011a829 100644
--- a/src/public/database.jl
+++ b/src/public/database.jl
@@ -1,7 +1,7 @@
 function database(path::AbstractString)
-    return SQLite.DB(joinpath(path, "index.sqlite"))
+    return SQLite.DB(abspath(path, "index.sqlite"))
 end
 
 function database()
-    return database(artifact"collections")
+    return database(data_path())
 end
diff --git a/src/public/list.jl b/src/public/list.jl
index a466d04..2d55c68
100644
--- a/src/public/list.jl
+++ b/src/public/list.jl
@@ -3,4 +3,12 @@ function list_collections()
     df = DBInterface.execute(db, "SELECT collection FROM collections") |> DataFrame
 
     return collect(df[!, :collection])
-end
\ No newline at end of file
+end
+
+# Names of all instances registered under `collection` in the index database
+function list_instances(collection::AbstractString)
+    query = "SELECT instance FROM instances WHERE collection = ?"
+    table = DataFrame(DBInterface.execute(database(), query, [collection]))
+
+    return collect(table[!, :instance])
+end
diff --git a/src/public/load.jl b/src/public/load.jl
index 7d5164d..8702259 100644
--- a/src/public/load.jl
+++ b/src/public/load.jl
@@ -1,20 +1,9 @@
 function load_instance(collection::AbstractString, instance::AbstractString)
-    return load_instance(artifact"collections", collection, instance)
+    return load_instance(data_path(), collection, instance)
 end
 
 function load_instance(path::AbstractString, collection::AbstractString, instance::AbstractString)
-    collpath = joinpath(path, collection)
-
-    if !isdir(collpath)
-        error("Unknown collection '$collection'")
-    end
-
-    instpath = joinpath(collpath, "data", instance)
-
-    if !isfile(instpath)
-        error("Unknown instance '$instance'")
+    return archive(path) do fp
+        return QUBOTools.read_model(fp["collections"][collection][instance], QUBOTools.QUBin())
     end
-
-    # QUBOTools must be able to infer the format from the file extension
-    return QUBOTools.read_model(instpath)
 end
diff --git a/src/public/library.jl b/test/collections/collectionX/data/problemx.qubo
similarity index 100%
rename from src/public/library.jl
rename to test/collections/collectionX/data/problemx.qubo
diff --git a/test/collections/collectionX/data/problemx2.qubo b/test/collections/collectionX/data/problemx2.qubo
new file mode 100644
index 0000000..e69de29
diff --git a/test/collections/collectionX/metadata.json b/test/collections/collectionX/metadata.json
new file mode 100644
index 0000000..e69de29
diff --git a/test/collections/collectionY/data/problemy.json b/test/collections/collectionY/data/problemy.json
new file mode 100644
index 0000000..e69de29
diff --git a/test/collections/collectionY/data/problemy2.json b/test/collections/collectionY/data/problemy2.json
new file mode 100644
index 0000000..e69de29
diff --git a/test/collections/collectionY/metadata.json b/test/collections/collectionY/metadata.json
new file mode 100644
index 0000000..e69de29
diff --git a/test/curation.jl b/test/curation.jl
new file mode 100644
index 0000000..0e6d1c8
--- /dev/null
+++ b/test/curation.jl
@@ -0,0 +1,13 @@
+# Smoke test for index creation.
+# NOTE(review): the fixtures are stored under `test/collections`, while this path is
+# `test/data/collections` - confirm which root `create_index` expects.
+function test_curation()
+    @testset "□ Curation Routines" begin
+        root_path = abspath(@__DIR__, "data", "collections")
+        index = QUBOInstances.create_index(root_path)
+
+        @test index.root_path == root_path
+    end
+
+    return nothing
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index a8eee0c..c307264 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,8 +1,12 @@
 using Test
 using QUBOInstances
 
+include("curation.jl")
+
 function main()
-    @test true
+    @testset "♣ QUBOInstances.jl «$(QUBOInstances.__VERSION__)» Test Suite ♣" verbose = true begin
+        test_curation()
+    end
 
     return nothing
 end