diff --git a/.gitignore b/.gitignore index 4211a325..e3fa7c50 100644 --- a/.gitignore +++ b/.gitignore @@ -9,7 +9,9 @@ wrapperTests.txt # binaries # as these are build-dependent, binaries should not be committed *.o +*.so *.a +*.dSYM ex-01 ex-01-adjoint ex-01-expanded diff --git a/braid/Makefile b/braid/Makefile index 09ce70eb..a86b5db6 100644 --- a/braid/Makefile +++ b/braid/Makefile @@ -45,7 +45,6 @@ BRAID_FILES =\ base.c\ braid.c\ braid_status.c\ - braid_F90_iface.c\ braid_test.c\ communication.c\ delta.c\ @@ -66,6 +65,10 @@ BRAID_FILES =\ util.c\ uvector.c +ifeq ($(fortran),yes) + BRAID_FILES += braid_F90_iface.c +endif + ifeq ($(sequential),yes) BRAID_HEADERS += mpistubs.h BRAID_FILES += mpistubs.c @@ -74,6 +77,12 @@ else SEQFLAGS = endif +ifeq ($(shared),yes) + BRAID_LIBFILES = libbraid.a libbraid.so +else + BRAID_LIBFILES = libbraid.a +endif + BRAID_OBJ = $(BRAID_FILES:.c=.o) .PHONY: all clean @@ -84,13 +93,17 @@ BRAID_OBJ = $(BRAID_FILES:.c=.o) %.o: %.c $(BRAID_HEADERS) $(MPICC) $(SEQFLAGS) $(CFLAGS) -c $< -o $@ +all: $(BRAID_LIBFILES) + libbraid.a: $(BRAID_HEADERS) $(BRAID_OBJ) @echo "Building" $@ "..." ar cruv libbraid.a $(BRAID_OBJ) ranlib libbraid.a -all: libbraid.a +libbraid.so: $(BRAID_HEADERS) $(BRAID_OBJ) + @echo "Building" $@ "..." + $(MPICC) $(SEQFLAGS) $(CFLAGS) $(SHAREDFLAGS) $(BRAID_OBJ) -o $@ clean: - rm -f *.o libbraid.a + rm -f *.o libbraid.a libbraid.so diff --git a/braid/braid.c b/braid/braid.c index 5cc597df..bf64dfe2 100644 --- a/braid/braid.c +++ b/braid/braid.c @@ -493,7 +493,6 @@ braid_Destroy(braid_Core core) braid_App app = _braid_CoreElt(core, app); braid_Int nlevels = _braid_CoreElt(core, nlevels); _braid_Grid **grids = _braid_CoreElt(core, grids); - braid_Int cfactor = _braid_GridElt(grids[0], cfactor); braid_Int gupper = _braid_CoreElt(core, gupper); braid_Int richardson = _braid_CoreElt(core, richardson); braid_Int est_error = _braid_CoreElt(core, est_error); @@ -509,24 +508,54 @@ braid_Destroy(braid_Core core) _braid_TFree(_braid_CoreElt(core, tnorm_a)); _braid_TFree(_braid_CoreElt(core, rdtvalues)); - /* Destroy stored Lyapunov exponents */ - if (_braid_CoreElt(core, lyap_exp)) + if (grids[0] != NULL) { + braid_Int cfactor = _braid_GridElt(grids[0], cfactor); + /* Destroy stored Lyapunov exponents */ + if (_braid_CoreElt(core, lyap_exp)) + { + + braid_Int npoints; + if (nlevels == 1) + { + npoints = _braid_GridElt(grids[0], iupper) - _braid_GridElt(grids[0], ilower); + } + else + { + npoints = _braid_GridElt(grids[0], ncpoints); + } + for (braid_Int i = 0; i < npoints; i++) + { + _braid_TFree(_braid_CoreElt(core, local_exponents)[i]); + } + _braid_TFree(_braid_CoreElt(core, local_exponents)); + } + - braid_Int npoints; - if (nlevels == 1) + /* Free last time step, if set */ + if ( (_braid_CoreElt(core, storage) < 0) && !(_braid_IsCPoint(gupper, cfactor)) ) { - npoints = _braid_GridElt(grids[0], iupper) - _braid_GridElt(grids[0], ilower); + if (_braid_GridElt(grids[0], ulast) != NULL) + { + _braid_BaseFree(core, app, _braid_GridElt(grids[0], ulast)); + } } - else + + /* Destroy Richardson estimate structures */ + if ( richardson || est_error ) { - npoints = _braid_GridElt(grids[0], ncpoints); + _braid_TFree(_braid_CoreElt(core, dtk)); + if (est_error) + { + _braid_TFree(_braid_CoreElt(core, estimate)); + } } - for (braid_Int i = 0; i < npoints; i++) + + for (level = 0; level < nlevels; level++) { - _braid_TFree(_braid_CoreElt(core, local_exponents)[i]); + _braid_GridDestroy(core, grids[level]); } - _braid_TFree(_braid_CoreElt(core, local_exponents)); + } /* Destroy the optimization structure */ @@ -537,47 +566,19 @@ braid_Destroy(braid_Core core) _braid_TFree(_braid_CoreElt(core, optim)); } - /* Free last time step, if set */ - if ( (_braid_CoreElt(core, storage) < 0) && !(_braid_IsCPoint(gupper, cfactor)) ) - { - if (_braid_GridElt(grids[0], ulast) != NULL) - { - _braid_BaseFree(core, app, _braid_GridElt(grids[0], ulast)); - } - } - - /* Destroy Richardson estimate structures */ - if ( richardson || est_error ) - { - _braid_TFree(_braid_CoreElt(core, dtk)); - if (est_error) - { - _braid_TFree(_braid_CoreElt(core, estimate)); - } - } - - for (level = 0; level < nlevels; level++) - { - _braid_GridDestroy(core, grids[level]); - } - _braid_TFree(grids); - _braid_TFree(core); if (timer_file_stem != NULL) { _braid_TFree(timer_file_stem); } - } if (_braid_printfile != NULL) { fclose(_braid_printfile); } - - return _braid_error_flag; } diff --git a/braid/braid.h b/braid/braid.h index 3606ed0c..e4f12483 100644 --- a/braid/braid.h +++ b/braid/braid.h @@ -58,14 +58,17 @@ extern "C" { /** Define Fortran name-mangling schema, there are four supported options, see braid_F90_iface.c */ #define braid_FMANGLE 1 -/** Turn on the optional user-defined spatial coarsening and refinement functions */ +/** Turn on/off the Fortran interface (useful for avoiding undefined symbol + * errors in shared library builds) */ +#define braid_Fortran_Iface 0 +/** Turn on/off the optional user-defined spatial coarsening and refinement functions */ #define braid_Fortran_SpatialCoarsen 0 -/** Turn on the optional user-defined residual function */ -#define braid_Fortran_Residual 1 -/** Turn on the optional user-defined time-grid function */ -#define braid_Fortran_TimeGrid 1 -/** Turn on the optional user-defined sync function */ -#define braid_Fortran_Sync 1 +/** Turn on/off the optional user-defined residual function */ +#define braid_Fortran_Residual 0 +/** Turn on/off the optional user-defined time-grid function */ +#define braid_Fortran_TimeGrid 0 +/** Turn on/off the optional user-defined sync function */ +#define braid_Fortran_Sync 0 /** @} */ @@ -605,7 +608,7 @@ braid_PrintTimers(braid_Core core /**< braid_Core (_braid_Core) struc **/ braid_Int braid_ResetTimer(braid_Core core /**< braid_Core (_braid_Core) struct*/ -); + ); /** * After Drive() finishes, this function can be called to write out the convergence history diff --git a/braid/braid.jl/BraidUtils.jl b/braid/braid.jl/BraidUtils.jl new file mode 100644 index 00000000..9e6746a3 --- /dev/null +++ b/braid/braid.jl/BraidUtils.jl @@ -0,0 +1,54 @@ +module BraidUtils +export libbraid, c_stdout, BlackHoleBuffer, malloc_null_double_ptr, stacktrace_warn + +libbraid = joinpath(dirname(@__DIR__), "libbraid.so") +c_stdout = Libc.FILE(Libc.RawFD(1), "w") # corresponds to C standard output + + +""" +Allocates a valid pointer to a pointer of type T +""" +function malloc_null_double_ptr(T::Type) + pp = Base.Libc.malloc(sizeof(Ptr{Cvoid})) + pp = reinterpret(Ptr{Ptr{T}}, pp) + return pp +end + +""" +Displays a caught error message, including stacktrace, without throwing +""" +function stacktrace_warn(msg::String, err) + err_msg = sprint(showerror, err) + trace = sprint((io,v) -> show(io, "text/plain", v), stacktrace(catch_backtrace())) + @warn "$(msg):\n$(err_msg)\n$(trace)" +end + +""" +Where'd all my data go? +This extends Base to include a buffer which throws away the data written to it +(useful for measuring the serialized size of an object) +""" +mutable struct BlackHoleBuffer <: IO + ptr::Int +end +BlackHoleBuffer() = BlackHoleBuffer(0) + +function Base.read(from::BlackHoleBuffer, T::Type{UInt8}) + throw(ArgumentError("BlackHoleBuffer is not readable)")) +end +function Base.write(to::BlackHoleBuffer, x::UInt8) + to.ptr += 1 + return 1 +end +function Base.write(to::BlackHoleBuffer, x::Array{T}) where T + to.ptr += sizeof(x) + return sizeof(x) +end + +end # module BraidUtils + +# helper functions +isCPoint(i::Integer, cfactor::Integer)::Bool = ((i-1) % cfactor == 0) +isFPoint(i::Integer, cfactor::Integer)::Bool = !isCPoint(i, cfactor) +mapFineToCoarse(i::Integer, cfactor::Integer)::Integer = (i-1) ÷ cfactor + 1 +mapCoarseToFine(i::Integer, cfactor::Integer)::Integer = (i-1) * cfactor + 1 \ No newline at end of file diff --git a/braid/braid.jl/Status.jl b/braid/braid.jl/Status.jl new file mode 100644 index 00000000..71459e27 --- /dev/null +++ b/braid/braid.jl/Status.jl @@ -0,0 +1,270 @@ +#=BHEADER********************************************************************** + * Copyright (c) 2013, Lawrence Livermore National Security, LLC. + * Produced at the Lawrence Livermore National Laboratory. + * + * This file is part of XBraid. For support, post issues to the XBraid Github page. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License (as published by the Free Software + * Foundation) version 2.1 dated February 1999. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the IMPLIED WARRANTY OF MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the terms and conditions of the GNU General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + ***********************************************************************EHEADER=# + + +module Status +export AccessStatus, StepStatus, SyncStatus +export CallerError, CallerFInterp, CallerFRestrict, CallerFRefine, CallerFAccess +export CallerFRefine_AfterInitHier, CallerDrive_TopCycle, CallerFCRelax +export CallerDrive_AfterInit, CallerBaseStep_diff, CallerComputeFullRNorm +export CallerFASResidual, CallerResidual, CallerInitGuess, CallerWarmup + +using ..XBraid.BraidUtils: libbraid, malloc_null_double_ptr +using ..XBraid: BraidVector + +# using MPI: MPI_Comm, Comm + +# calling functions +const CallerError = -1 +const CallerFInterp = 0 +const CallerFRestrict = 1 +const CallerFRefine = 2 +const CallerFAccess = 3 +const CallerFRefine_AfterInitHier = 4 +const CallerDrive_TopCycle = 5 +const CallerFCRelax = 6 +const CallerDrive_AfterInit = 7 +const CallerBaseStep_diff = 8 +const CallerComputeFullRNorm = 9 +const CallerFASResidual = 10 +const CallerResidual = 11 +const CallerInitGuess = 12 +const CallerWarmup = 13 + +# status structures +struct StepStatus + ptr::Ptr{Cvoid} +end +StepStatus() = StepStatus(C_NULL) + +struct AccessStatus + ptr::Ptr{Cvoid} +end +AccessStatus() = AccessStatus(C_NULL) + +struct SyncStatus + ptr::Ptr{Cvoid} +end +SyncStatus() = SyncStatus(C_NULL) + +# status get/set functions (not exported) +# @ccall requires these type annotations to be known at compile time, so macros won't work here +function getT(status::Union{StepStatus, AccessStatus}) + status.ptr == C_NULL && return 0.0 + t = Ref{Cdouble}() + @ccall libbraid.braid_StatusGetT(status.ptr::Ptr{Cvoid}, t::Ref{Cdouble})::Cint + return t[] +end + +function getTStop(status::StepStatus) + status.ptr == C_NULL && return 0.0 + tstop = Ref{Cdouble}() + @ccall libbraid.braid_StatusGetTStop(status.ptr::Ptr{Cvoid}, tstop::Ref{Cdouble})::Cint + return tstop[] +end + +function getTstartTstop(status::StepStatus) + status.ptr == C_NULL && return (0.0, 0.0) + tstart, tstop = Ref{Cdouble}(), Ref{Cdouble}() + @ccall libbraid.braid_StepStatusGetTstartTstop(status.ptr::Ptr{Cvoid}, tstart::Ref{Cdouble}, tstop::Ref{Cdouble})::Cint + return tstart[], tstop[] +end + +function getTIndex(status::Union{StepStatus, AccessStatus}) + status.ptr == C_NULL && return 0 + ti = Ref{Cint}() + @ccall libbraid.braid_StatusGetTIndex(status.ptr::Ptr{Cvoid}, ti::Ref{Cint})::Cint + # julia uses 1-based indexing + return ti[] + 1 +end + +function getLevel(status::Union{StepStatus, AccessStatus, SyncStatus}) + status.ptr == C_NULL && return 0 + level = Ref{Cint}() + @ccall libbraid.braid_StatusGetLevel(status.ptr::Ptr{Cvoid}, level::Ref{Cint})::Cint + return level[] +end + +function getCFactor(status::Union{StepStatus, AccessStatus}, level::Integer) + status.ptr == C_NULL && return 1 + cfactor = Ref{Cint}() + @ccall libbraid.braid_StatusGetCFactor(status.ptr::Ptr{Cvoid}, cfactor::Ref{Cint}, level::Cint)::Cint + return cfactor[] +end + +function getCFactor(status::Union{StepStatus, AccessStatus}) + status.ptr == C_NULL && return 0 + getCFactor(status, getLevel(status)) +end + +function getWrapperTest(status::Union{StepStatus, AccessStatus}) + status.ptr == C_NULL && return false + wtest = Ref{Cint}() + @ccall libbraid.braid_StatusGetWrapperTest(status.ptr::Ptr{Cvoid}, wtest::Ref{Cint})::Cint + return (wtest[] > 0) +end + +function getIter(status::Union{StepStatus, AccessStatus, SyncStatus}) + status.ptr == C_NULL && return 0 + iter = Ref{Cint}() + @ccall libbraid.braid_StatusGetIter(status.ptr::Ptr{Cvoid}, iter::Ref{Cint})::Cint + return iter[] +end + +function getNLevels(status::Union{StepStatus, AccessStatus, SyncStatus}) + status.ptr == C_NULL && return 0 + nlevels = Ref{Cint}() + @ccall libbraid.braid_StatusGetNLevels(status.ptr::Ptr{Cvoid}, nlevels::Ref{Cint})::Cint + return nlevels[] +end + +function setRFactor(status::StepStatus, rfactor::Real) + status.ptr == C_NULL && return nothing + @ccall libbraid.braid_StatusSetRFactor(status.ptr::Ptr{Cvoid}, rfactor::Cdouble)::Cint + return nothing +end + +function getNRefine(status::Union{StepStatus, AccessStatus, SyncStatus}) + status.ptr == C_NULL && return 0 + nrefine = Ref{Cint}() + @ccall libbraid.braid_StatusGetNRefine(status.ptr::Ptr{Cvoid}, nrefine::Ref{Cint})::Cint + return nrefine[] +end + +function getNTPoints(status::Union{StepStatus, AccessStatus, SyncStatus}) + status.ptr == C_NULL && return 0 + ntpoints = Ref{Cint}() + @ccall libbraid.braid_StatusGetNTPoints(status.ptr::Ptr{Cvoid}, ntpoints::Ref{Cint})::Cint + return ntpoints[] +end + +function getResidual(status::Union{StepStatus, AccessStatus}) + status.ptr == C_NULL && return 0.0 + res = Ref{Cdouble}() + @ccall libbraid.braid_StatusGetResidual(status.ptr::Ptr{Cvoid}, res::Ref{Cdouble})::Cint + return res[] +end + +function getDone(status::Union{StepStatus, AccessStatus, SyncStatus}) + status.ptr == C_NULL && return false + done = Ref{Cint}() + @ccall libbraid.braid_StatusGetDone(status.ptr::Ptr{Cvoid}, done::Ref{Cint})::Cint + return (done[] > 0) +end + +function getTILD(status::Union{StepStatus, AccessStatus}) + status.ptr == C_NULL && return (0.0, 0, 0, false) + time = Ref{Cdouble}() + iter = Ref{Cint}() + level = Ref{Cint}() + done = Ref{Cint}() + @ccall libbraid.braid_StatusGetTILD(status.ptr::Ptr{Cvoid}, time::Ref{Cdouble}, iter::Ref{Cint}, level::Ref{Cint}, done::Ref{Cint})::Cint + return time[], iter[], level[], (done[] > 0) +end + +function getTIUL(status::Union{StepStatus, SyncStatus}, level::Integer) + status.ptr == C_NULL && return (0, 0) + iu = Ref{Cint}() + il = Ref{Cint}() + @ccall libbraid.braid_StatusGetTIUL(status.ptr::Ptr{Cvoid}, iu::Ref{Cint}, il::Ref{Cint}, level::Cint)::Cint + # julia uses 1-based indexing + return iu[] + 1, il[] + 1 +end + +function getCallingFunction(status::Union{StepStatus, AccessStatus, SyncStatus}) + status.ptr == C_NULL && return CallerError + func = Ref{Cint}() + @ccall libbraid.braid_StatusGetCallingFunction(status.ptr::Ptr{Cvoid}, func::Ref{Cint})::Cint + return func[] +end + +function getTol(status::StepStatus) + status.ptr == C_NULL && return 0.0 + tol = Ref{Cdouble}() + @ccall libbraid.braid_StatusGetTol(status.ptr::Ptr{Cvoid}, tol::Ref{Cdouble})::Cint + return tol[] +end + +function getRNorms(status::StepStatus) + status.ptr == C_NULL && return zeros(0) + iters = getIter(status) + nrequest = Ref{Cint}(iters) + norms = zeros(Cdouble, iters) + @ccall libbraid.braid_StatusGetRNorms(status.ptr::Ptr{Cvoid}, nrequest::Ref{Cint}, norms::Ref{Cdouble})::Cint + return norms +end + +function getProc(status::Union{StepStatus, AccessStatus, SyncStatus}) + status.ptr == C_NULL && return 0 + proc = Ref{Cint}() + @ccall libbraid.braid_StatusGetProc(status.ptr::Ptr{Cvoid}, proc::Ref{Cint})::Cint + return proc[] +end + +# function getTComm(status::SyncStatus) +# status.ptr == C_NULL && return MPI.Comm(MPI.COMM_NULL) +# tcomm = Ref{MPI.MPI_Comm}() +# @ccall libbraid.braid_StatusGetTComm(status.ptr::Ptr{Cvoid}, tcomm::Ref{MPI.MPI_Comm})::Cint +# return MPI.Comm(tcomm[]) +# end + +function getDeltaRank(status::Union{StepStatus, AccessStatus}) + status.ptr == C_NULL && return 0 + rank = Ref{Cint}() + @ccall libbraid.braid_StatusGetDeltaRank(status.ptr::Ptr{Cvoid}, rank::Ref{Cint})::Cint + return rank[] +end + +# These are special cases +function getLocalLyapExponents(status::AccessStatus) + status.ptr == C_NULL && return zeros(0) + rank = getDeltaRank(status) + rank < 1 && return [] + + exps = zeros(rank) + num_retrieved = Ref(rank) + @ccall libbraid.braid_StatusGetLocalLyapExponents(status.ptr::Ptr{Cvoid}, exps::Ref{Cdouble}, num_retrieved::Ref{Cint})::Cint + return exps +end + +function getBasisVectors(status::Union{StepStatus, AccessStatus}) + status.ptr == C_NULL && return [] + rank = getDeltaRank(status) + rank < 1 && return [] + + Ψ = [] + for i in 1:rank + pp = malloc_null_double_ptr(Cvoid) + GC.@preserve pp begin + @ccall libbraid.braid_StatusGetBasisVec(status.ptr::Ptr{Cvoid}, pp::Ptr{Ptr{Cvoid}}, (i-1)::Cint)::Cint + end + p = unsafe_load(pp) + if p !== C_NULL + φ = unsafe_pointer_to_objref(p)::BraidVector + push!(Ψ, φ.user_vector) + end + Base.Libc.free(pp) + end + + return Ψ +end + +end # module Status \ No newline at end of file diff --git a/braid/braid.jl/Wrappers.jl b/braid/braid.jl/Wrappers.jl new file mode 100644 index 00000000..5e712290 --- /dev/null +++ b/braid/braid.jl/Wrappers.jl @@ -0,0 +1,324 @@ +#=BHEADER********************************************************************** + * Copyright (c) 2013, Lawrence Livermore National Security, LLC. + * Produced at the Lawrence Livermore National Laboratory. + * + * This file is part of XBraid. For support, post issues to the XBraid Github page. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License (as published by the Free Software + * Foundation) version 2.1 dated February 1999. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the IMPLIED WARRANTY OF MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the terms and conditions of the GNU General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + ***********************************************************************EHEADER=# + +module Wrappers + +using LinearAlgebra: norm2, dot +using Serialization: serialize, deserialize +using ..XBraid.BraidUtils +using ..XBraid: Status, BraidApp, BraidVector + +# useful helpers + +""" +Macro which wraps the function call in a try-catch block which prints the stacktrace without throwing, +since Julia functions should not throw errors when called from braid +""" +macro braidWrapper(expr) + # this error is at compile time, and therefore fine + expr.head == :function || error("Not a function expression.") + funcname = expr.args[1].args[1].args[1] + user_funcname = split(string(funcname), "_")[end] + :( + function $(esc(funcname))(args...; kwargs...)::Cint + f = $expr + try + result = f(args...; kwargs...) + return result + catch err + stacktrace_warn("Error in user function " * $(esc(user_funcname)), err) + return 1 + end + end + ) +end + +""" +Used to add a reference to the vector u to the IdDict stored in the app. +this keeps all newly allocated braid_vectors in the global scope, preventing them from being garbage collected. +""" +function _register_vector(app::BraidApp, u::BraidVector) + app.ref_ids[objectid(u)] = u +end + +""" +Used to remove a reference to the vector u to the IdDict stored in the app. +""" +function _deregister_vector(app::BraidApp, u::BraidVector) + id = objectid(u)::UInt64 + pop!(app.ref_ids, id) +end + +# these are internal functions which directly interface with XBraid + + +# Step, Init, and Access are required + +@braidWrapper function _jl_step!(_app::Ptr{Cvoid}, + _ustop::Ptr{Cvoid}, + _fstop::Ptr{Cvoid}, + _u::Ptr{Cvoid}, + _status::Ptr{Cvoid} +)::Cint + # println("step") + app = unsafe_pointer_to_objref(_app)::BraidApp + u = unsafe_pointer_to_objref(_u)::BraidVector + ustop = unsafe_pointer_to_objref(_ustop)::BraidVector + status = Status.StepStatus(_status) + + tstart, tstop = Status.getTstartTstop(status) + delta_rank = Status.getDeltaRank(status) + + # call the user's function + # residual option + if _fstop !== C_NULL + fstop = unsafe_pointer_to_objref(_fstop)::BraidVector + app.step(app.user_app, status, u.user_vector, ustop.user_vector, fstop.user_vector, tstart, tstop) + # Delta correction + elseif delta_rank > 0 + basis_vecs = Status.getBasisVectors(status) + app.step(app.user_app, status, u.user_vector, ustop.user_vector, tstart, tstop, basis_vecs) + # Default + else + app.step(app.user_app, status, u.user_vector, ustop.user_vector, tstart, tstop) + end + + return 0 +end +precompile(_jl_step!, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid})) +_c_step = @cfunction(_jl_step!, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid})) +export _c_step, _jl_step! + +@braidWrapper function _jl_init!(_app::Ptr{Cvoid}, t::Cdouble, u_ptr::Ptr{Ptr{Cvoid}})::Cint + # println("init") + app = unsafe_pointer_to_objref(_app)::BraidApp + + # initialize u and register a reference with IdDict + u = BraidVector(app.init(app.user_app, t)) + _register_vector(app, u) + + unsafe_store!(u_ptr, pointer_from_objref(u)) + + # store max size of all initialized vectors + if app.bufsize == 0 + # This serializes u but doesn't actually + # store the data + buffer = BlackHoleBuffer() + serialize(buffer, u) + app.bufsize = buffer.ptr + sizeof(Int) + end + + if app.user_VecType == Nothing + app.user_VecType = u.VecType + end + + return 0 +end +_c_init = @cfunction(_jl_init!, Cint, (Ptr{Cvoid}, Cdouble, Ptr{Ptr{Cvoid}})) +export _c_init, _jl_init! + +@braidWrapper function _jl_init_basis!(_app::Ptr{Cvoid}, t::Cdouble, index::Cint, u_ptr::Ptr{Ptr{Cvoid}})::Cint + # println("init_basis") + app = unsafe_pointer_to_objref(_app)::BraidApp + # julia uses 1-based indexing + u = BraidVector(app.basis_init(app.user_app, t, index + 1)) + + _register_vector(app, u) + unsafe_store!(u_ptr, pointer_from_objref(u)) + + # store max size of all initialized vectors + if app.bufsize == 0 + buffer = BlackHoleBuffer() + serialize(buffer, u) + app.bufsize_lyap = buffer.ptr + sizeof(Int) + end + + # store type of initialized vector + if app.user_BasType == Nothing + app.user_BasType = u.VecType + end + + return 0 +end +_c_init_basis = @cfunction(_jl_init_basis!, Cint, (Ptr{Cvoid}, Cdouble, Cint, Ptr{Ptr{Cvoid}})) +export _c_init_basis, _jl_init_basis! + +@braidWrapper function _jl_clone!(_app::Ptr{Cvoid}, _u::Ptr{Cvoid}, v_ptr::Ptr{Ptr{Cvoid}})::Cint + # println("clone") + app = unsafe_pointer_to_objref(_app)::BraidApp + u = unsafe_pointer_to_objref(_u)::BraidVector + # initialize v, and copy u into v + v = deepcopy(u) + + # then register v with IdDict and store in v_ptr + _register_vector(app, v) + unsafe_store!(v_ptr, pointer_from_objref(v)) + + return 0 +end +precompile(_jl_clone!, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Ptr{Cvoid}})) +_c_clone = @cfunction(_jl_clone!, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Ptr{Cvoid}})) +export _c_clone, _jl_clone! + +@braidWrapper function _jl_free!(_app::Ptr{Cvoid}, _u::Ptr{Cvoid})::Cint + # println("free") + app = unsafe_pointer_to_objref(_app)::BraidApp + u = unsafe_pointer_to_objref(_u)::BraidVector + # removing the global reference to u will cause it to be garbage collected + _deregister_vector(app, u) + return 0 +end +precompile(_jl_free!, (Ptr{Cvoid}, Ptr{Cvoid})) +_c_free = @cfunction(_jl_free!, Cint, (Ptr{Cvoid}, Ptr{Cvoid})) +export _c_free, _jl_free! + +@braidWrapper function _jl_sum!(_app::Ptr{Cvoid}, + alpha::Cdouble, _x::Ptr{Cvoid}, + beta::Cdouble, _y::Ptr{Cvoid})::Cint + app = unsafe_pointer_to_objref(_app)::BraidApp + x = unsafe_pointer_to_objref(_x)::BraidVector + y = unsafe_pointer_to_objref(_y)::BraidVector + app.sum(app.user_app, alpha, x.user_vector, beta, y.user_vector) + return 0 +end +precompile(_jl_sum!, (Ptr{Cvoid}, Cdouble, Ptr{Cvoid}, Cdouble, Ptr{Cvoid})) +_c_sum = @cfunction(_jl_sum!, Cint, (Ptr{Cvoid}, Cdouble, Ptr{Cvoid}, Cdouble, Ptr{Cvoid})) +export _c_sum, _jl_sum! + +@braidWrapper function _jl_norm!(_app::Ptr{Cvoid}, _u::Ptr{Cvoid}, norm_ptr::Ptr{Cdouble})::Cint + app = unsafe_pointer_to_objref(_app)::BraidApp + u = unsafe_pointer_to_objref(_u)::BraidVector + norm = app.spatialnorm(app.user_app, u.user_vector) + unsafe_store!(norm_ptr, norm) + + return 0 +end +precompile(_jl_norm!, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cdouble})) +_c_norm = @cfunction(_jl_norm!, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cdouble})) +export _c_norm, _jl_norm! + +@braidWrapper function _jl_inner_prod!(_app::Ptr{Cvoid}, _u::Ptr{Cvoid}, _v::Ptr{Cvoid}, norm_ptr::Ptr{Cdouble})::Cint + app = unsafe_pointer_to_objref(_app)::BraidApp + u = unsafe_pointer_to_objref(_u)::BraidVector + v = unsafe_pointer_to_objref(_v)::BraidVector + prod = app.inner_prod(app.user_app, u.user_vector, v.user_vector) + unsafe_store!(norm_ptr, prod) + + return 0 +end +precompile(_jl_inner_prod!, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cdouble})) +_c_inner_prod = @cfunction(_jl_inner_prod!, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cdouble})) +export _c_inner_prod, _jl_inner_prod! + +@braidWrapper function _jl_access!(_app::Ptr{Cvoid}, _u::Ptr{Cvoid}, _status::Ptr{Cvoid})::Cint + app = unsafe_pointer_to_objref(_app)::BraidApp + status = Status.AccessStatus(_status) + + if !isnothing(app.access) + u = unsafe_pointer_to_objref(_u)::BraidVector + app.access(app.user_app, status, u.user_vector) + end + + return 0 +end +precompile(_jl_access!, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid})) +_c_access = @cfunction(_jl_access!, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid})) +export _c_access, _jl_access! + +# buffer functions + +@braidWrapper function _jl_bufsize!(_app::Ptr{Cvoid}, size_ptr::Ptr{Cint}, status::Ptr{Cvoid})::Cint + app = unsafe_pointer_to_objref(_app) + unsafe_store!(size_ptr, app.bufsize) + return 0 +end +_c_bufsize = @cfunction(_jl_bufsize!, Cint, (Ptr{Cvoid}, Ptr{Cint}, Ptr{Cvoid})) +export _c_bufsize, _jl_bufsize! + +@braidWrapper function _jl_bufpack!(_app::Ptr{Cvoid}, _u::Ptr{Cvoid}, _buffer::Ptr{Cvoid}, status::Ptr{Cvoid})::Cint + app = unsafe_pointer_to_objref(_app)::BraidApp + u = unsafe_pointer_to_objref(_u)::BraidVector + buff_arr = unsafe_wrap(Vector{UInt8}, Base.unsafe_convert(Ptr{UInt8}, _buffer), app.bufsize) + buffer = IOBuffer(buff_arr, read=true, write=true, maxsize=app.bufsize) + + # store u in buffer + seek(buffer, sizeof(Int)) + serialize(buffer, u) + + # also store the total size of the buffer + seek(buffer, 0) + write(buffer, buffer.size) + + # tell braid the written size + @ccall libbraid.braid_BufferStatusSetSize(status::Ptr{Cvoid}, buffer.size::Cdouble)::Cint + + return 0 +end +precompile(_jl_bufpack!, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid})) +_c_bufpack = @cfunction(_jl_bufpack!, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid})) +export _c_bufpack, _jl_bufpack! + +@braidWrapper function _jl_bufunpack!(_app::Ptr{Cvoid}, _buffer::Ptr{Cvoid}, u_ptr::Ptr{Ptr{Cvoid}}, status::Ptr{Cvoid})::Cint + @assert _buffer !== C_NULL "tried to unpack null buffer" + app = unsafe_pointer_to_objref(_app)::BraidApp + # get size of buffer we are unpacking: + header = reinterpret(Ptr{Int}, _buffer) + bufsize = unsafe_load(header)::Int + buff_arr = unsafe_wrap(Vector{UInt8}, Base.unsafe_convert(Ptr{UInt8}, _buffer), bufsize) + buffer = IOBuffer(buff_arr, read=true, write=true, maxsize=bufsize) + seek(buffer, sizeof(Int)) + + # unpack the buffer into a new julia object, then register with IdDict + u = deserialize(buffer)::BraidVector{app.user_VecType} + _register_vector(app, u) + + # store u in provided pointer + unsafe_store!(u_ptr, pointer_from_objref(u)) + return 0 +end +precompile(_jl_bufunpack!, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Ptr{Cvoid}}, Ptr{Cvoid})) +_c_bufunpack = @cfunction(_jl_bufunpack!, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Ptr{Cvoid}}, Ptr{Cvoid})) +export _c_bufunpack, _jl_bufunpack! + +# optional functions + +@braidWrapper function _jl_sync(_app::Ptr{Cvoid}, status::Ptr{Cvoid})::Cint + app = unsafe_pointer_to_objref(_app)::BraidApp + if app.sync !== nothing + app.sync(app.user_app, Status.SyncStatus(status)) + end + return 0 +end +precompile(_jl_sync, (Ptr{Cvoid}, Ptr{Cvoid})) +_c_sync = @cfunction(_jl_sync, Cint, (Ptr{Cvoid}, Ptr{Cvoid})) +export _c_sync, _jl_sync + +# default functions assuming the user's vector is a julia array +export default_sum!, default_norm, default_inner_prod + +function default_sum!(app, α::Real, x::AbstractArray, β::Real, y::AbstractArray) + y .= α .* x .+ β .* y +end +default_norm(app, u::AbstractArray) = norm2(u) +default_inner_prod(app, u::AbstractArray, v::AbstractArray) = dot(u, v) + + +end # module Wrappers \ No newline at end of file diff --git a/braid/braid.jl/XBraid.jl b/braid/braid.jl/XBraid.jl new file mode 100644 index 00000000..aff02192 --- /dev/null +++ b/braid/braid.jl/XBraid.jl @@ -0,0 +1,849 @@ +#=BHEADER********************************************************************** + * Copyright (c) 2013, Lawrence Livermore National Security, LLC. + * Produced at the Lawrence Livermore National Laboratory. + * + * This file is part of XBraid. For support, post issues to the XBraid Github page. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License (as published by the Free Software + * Foundation) version 2.1 dated February 1999. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the IMPLIED WARRANTY OF MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the terms and conditions of the GNU General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + ***********************************************************************EHEADER=# + +module XBraid +# Not sure we want to export anything... +# export Init, Drive, etc. + +using Serialization: serialize, deserialize, Serializer # used to pack arbitrary julia objects into buffers +using LinearAlgebra: norm2, dot +using MPI + +include("BraidUtils.jl") +using .BraidUtils + +#= + | For this to work, XBraid must be compiled as a shared library. + | $ make braid shared=yes + | and the Fortran flags braid_Fortran_SpatialCoarsen, braid_Fortran_Residual, + | braid_Fortran_TimeGrid, and braid_Fortran_Sync must all be set to zero in + | braid.h before compiling. (TODO: figure out why this is...) + =# + +# This can contain anything (TODO: do I even need this?) +mutable struct BraidVector{T} + user_vector::T + VecType::Type +end +BraidVector(u::T) where {T} = BraidVector(u, T) + +OptionalFunction = Union{Function,Nothing} + +""" +This is an internal structure that is not generally exposed to the user. +This enables the user interface to only use memory safe function calls. +User defined data structures that are not time-dependent can be declared in +the global scope, or they can be packed into a user defined object and passed to +Init(), in which case the object will be passed as the first argument in step, sum, spatialnorm, and access. +_app.user_app is passed as the first argument to some user defined function. +Stores any time-independent data the user may need to compute a time-step. +Large, preallocate arrays should be packed into this object, since operations +involving globally scoped variables are slower than local variables. +struct braid_App end + +This may be manually contructed in order to pass to test functions. + +julia> app = BraidApp(my_app, MPI.COMM_WORLD, MPI.COMM_WORLD, my_step, my_init, my_sum, my_norm, my_access); + +julia> testSpatialNorm(app, 0.); + """ +mutable struct BraidApp + user_app::Any # user defined app data structure + + comm::MPI.Comm # global mpi communicator + comm_t::MPI.Comm # temporal mpi communicator + + # user functions + step::Function + init::Function + sum::Function + spatialnorm::Function + access::Function + + sync::OptionalFunction + basis_init::OptionalFunction + inner_prod::OptionalFunction + + # dictionary to store globally scoped references to allocated braid_vector objects + ref_ids::IdDict{UInt64,BraidVector} + bufsize::Integer # expected serialized size of user's vector + bufsize_lyap::Integer + user_AppType::Type + user_VecType::Type + user_BasType::Type +end + +# default constructor +function BraidApp(app, comm::MPI.Comm, comm_t::MPI.Comm, step::Function, init::Function, sum::Function, norm::Function, access::Function, sync::OptionalFunction, basis_init::OptionalFunction, inner_prod::OptionalFunction) + BraidApp(app, comm, comm_t, step, init, sum, norm, access, sync, basis_init, inner_prod, IdDict(), 0, 0, Nothing, Nothing, Nothing) +end + +function BraidApp(app, comm::MPI.Comm, step, init, access; comm_t=comm, sum=default_sum!, spatialnorm=default_norm, sync=nothing, basis_init=nothing, inner_prod=default_inner_prod) + BraidApp(app, comm, comm_t, step, init, sum, spatialnorm, access, sync, basis_init, inner_prod) +end + + +""" +Stores all the information needed to run XBraid. Create this object with Init(), then pass this to Drive() to run XBraid. + +julia> core = XBraid.Init(MPI.COMM_WORLD, MPI.COMM_WORLD, + +julia> XBraid.Drive(core); + +""" +mutable struct BraidCore + # internal values + _braid_core::Ptr{Cvoid} + _braid_app::BraidApp + tstart::Float64 + tstop::Float64 + ntime::Int32 + function BraidCore(_braid_core, _braid_app, tstart, tstop, ntime) + x = new(_braid_core, _braid_app, tstart, tstop, ntime) + finalizer(x) do core + # @async println("Destroying BraidCore $(core._braid_core)") + @ccall libbraid.braid_Destroy(core._braid_core::Ptr{Cvoid})::Cint + end + end +end + + +# import wrapper functions and status structures +include("Status.jl") +include("Wrappers.jl") +using .Status, .Wrappers + +function postInitPrecompile(app::BraidApp) + #= + # This is a hack to make sure every processor knows how big the user's vector will be. + # We can also take this time to precompile the user's step function so it doesn't happen + # in serial on the coarse grid. + =# + GC.enable(false) # disable garbage collection + app_ptr = pointer_from_objref(app)::Ptr{Cvoid} + pp = malloc_null_double_ptr(Cvoid) + + _jl_init!(app_ptr, 0.0, pp) + u_ptr = unsafe_load(pp) + u = unsafe_pointer_to_objref(u_ptr)::BraidVector + VecType = typeof(u.user_vector) + AppType = typeof(app.user_app) + + !precompile(app.step, (AppType, Status.StepStatus, VecType, VecType, Float64, Float64)) && println("failed to precompile step") + !precompile(app.spatialnorm, (AppType, VecType)) && println("failed to precompile norm") + !precompile(app.sum, (AppType, Float64, VecType, Float64, VecType)) && println("failed to precompile sum") + if app.access !== nothing + !precompile(app.access, (AppType, Status.AccessStatus, VecType)) && println("failed to precompile access") + end + + # some julia functions that can be precompiled + precompile(deepcopy, (BraidVector{VecType},)) + precompile(unsafe_store!, (Ptr{Float64}, Float64,)) + precompile(Tuple{typeof(Base.getproperty),BraidVector{VecType},Symbol}) + precompile(Tuple{typeof(Base.unsafe_store!),Ptr{Int32},Int64}) + precompile(Tuple{typeof(deserialize),Serializer{Base.GenericIOBuffer{Array{UInt8,1}}},DataType}) + precompile(Tuple{typeof(Base.unsafe_wrap),Type{Array{UInt8,1}},Ptr{UInt8},Int64}) + precompile(Tuple{Type{NamedTuple{(:read, :write, :maxsize),T} where T<:Tuple},Tuple{Bool,Bool,Int64}}) + + _jl_free!(app_ptr, u_ptr) + Base.Libc.free(pp) + GC.enable(true) # re-enable garbage collection + + app.user_AppType = AppType + app.user_VecType = VecType +end + +function deltaPrecompile(app::BraidApp) + GC.enable(false) + app_ptr = pointer_from_objref(app)::Ptr{Cvoid} + pp = malloc_null_double_ptr(Cvoid) + AppType = app.user_AppType + VecType = app.user_VecType + + _jl_init_basis!(app_ptr, 0.0, Int32(0), pp) + ψ_ptr = unsafe_load(pp) + ψ = unsafe_pointer_to_objref(ψ_ptr) + BasType = typeof(ψ.user_vector) + !precompile(app.step, (AppType, Status.StepStatus, VecType, VecType, Float64, Float64, Vector{BasType})) && println("failed to compile step_du") + !precompile(app.inner_prod, (AppType, VecType, VecType)) && println("failed to compile inner_prod u⋅u") + !precompile(app.inner_prod, (AppType, BasType, VecType)) && println("failed to compile inner_prod ψ⋅u") + !precompile(app.inner_prod, (AppType, VecType, BasType)) && println("failed to compile inner_prod u⋅ψ") + !precompile(app.inner_prod, (AppType, BasType, BasType)) && println("failed to compile inner_prod ψ⋅ψ") + + _jl_free!(app_ptr, ψ_ptr) + Base.Libc.free(pp) + GC.enable(true) + + app.user_BasType = BasType +end + +""" +Create a new BraidCore object. The BraidCore object is used to run the XBraid solver, and is destroyed when the object is garbage collected. + + +""" +function Init( + comm_world::MPI.Comm, + tstart::Real, + tstop::Real, + ntime::Integer, + step::Function, + init::Function, + access::Function; + sum=default_sum!, + spatialnorm=default_norm, + sync=nothing, + comm_t::MPI.Comm=comm_world, + app=nothing, + kwargs... +)::BraidCore + _app = BraidApp(app, comm_world, comm_t, step, init, sum, spatialnorm, access, sync, nothing, nothing) + _core_ptr = malloc_null_double_ptr(Cvoid) + + GC.@preserve _core_ptr begin + @ccall libbraid.braid_Init( + _app.comm::MPI.MPI_Comm, _app.comm_t::MPI.MPI_Comm, + tstart::Cdouble, tstop::Cdouble, ntime::Cint, + _app::Ref{BraidApp}, _c_step::Ptr{Cvoid}, _c_init::Ptr{Cvoid}, _c_clone::Ptr{Cvoid}, + _c_free::Ptr{Cvoid}, _c_sum::Ptr{Cvoid}, _c_norm::Ptr{Cvoid}, _c_access::Ptr{Cvoid}, + _c_bufsize::Ptr{Cvoid}, _c_bufpack::Ptr{Cvoid}, _c_bufunpack::Ptr{Cvoid}, + _core_ptr::Ptr{Ptr{Cvoid}}, + )::Cint + end + + _core = unsafe_load(_core_ptr) + Base.Libc.free(_core_ptr) + + if sync !== nothing + GC.@preserve _core begin + @ccall libbraid.braid_SetSync( + _core::Ptr{Cvoid}, _c_sync::Ptr{Cvoid} + )::Cint + end + end + + core = BraidCore(_core, _app, tstart, tstop, ntime) + + # parse kwargs + if haskey(kwargs, :timer_file) + timer_file = kwargs[:timer_file]::String + setTimerFile(core, timer_file) + end + if haskey(kwargs, :timing_level) + timing_level = kwargs[:timing_level]::Integer + setTimingLevel(core, timing_level) + end + if haskey(kwargs, :write_conv_hist) + write_conv_hist = kwargs[:write_conv_hist]::String + setWriteConvHistory(core, write_conv_hist) + end + if haskey(kwargs, :max_levels) + max_levels = kwargs[:max_levels]::Integer + setMaxLevels(core, max_levels) + end + if haskey(kwargs, :incr_max_levels) + incr_max_levels = kwargs[:incr_max_levels]::Bool + incr_max_levels && setIncrMaxLevels(core) + end + if haskey(kwargs, :skip) + skip = kwargs[:skip]::Bool + setSkip(core, skip) + end + if haskey(kwargs, :refine) + refine = kwargs[:refine]::Bool + setRefine(core, refine) + end + if haskey(kwargs, :max_refinements) + max_refinements = kwargs[:max_refinements]::Integer + setMaxRefinements(core, max_refinements) + end + if haskey(kwargs, :tpoints_cutoff) + tpoints_cutoff = kwargs[:tpoints_cutoff]::Integer + setTPointsCutoff(core, tpoints_cutoff) + end + if haskey(kwargs, :min_coarse) + min_coarse = kwargs[:min_coarse]::Integer + setMinCoarse(core, min_coarse) + end + if haskey(kwargs, :relax_only_cg) + relax_only_cg = kwargs[:relax_only_cg]::Bool + setRelaxOnlyCG(core, relax_only_cg) + end + if haskey(kwargs, :abs_tol) + abs_tol = kwargs[:abs_tol]::Float64 + setAbsTol(core, abs_tol) + end + if haskey(kwargs, :rel_tol) + rel_tol = kwargs[:rel_tol]::Float64 + setRelTol(core, rel_tol) + end + if haskey(kwargs, :n_relax_fine) + n_relax = kwargs[:n_relax]::Integer + setNRelax(core, 0, n_relax) + end + if haskey(kwargs, :cfactor) + cfactor = kwargs[:cfactor] + if isa(cfactor, Integer) + setCFactor(core, -1, cfactor) + elseif isa(cfactor, Vector{Integer}) + for (i, cf) in enumerate(cfactor) + setCFactor(core, i-1, cf) + end + else + error("cfactor must be an Integer or Vector{Integer}") + end + end + if haskey(kwargs, :cfactor_fine) + cfactor_fine = kwargs[:cfactor_fine]::Integer + setCFactorFine(core, 0, cfactor_fine) + end + if haskey(kwargs, :max_iter) + max_iter = kwargs[:max_iter]::Integer + setMaxIter(core, max_iter) + end + if haskey(kwargs, :fmg) + fmg = kwargs[:fmg]::Bool + fmg && setFMG(core) + end + if haskey(kwargs, :n_fmg_vcycles) + fmg_cycle = kwargs[:fmg_cycle]::Integer + setNFMGVcyc(core, fmg_cycle) + end + if haskey(kwargs, :storage) + storage = kwargs[:storage]::Integer + setStorage(core, storage) + end + if haskey(kwargs, :temporal_norm) + norm = kwargs[:temporal_norm]::Symbol + if norm == :One + setTemporalNorm(core, 1) + elseif norm == :Two + setTemporalNorm(core, 2) + elseif norm == :Inf + setTemporalNorm(core, 3) + else + error("temporal_norm must be one of :One, :Two, or :Inf") + end + end + if haskey(kwargs, :periodic) + periodic = kwargs[:periodic]::Bool + setPeriodic(core, periodic) + end + if haskey(kwargs, :print_level) + print_level = kwargs[:print_level]::Integer + print_level < 0 && error("print_level must be non-negative") + setPrintLevel(core, print_level) + end + if haskey(kwargs, :file_io_level) + file_io_level = kwargs[:file_io_level]::Integer + file_io_level < 0 && error("file_io_level must be non-negative") + setFileIOLevel(core, file_io_level) + end + if haskey(kwargs, :save_cycle) + save_cycle = kwargs[:save_cycle]::Bool + setFileIOLevel(core, save_cycle ? 1 : 0) + end + if haskey(kwargs, :print_file) + print_file = kwargs[:print_file]::String + setPrintFile(core, print_file) + end + if haskey(kwargs, :access_level) + access_level = kwargs[:access_level]::Integer + access_level < 0 && error("access_level must be non-negative") + setAccessLevel(core, access_level) + end + if haskey(kwargs, :final_fc_relax) + final_fc_relax = kwargs[:final_fc_relax]::Bool + final_fc_relax && setFinalFCRelax(core) + end + if haskey(kwargs, :seq_soln) + seq_soln = kwargs[:seq_soln]::Bool + setSeqSoln(core, seq_soln) + end + + return core +end + +""" +Warmup the BraidCore object. XBraid.Drive calls this function automatically, but it can be called manually to precompile all user functions. + +This function is not necessary for XBraid to run, but it can significantly reduce the time to run the first time step. Note that this function calls all user functions, so if any user functions have side-effects, this may behave unexpectedly + +julia> XBraid.Warmup(core) + +Julia> XBraid.Drive(core; warmup=false) + +See also: XBraid.Drive +""" +function Warmup(core::BraidCore) + _app = core._braid_app + # precompile all user functions by calling them from braid_Warmup + # if any user functions have side-effects, this may behave unexpectedly + fdt = (core.tstop - core.tstart) / (core.ntime + 1) + cdt = 2fdt + + if (_app.basis_init !== nothing) && (_app.inner_prod !== nothing) + @ccall libbraid.braid_Warmup( + _app::Ref{BraidApp}, _app.comm::MPI.MPI_Comm, core.tstart::Cdouble, fdt::Cdouble, cdt::Cdouble, + _c_init::Ptr{Cvoid}, _c_access::Ptr{Cvoid}, _c_free::Ptr{Cvoid}, + _c_clone::Ptr{Cvoid}, _c_sum::Ptr{Cvoid}, _c_norm::Ptr{Cvoid}, + _c_bufsize::Ptr{Cvoid}, _c_bufpack::Ptr{Cvoid}, _c_bufunpack::Ptr{Cvoid}, + C_NULL::Ptr{Cvoid}, C_NULL::Ptr{Cvoid}, _c_step::Ptr{Cvoid}, + _c_init_basis::Ptr{Cvoid}, _c_inner_prod::Ptr{Cvoid} + )::Cint + else + @ccall libbraid.braid_Warmup( + _app::Ref{BraidApp}, _app.comm::MPI.MPI_Comm, core.tstart::Cdouble, fdt::Cdouble, cdt::Cdouble, + _c_init::Ptr{Cvoid}, _c_access::Ptr{Cvoid}, _c_free::Ptr{Cvoid}, + _c_clone::Ptr{Cvoid}, _c_sum::Ptr{Cvoid}, _c_norm::Ptr{Cvoid}, + _c_bufsize::Ptr{Cvoid}, _c_bufpack::Ptr{Cvoid}, _c_bufunpack::Ptr{Cvoid}, + C_NULL::Ptr{Cvoid}, C_NULL::Ptr{Cvoid}, _c_step::Ptr{Cvoid}, + C_NULL::Ptr{Cvoid}, C_NULL::Ptr{Cvoid} + )::Cint + end + nothing +end + +""" +Wraps the XBraid braid_Drive function. This function is called by XBraid.Drive, and should not be called directly. +""" +function _Drive(core::BraidCore) + GC.@preserve core begin + @ccall libbraid.braid_Drive(core._braid_core::Ptr{Cvoid})::Cint + end +end + +""" +Run the XBraid solver. This function calls XBraid.Warmup by default, but this can be disabled by setting warmup=false, in which case a less expensive (but less effective) option is used to precompile the user functions. +""" +function Drive(core::BraidCore; warmup=true) + if warmup + Warmup(core) + else + _app = core._braid_app + # cheaper precompile option, but less effective + begin + postInitPrecompile(_app) + if (_app.basis_init !== nothing) && (_app.inner_prod !== nothing) + deltaPrecompile(_app) + end + end + end + _Drive(core) + return nothing +end + +function printStats(core::BraidCore) + GC.@preserve core begin + @ccall libbraid.braid_PrintStats(core._braid_core::Ptr{Cvoid})::Cint + end +end + +function setTimerFile(core::BraidCore, filestem::String) + len = @ccall strlen(filestem::Cstring)::Cint + GC.@preserve core begin + @ccall libbraid.braid_SetTimerFile(core._braid_core::Ptr{Cvoid}, len::Cint, filestem::Cstring)::Cint + end +end + +function printTimers(core::BraidCore) + GC.@preserve core begin + @ccall libbraid.braid_PrintTimers(core._braid_core::Ptr{Cvoid})::Cint + end +end + +function resetTimer(core::BraidCore) + GC.@preserve core begin + @ccall libbraid.braid_ResetTimer(core._braid_core::Ptr{Cvoid})::Cint + end +end + +function setTimings(core::BraidCore, timing_level::Integer) + GC.@preserve core begin + @ccall libbraid.braid_SetTimings(core._braid_core::Ptr{Cvoid}, timing_level::Cint)::Cint + end +end + +function writeConvHistory(core::BraidCore, filename::String) + GC.@preserve core begin + @ccall libbraid.braid_WriteConvHistory(core._braid_core::Ptr{Cvoid}, filename::Cstring)::Cint + end +end + +function setMaxLevels(core::BraidCore, max_levels::Integer) + @assert max_levels > 0 "Max. levels must be an integer greater than 0" + GC.@preserve core begin + @ccall libbraid.braid_SetMaxLevels(core._braid_core::Ptr{Cvoid}, max_levels::Cint)::Cint + end +end + +function setIncrMaxLevels(core::BraidCore) + GC.@preserve core begin + @ccall libbraid.braid_SetIncrMaxLevels(core._braid_core::Ptr{Cvoid})::Cint + end +end + +function setSkip(core::BraidCore, skip::Bool) + GC.@preserve core begin + @ccall libbraid.braid_SetSkip(core._braid_core::Ptr{Cvoid}, skip::Cint)::Cint + end +end + +function setRefine(core::BraidCore, refine::Bool) + GC.@preserve core begin + @ccall libbraid.braid_SetRefine(core._braid_core::Ptr{Cvoid}, refine::Cint)::Cint + end +end + +function setMaxRefinements(core::BraidCore, max_refinements::Integer) + GC.@preserve core begin + @ccall libbraid.braid_SetMaxRefinements(core._braid_core::Ptr{Cvoid}, max_refinements::Cint)::Cint + end +end + +function setTPointsCutoff(core::BraidCore, tpoints_cutoff::Integer) + GC.@preserve core begin + @ccall libbraid.braid_SetTPointsCutoff(core._braid_core::Ptr{Cvoid}, tpoints_cutoff::Cint)::Cint + end +end + +function setMinCoarse(core::BraidCore, min_coarse::Integer) + GC.@preserve core begin + @ccall libbraid.braid_SetMinCoarse(core._braid_core::Ptr{Cvoid}, min_coarse::Cint)::Cint + end +end + +function setRelaxOnlyCG(core::BraidCore, relax_only_cg::Bool) + GC.@preserve core begin + @ccall libbraid.braid_SetRelaxOnlyCG(core._braid_core::Ptr{Cvoid}, relax_only_cg::Cint)::Cint + end +end + +function setAbsTol(core::BraidCore, atol::Real) + GC.@preserve core begin + @ccall libbraid.braid_SetAbsTol(core._braid_core::Ptr{Cvoid}, atol::Cdouble)::Cint + end +end + +function setRelTol(core::BraidCore, rtol::Real) + GC.@preserve core begin + @ccall libbraid.braid_SetRelTol(core._braid_core::Ptr{Cvoid}, rtol::Cdouble)::Cint + end +end + +function setNRelax(core::BraidCore, level::Integer, nrelax::Integer) + GC.@preserve core begin + @ccall libbraid.braid_SetNRelax(core._braid_core::Ptr{Cvoid}, level::Cint, nrelax::Cint)::Cint + end +end + +function setCRelaxWt(core::BraidCore, level::Integer, Cwt::Real) + GC.@preserve core begin + @ccall libbraid.braid_SetCRelaxWt(core._braid_core::Ptr{Cvoid}, level::Cint, Cwt::Cdouble)::Cint + end +end + +function setCFactor(core::BraidCore, level::Integer, cfactor::Integer) + GC.@preserve core begin + @ccall libbraid.braid_SetCFactor(core._braid_core::Ptr{Cvoid}, level::Cint, cfactor::Cint)::Cint + end +end + +function setMaxIter(core::BraidCore, max_iter::Integer) + GC.@preserve core begin + @ccall libbraid.braid_SetMaxIter(core._braid_core::Ptr{Cvoid}, max_iter::Cint)::Cint + end +end + +function setFMG(core::BraidCore) + GC.@preserve core begin + @ccall libbraid.braid_SetFMG(core._braid_core::Ptr{Cvoid})::Cint + end +end + +function setNFMG(core::BraidCore, k::Integer) + GC.@preserve core begin + @ccall libbraid.braid_SetNFMG(core._braid_core::Ptr{Cvoid}, k::Cint)::Cint + end +end + +function setNFMGVcyc(core::BraidCore, nfmg_Vcyc::Integer) + GC.@preserve core begin + @ccall libbraid.braid_SetNFMGVcyc(core._braid_core::Ptr{Cvoid}, nfmg_Vcyc::Cint)::Cint + end +end + +function setStorage(core::BraidCore, storage::Bool) + GC.@preserve core begin + @ccall libbraid.braid_SetStorage(core._braid_core::Ptr{Cvoid}, storage::Cint)::Cint + end +end + +function setTemporalNorm(core::BraidCore, tnorm::Bool) + GC.@preserve core begin + @ccall libbraid.braid_SetTemporalNorm(core._braid_core::Ptr{Cvoid}, tnorm::Cint)::Cint + end +end + +function setPeriodic(core::BraidCore, periodic::Bool) + GC.@preserve core begin + @ccall libbraid.braid_SetPeriodic(core._braid_core::Ptr{Cvoid}, periodic::Cint)::Cint + end +end + +function setPrintLevel(core::BraidCore, print_level::Integer) + GC.@preserve core begin + @ccall libbraid.braid_SetPrintLevel(core._braid_core::Ptr{Cvoid}, print_level::Cint)::Cint + end +end + +function setFileIOLevel(core::BraidCore, io_level::Integer) + GC.@preserve core begin + @ccall libbraid.braid_SetFileIOLevel(core._braid_core::Ptr{Cvoid}, io_level::Cint)::Cint + end +end + +function setPrintFile(core::BraidCore, filename::String) + GC.@preserve core begin + @ccall libbraid.braid_SetPrintFile(core._braid_core::Ptr{Cvoid}, filename::Cstring)::Cint + end +end + +function setDefaultPrintFile(core::BraidCore) + GC.@preserve core begin + @ccall libbraid.braid_SetDefaultPrintFile(core._braid_core::Ptr{Cvoid})::Cint + end +end + +""" +Set access level for XBraid. This controls how often the user's access function is called. + + - 0: Never call access function + - 1: Call access function only when XBraid is finished + - 2: Call access function at every XBraid iteration, on every level + +Default is 1. +""" +function setAccessLevel(core::BraidCore, access_level::Integer) + GC.@preserve core begin + @ccall libbraid.braid_SetAccessLevel(core._braid_core::Ptr{Cvoid}, access_level::Cint)::Cint + end +end + +function setFinalFCRelax(core::BraidCore) + GC.@preserve core begin + @ccall libbraid.braid_SetFinalFCRelax(core._braid_core::Ptr{Cvoid})::Cint + end +end + +function getNumIter(core::BraidCore) + niter = Ref{Cint}(0) + GC.@preserve core begin + @ccall libbraid.braid_GetNumIter(core._braid_core::Ptr{Cvoid}, niter::Ref{Cint})::Cint + end + return niter[] +end + +function getRNorms(core::BraidCore) + nrequest = Ref{Cint}() + nrequest[] = getNumIter(core) + rnorms = zeros(nrequest[]) + GC.@preserve core begin + @ccall libbraid.braid_GetRNorms(core._braid_core::Ptr{Cvoid}, nrequest::Ref{Cint}, rnorms::Ref{Cdouble})::Cint + end + nrequest[] == 0 && return Float64[] + return rnorms[1:nrequest[]] +end + +function getNLevels(core::BraidCore) + nlevels = Ref(0) + GC.@preserve core begin + @ccall libbraid.braid_GetNLevels(core._braid_core::Ptr{Cvoid}, nlevels::Ref{Cint})::Cint + end + return nlevels[] +end + +function setSeqSoln(core::BraidCore, seq_soln::Bool) + GC.@preserve core begin + @ccall libbraid.braid_GetNLevels(core._braid_core::Ptr{Cvoid}, seq_soln::Cint)::Cint + end +end + +function setRichardsonEstimation(core::BraidCore, est_error::Bool, richardson::Bool, local_order::Integer) + GC.@preserve core begin + @ccall libbraid.braid_SetRichardsonEstimation(core._braid_core::Ptr{Cvoid}, est_error::Cint, richardson::Cint, local_order::Cint)::Cint + end +end + +function setSync(core::BraidCore, sync::Function) + core._braid_app.sync = sync + + GC.@preserve core begin + @ccall libbraid.braid_SetSync(core._braid_core::Ptr{Cvoid}, _c_sync::Ptr{Cvoid})::Cint + end +end + +function setDeltaCorrection(core::BraidCore, rank::Integer, basis_init::Function; inner_prod::Function=default_inner_prod) + core._braid_app.basis_init = basis_init + core._braid_app.inner_prod = inner_prod + # deltaPrecompile(core._braid_app) + + GC.@preserve core begin + @ccall libbraid.braid_SetDeltaCorrection(core._braid_core::Ptr{Cvoid}, rank::Cint, _c_init_basis::Ptr{Cvoid}, _c_inner_prod::Ptr{Cvoid})::Cint + end +end + +function setDeferDelta(core::BraidCore, level::Integer, iter::Integer) + GC.@preserve core begin + @ccall libbraid.braid_SetDeferDelta(core._braid_core::Ptr{Cvoid}, level::Cint, iter::Cint)::Cint + end +end + +function setLyapunovEstimation(core::BraidCore; relax::Bool=false, cglv::Bool=true, exponents::Bool=false) + GC.@preserve core begin + @ccall libbraid.braid_SetLyapunovEstimation(core._braid_core::Ptr{Cvoid}, relax::Cint, cglv::Cint, exponents::Cint)::Cint + end +end + +# Still missing spatial coarsening, residual, and adjoint + +# braid_test +function testInitAccess(app::BraidApp, t::Real, outputFile::Libc.FILE) + pass = @ccall libbraid.braid_TestInitAccess( + app::Ref{BraidApp}, + app.comm::MPI.MPI_Comm, + outputFile::Libc.FILE, + t::Cdouble, + _c_init::Ptr{Cvoid}, + _c_access::Ptr{Cvoid}, + _c_free::Ptr{Cvoid}, + )::Cint + return pass > 0 + + # println("Serialized size of user vector: $(app.bufsize)") + # println("Check output for objects not properly freed:") + # println(app.ref_ids) +end +testInitAccess(app::BraidApp, t::Real, outputFile::IO) = testInitAccess(app, t, Libc.FILE(outputFile)) +testInitAccess(app::BraidApp, t::Real) = testInitAccess(app, t, c_stdout) + +function testClone(app::BraidApp, t::Real, outputFile::Libc.FILE) + pass = @ccall libbraid.braid_TestClone( + app::Ref{BraidApp}, + app.comm::MPI.MPI_Comm, + outputFile::Libc.FILE, + t::Cdouble, + _c_init::Ptr{Cvoid}, + _c_access::Ptr{Cvoid}, + _c_free::Ptr{Cvoid}, + _c_clone::Ptr{Cvoid}, + )::Cint + return pass > 0 +end +testClone(app::BraidApp, t::Real, outputFile::IO) = testClone(app, t, Libc.FILE(outputFile)) +testClone(app::BraidApp, t::Real) = testClone(app, t, c_stdout) + +function testSum(app::BraidApp, t::Real, outputFile::Libc.FILE) + pass = @ccall libbraid.braid_TestSum( + app::Ref{BraidApp}, + app.comm::MPI.MPI_Comm, + outputFile::Libc.FILE, + t::Cdouble, + _c_init::Ptr{Cvoid}, + _c_access::Ptr{Cvoid}, + _c_free::Ptr{Cvoid}, + _c_clone::Ptr{Cvoid}, + _c_sum::Ptr{Cvoid}, + )::Cint + return pass > 0 +end +testSum(app::BraidApp, t::Real, outputFile::IO) = testSum(app, t, Libc.FILE(outputFile)) +testSum(app::BraidApp, t::Real) = testSum(app, t, c_stdout) + +function testSpatialNorm(app::BraidApp, t::Real, outputFile::Libc.FILE) + pass = @ccall libbraid.braid_TestSpatialNorm( + app::Ref{BraidApp}, + app.comm::MPI.MPI_Comm, + outputFile::Libc.FILE, + t::Cdouble, + _c_init::Ptr{Cvoid}, + _c_free::Ptr{Cvoid}, + _c_clone::Ptr{Cvoid}, + _c_sum::Ptr{Cvoid}, + _c_norm::Ptr{Cvoid}, + )::Cint + return pass > 0 +end +testSpatialNorm(app::BraidApp, t::Real, outputFile::IO) = testSpatialNorm(app, t, Libc.FILE(outputFile)) +testSpatialNorm(app::BraidApp, t::Real) = testSpatialNorm(app, t, c_stdout) + +function testBuf(app::BraidApp, t::Real, outputFile::Libc.FILE) + pass = @ccall libbraid.braid_TestBuf( + app::Ref{BraidApp}, + app.comm::MPI.MPI_Comm, + outputFile::Libc.FILE, + t::Cdouble, + _c_init::Ptr{Cvoid}, + _c_free::Ptr{Cvoid}, + _c_sum::Ptr{Cvoid}, + _c_norm::Ptr{Cvoid}, + _c_bufsize::Ptr{Cvoid}, + _c_bufpack::Ptr{Cvoid}, + _c_bufunpack::Ptr{Cvoid}, + )::Cint + return pass > 0 + # print('\n') + # println("Check output for objects not freed:") + # println(app.ref_ids) +end +testBuf(app::BraidApp, t::Real, outputFile::IO) = testBuf(app, t, Libc.FILE(outputFile)) +testBuf(app::BraidApp, t::Real) = testBuf(app, t, c_stdout) + +function testDelta(app::BraidApp, t::Real, dt::Real, rank::Integer, outputFile::Libc.FILE) + app.basis_init::Function + app.inner_prod::Function + pass = @ccall libbraid.braid_TestDelta( + app::Ref{BraidApp}, + app.comm::MPI.MPI_Comm, + outputFile::Libc.FILE, + t::Cdouble, + dt::Cdouble, + rank::Cint, + _c_init::Ptr{Cvoid}, + _c_init_basis::Ptr{Cvoid}, + _c_access::Ptr{Cvoid}, + _c_free::Ptr{Cvoid}, + _c_clone::Ptr{Cvoid}, + _c_sum::Ptr{Cvoid}, + _c_bufsize::Ptr{Cvoid}, + _c_bufpack::Ptr{Cvoid}, + _c_bufunpack::Ptr{Cvoid}, + _c_inner_prod::Ptr{Cvoid}, + _c_step::Ptr{Cvoid}, + )::Cint + return pass > 0 + # println("Check output for objects not freed:") + # println(app.ref_ids) +end +testDelta(app::BraidApp, t::Real, dt::Real, rank::Integer, outputFile::IO) = testDelta(app, t, dt, rank, Libc.FILE(outputFile)) +testDelta(app::BraidApp, t::Real, dt::Real, rank::Integer) = testDelta(app, t, dt, rank, c_stdout) + +end # module XBraid diff --git a/braid/braid_F90_iface.c b/braid/braid_F90_iface.c index 8622a94b..85e88b70 100644 --- a/braid/braid_F90_iface.c +++ b/braid/braid_F90_iface.c @@ -18,7 +18,9 @@ * Temple Place, Suite 330, Boston, MA 02111-1307 USA * ***********************************************************************EHEADER*/ - + +/* This interface is optionally available */ +#if (braid_Fortran_Iface) /** \file _braid_F90_iface.c * \brief Define F90 interface for user routines. @@ -1480,3 +1482,4 @@ braid_F90_Name(braid_set_sync_f90, BRAID_SET_SYNC_F90)( #endif +#endif diff --git a/braid/braid_status.c b/braid/braid_status.c index 62fc3b47..f647cb2c 100644 --- a/braid/braid_status.c +++ b/braid/braid_status.c @@ -276,6 +276,7 @@ braid_StatusGetBasisVec(braid_Status status, if ((ba != NULL) && (index < ba->rank)) { *v_ptr = ba->userVecs[index]; + return _braid_error_flag; } return _braid_error_flag; } diff --git a/braid/braid_test.c b/braid/braid_test.c index d15c6a4a..2f218a53 100644 --- a/braid/braid_test.c +++ b/braid/braid_test.c @@ -39,6 +39,10 @@ braid_TestInitAccess( braid_App app, braid_PtFcnAccess myaccess, braid_PtFcnFree myfree) { + if (!fp) + { + fp = stdout; + } braid_Vector u ; braid_Status status = _braid_CTAlloc(_braid_Status, 1); @@ -84,6 +88,10 @@ braid_TestClone( braid_App app, braid_PtFcnFree myfree, braid_PtFcnClone clone) { + if (!fp) + { + fp = stdout; + } braid_Vector u, v; braid_Status status = _braid_CTAlloc(_braid_Status, 1); @@ -143,6 +151,10 @@ braid_TestSum( braid_App app, braid_PtFcnClone clone, braid_PtFcnSum sum ) { + if (!fp) + { + fp = stdout; + } braid_Vector u, v; braid_Status status = _braid_CTAlloc(_braid_Status, 1); @@ -215,6 +227,10 @@ braid_TestSpatialNorm( braid_App app, braid_PtFcnSum sum, braid_PtFcnSpatialNorm spatialnorm) { + if (!fp) + { + fp = stdout; + } braid_Vector u, v, w; braid_Real result1, result2; @@ -377,6 +393,10 @@ braid_TestInnerProd( braid_App app, braid_PtFcnSum sum, braid_PtFcnInnerProd inner_prod) { + if (!fp) + { + fp = stdout; + } braid_Vector u, v; braid_Real result1, result2; @@ -503,6 +523,10 @@ braid_TestBuf( braid_App app, braid_PtFcnBufPack bufpack, braid_PtFcnBufUnpack bufunpack) { + if (!fp) + { + fp = stdout; + } braid_Vector u, v; braid_Real result1; @@ -602,6 +626,10 @@ braid_TestCoarsenRefine( braid_App app, braid_PtFcnSCoarsen coarsen, braid_PtFcnSRefine refine) { + if (!fp) + { + fp = stdout; + } braid_Vector u, v, w, uc, vc, wc; braid_Real result1; @@ -781,6 +809,10 @@ braid_TestResidual( braid_App app, braid_PtFcnResidual residual, braid_PtFcnStep step) { + if (!fp) + { + fp = stdout; + } braid_Vector u, unext, ustop, fstop; braid_Real result1; @@ -924,6 +956,10 @@ braid_TestAll( braid_App app, braid_PtFcnStep step) { + if (!fp) + { + fp = stdout; + } braid_Int myid_x, flag = 0, correct = 1; MPI_Comm_rank( comm_x, &myid_x ); @@ -1024,9 +1060,12 @@ braid_TestDelta(braid_App app, braid_PtFcnInnerProd myinner_prod, braid_PtFcnStep mystep) { + if (!fp) + { + fp = stdout; + } - braid_Vector u; - braid_Vector v; + braid_Vector u, v, w; braid_Basis A, B; // braid_Int actual_rank = rank; braid_Status status = _braid_CTAlloc(_braid_Status, 1); @@ -1120,7 +1159,6 @@ braid_TestDelta(braid_App app, myinner_prod(app, B->userVecs[i], B->userVecs[i], &prod); if ((prod < wiggle) && (prod > -wiggle)) { - _braid_ParFprintfFlush(fp, myid_x, " braid_TestInitBasis: B has linearly dependent columns!\n"); _braid_ParFprintfFlush(fp, myid_x, " braid_TestInitBasis: Test 2 Failed\n"); correct = 0; @@ -1152,6 +1190,8 @@ braid_TestDelta(braid_App app, } _braid_ParFprintfFlush(fp, myid_x, " braid_TestStepDiff: v = clone(u) \n"); myclone(app, u, &v); + _braid_ParFprintfFlush(fp, myid_x, " braid_TestStepDiff: w = clone(u) \n"); + myclone(app, u, &w); _braid_StepStatusInit(t, t + dt, 0, wiggle, 0, 0, 0, 0, braid_ASCaller_Residual, B, sstatus); _braid_ParFprintfFlush(fp, myid_x, " braid_TestStepDiff: v = step(v)\n"); @@ -1159,25 +1199,34 @@ braid_TestDelta(braid_App app, mystep(app, u, NULL, v, sstatus); braid_Real eps = sqrt(wiggle); - _braid_ParFprintfFlush(fp, myid_x, " braid_TestStepDiff: u = step(u + eps*A_0) \n"); _braid_StepStatusInit(t, t + dt, 0, wiggle, 0, 0, 0, 0, braid_ASCaller_Residual, NULL, sstatus); - mysum(app, eps, A->userVecs[0], 1., u); - mystep(app, u, NULL, u, sstatus); - - _braid_ParFprintfFlush(fp, myid_x, " braid_TestStepDiff: v = B_0 - (u - v)/eps\n"); - _braid_ParFprintfFlush(fp, myid_x, " braid_TestStepDiff: (v = step_dv(u)*A_0 - (step(u + eps*A_0) - step(u))/eps) \n"); - mysum(app, 1/eps, u, -1/eps, v); - mysum(app, 1., B->userVecs[0], -1., v); - _braid_ParFprintfFlush(fp, myid_x, " braid_TestStepDiff: result = inner_prod(v, v) \n"); - braid_Real result; - myinner_prod(app, v, v, &result); - _braid_ParFprintfFlush(fp, myid_x, " braid_TestStepDiff: actual output: result approx. %1.2e \n", result); - _braid_ParFprintfFlush(fp, myid_x, " braid_TestStepDiff: expected output: positive, near zero \n\n"); + for (braid_Int i = 0; i < rank; i++) + { + _braid_ParFprintfFlush(fp, myid_x, " braid_TestStepDiff: for i = %d \n", i); + _braid_ParFprintfFlush(fp, myid_x, " braid_TestStepDiff: w = step(u + eps*A[i]) \n"); + + mysum(app, 1., u, 0., w); + mysum(app, eps, A->userVecs[i], 1., w); + mystep(app, w, NULL, w, sstatus); + + _braid_ParFprintfFlush(fp, myid_x, " braid_TestStepDiff: w = B[i] - (w - v)/eps\n"); + _braid_ParFprintfFlush(fp, myid_x, " braid_TestStepDiff: (w = step_dv(u)*A[i] - (step(u + eps*A[i]) - step(u))/eps) \n"); + mysum(app, -1/eps, v, 1/eps, w); + mysum(app, 1., B->userVecs[i], -1., w); + _braid_ParFprintfFlush(fp, myid_x, " braid_TestStepDiff: result = inner_prod(w, w) \n"); + braid_Real result; + myinner_prod(app, w, w, &result); + _braid_ParFprintfFlush(fp, myid_x, " braid_TestStepDiff: actual output: result approx. %1.2e \n", result); + _braid_ParFprintfFlush(fp, myid_x, " braid_TestStepDiff: expected output: positive, near zero \n\n"); + } + _braid_ParFprintfFlush(fp, myid_x, "Finished braid_TestStepDiff\n"); /* Free variables */ _braid_ParFprintfFlush(fp, myid_x, " braid_TestDelta: free(v) \n"); myfree(app, v); + _braid_ParFprintfFlush(fp, myid_x, " braid_TestDelta: free(w) \n"); + myfree(app, w); _braid_ParFprintfFlush(fp, myid_x, " braid_TestDelta: free(B) \n"); for (braid_Int i = 0; i < rank; i++) { @@ -1257,6 +1306,7 @@ braid_TestDelta(braid_App app, } _braid_ParFprintfFlush(fp, myid_x, " braid_TestBufBasis: inner_prod(v, v)\n"); + braid_Real result; myinner_prod(app, v, v, &result); if (result > wiggle || _braid_isnan(result)) { @@ -1321,4 +1371,4 @@ braid_TestDelta(braid_App app, _braid_ParFprintfFlush(fp, myid_x, "Finished braid_TestDelta\n"); return correct; -} \ No newline at end of file +} diff --git a/braid/braid_test.h b/braid/braid_test.h index 378fd5b1..4db8add1 100644 --- a/braid/braid_test.h +++ b/braid/braid_test.h @@ -122,6 +122,32 @@ braid_TestSpatialNorm( braid_App app, /**< User defined App ); +/** + * Test the inner_prod function.\n + * A vector is initialized at time *t1*, then the vector is normalized under the + * norm induced by inner_prod. A second vector is initialized at time *t2*, and the + * Gram Schmidt process removes the component of the second vector along the + * direction of the first. The test is inconclusive unless both vectors are nonzero + * and not orthogonal. + * + * - Returns 0 if the tests fail + * - Returns 1 if the tests pass + * - Check the log messages to see details of which tests failed. + **/ +braid_Int +braid_TestInnerProd( braid_App app, /**< User defined App structure */ + MPI_Comm comm_x, /**< Spatial communicator */ + FILE *fp, /**< File pointer (could be stdout or stderr) for log messages */ + braid_Real t1, /**< Time value used to initialize the 1st vector */ + braid_Real t2, /**< Time value used to initialize the 2nd vector (t1 != t2) */ + braid_PtFcnInit init, /**< Initialize a braid_Vector on finest temporal grid */ + braid_PtFcnFree free, /**< Free a braid_Vector */ + braid_PtFcnSum sum, /**< Compute vector sum of two braid_Vectors */ + braid_PtFcnInnerProd inner_prod /**< Compute inner product of two braid_Vectors */ + ); + + + /** * Test the inner_prod function.\n * A vector is initialized at time *t1*, then the vector is normalized under the diff --git a/braid/util.c b/braid/util.c index 2a1b15aa..53430255 100644 --- a/braid/util.c +++ b/braid/util.c @@ -283,3 +283,100 @@ _braid_MPI_Wtime(braid_Core core, braid_Int timing_level) return -1.0; } } + +/*---------------------------------------------------------------------------- + *----------------------------------------------------------------------------*/ + +braid_Int +braid_Warmup(braid_App app, + MPI_Comm comm_x, + braid_Real t, + braid_Real fdt, + braid_Real cdt, + braid_PtFcnInit init, + braid_PtFcnAccess access, + braid_PtFcnFree myfree, + braid_PtFcnClone clone, + braid_PtFcnSum sum, + braid_PtFcnSpatialNorm spatialnorm, + braid_PtFcnBufSize bufsize, + braid_PtFcnBufPack bufpack, + braid_PtFcnBufUnpack bufunpack, + braid_PtFcnSCoarsen coarsen, + braid_PtFcnSRefine refine, + braid_PtFcnStep step, + braid_PtFcnInitBasis init_basis, + braid_PtFcnInnerProd innerprod) +{ + braid_Vector u, v; + braid_Status status = _braid_CTAlloc(_braid_Status, 1); + braid_AccessStatus astatus = (braid_AccessStatus)status; + + /* init/access */ + _braid_AccessStatusInit(t, 0, 0.0, 0, 0, 0, 0, 0, 1, -1, NULL, astatus); + init(app, t, &u); + if(access != NULL) + { + access(app, u, astatus); + } + + braid_Int size; + braid_Real throwaway; + void *buffer; + + /* bufsize, bufpack, bufunpack */ + braid_BufferStatus bstatus = (braid_BufferStatus)status; + _braid_BufferStatusInit(0, 0, 0, 0, bstatus); + bufsize(app, &size, bstatus); + buffer = malloc(size); + _braid_StatusElt(bstatus, size_buffer) = size; + bufpack(app, u, buffer, bstatus); + bufunpack(app, buffer, &v, bstatus); + free(buffer); + + /* free, clone, sum, spatialnorm */ + myfree(app, v); + clone(app, u, &v); + sum(app, 1.0, u, -1.0, v); + spatialnorm(app, v, &throwaway); + + /* step */ + braid_StepStatus sstatus = (braid_StepStatus)status; + _braid_StepStatusInit(t, t+fdt, 0, 1e-16, 0, 0, 0, 2, braid_ASCaller_Warmup, NULL, sstatus); + step(app, v, NULL, u, sstatus); + + /* coarsen, refine */ + if ( (coarsen != NULL) && (refine != NULL) ) + { + braid_CoarsenRefStatus cstatus = (braid_CoarsenRefStatus)status; + + _braid_CoarsenRefStatusInit(t, t-fdt, t+fdt, t-cdt, t+cdt, 0, 0, 0, 0, cstatus); + myfree(app, v); + coarsen(app, u, &v, cstatus); + myfree(app, u); + refine(app, v, &u, cstatus); + } + + /* Delta functions: init_basis, innerprod, step with basis */ + if ( (init_basis != NULL) && (innerprod != NULL)) + { + braid_Basis B; + B = _braid_TAlloc(_braid_Basis, 1); + B->rank = 1; + B->userVecs = _braid_TAlloc(braid_Vector, 1); + init_basis(app, t, 0, &(B->userVecs[0])); + innerprod(app, u, B->userVecs[0], &throwaway); + _braid_StepStatusInit(t, t+fdt, 0, 1e-16, 0, 0, 0, 2, braid_ASCaller_Warmup, B, sstatus); + step(app, v, NULL, u, sstatus); + + myfree(app, B->userVecs[0]); + _braid_TFree(B); + } + + /* Free variables */ + myfree(app, u); + myfree(app, v); + _braid_StatusDestroy(status); + + return _braid_error_flag; +} \ No newline at end of file diff --git a/braid/util.h b/braid/util.h index 8f6d2a8a..d9916699 100644 --- a/braid/util.h +++ b/braid/util.h @@ -127,6 +127,31 @@ _braid_GetNEntries(braid_Real *_array, braid_Real _braid_MPI_Wtime(braid_Core core, braid_Int timing_level); - +/** + * Warmup calls all user functions once with sensible arguments. + * This is especially useful for the Julia wrapper, since Julia is JIT compiled, + * and this force compiles all the functions used in braid_Drive. + */ +braid_Int +braid_Warmup( braid_App app, /**< User defined App structure */ + MPI_Comm comm_x, /**< Spatial communicator */ + braid_Real t, /**< Time value to initialize test vectors with*/ + braid_Real fdt, /**< Fine time step value that you spatially coarsen from */ + braid_Real cdt, /**< Coarse time step value that you coarsen to */ + braid_PtFcnInit init, /**< Initialize a braid_Vector on finest temporal grid*/ + braid_PtFcnAccess access, /**< Allows access to XBraid and current braid_Vector (can be NULL for no writing)*/ + braid_PtFcnFree free, /**< Free a braid_Vector*/ + braid_PtFcnClone clone, /**< Clone a braid_Vector */ + braid_PtFcnSum sum, /**< Compute vector sum of two braid_Vectors */ + braid_PtFcnSpatialNorm spatialnorm, /**< Compute norm of a braid_Vector, this is a norm only over space */ + braid_PtFcnBufSize bufsize, /**< Computes size in bytes for one braid_Vector MPI buffer */ + braid_PtFcnBufPack bufpack, /**< Packs MPI buffer to contain one braid_Vector */ + braid_PtFcnBufUnpack bufunpack, /**< Unpacks MPI buffer into a braid_Vector */ + braid_PtFcnSCoarsen coarsen, /**< Spatially coarsen a vector. If NULL, skipped.*/ + braid_PtFcnSRefine refine, /**< Spatially refine a vector. If NULL, skipped.*/ + braid_PtFcnStep step, /**< Compute a time step with a braid_Vector */ + braid_PtFcnInitBasis init_basis, /**< Initialize a basis vector at time t and spatial index i. If NULL, skipped */ + braid_PtFcnInnerProd innerprod /**< Compute the inner product between two braid_Vectors. If NULL, skipped. */ + ); #endif diff --git a/drivers/julia/adaptive_theta/build_function_sundials.jl b/drivers/julia/adaptive_theta/build_function_sundials.jl new file mode 100644 index 00000000..869fdc89 --- /dev/null +++ b/drivers/julia/adaptive_theta/build_function_sundials.jl @@ -0,0 +1,101 @@ +using Symbolics, Dates + +# code generation for sundials (C) +struct SunTarget <: Symbolics.BuildTargets end + +function sunliterals(expr) + expr isa Real && return :(RCONST($(float(expr)))) + return expr +end + +function sunoperators(expr) + expr isa Expr || return expr + for e in expr.args + if e isa Expr + sunoperators(e) + end + end + for i in eachindex(expr.args) + if expr.args[i] isa Union{Float32, Float64, Rational} + # RCONST is a macro from sundials which constructs a real constant + expr.args[i] = :(RCONST($(float(expr.args[i])))) + end + end + # Introduce another factor 1 to prevent contraction of terms like "5 * t" to "5t" (not valid C code) + if expr.head==:call && expr.args[1]==:* && length(expr.args)==3 && isa(expr.args[2], Real) && isa(expr.args[3], Symbol) + push!(expr.args, 1) + # Power operator does not exist in C, replace by multiplication or "pow" + elseif expr.head==:call && expr.args[1]==:^ + @assert length(expr.args)==3 "Don't know how to handle ^ operation with <> 2 arguments" + x = expr.args[2] + n = expr.args[3] + empty!(expr.args) + # Replace by multiplication/division if + # x is a symbol and n is a small integer + # x is a more complex expression and n is ±1 + # n is exactly 0 + if (isa(n,Integer) && ((isa(x, Symbol) && abs(n) <= 3) || abs(n) <= 1)) || n==0 + if n >= 0 + append!(expr.args, [:*, fill(x, n)...]) + # fill up with factor 1 so this expr can still be a multiplication + while length(expr.args) < 3 + push!(expr.args, 1) + end + else # inverse of the above + if n==-1 + term = x + else + term = :( ($(x)) ^ ($(-n))) + coperators(term) + end + append!(expr.args, [:/, 1., term]) + end + #... otherwise use "pow" function + else + append!(expr.args, [:SUNpowerI, x, n]) + end + # replace bare real constants by RCONST + elseif expr.head==:call && (expr.args[1]==:* || expr.args[1]==:+ || expr.args[1]==:/) + for i in eachindex(expr.args) + if expr.args[i] isa Real + expr.args[i] = :(RCONST($(float(expr.args[i])))) + end + end + end + expr +end + +function Symbolics._build_function(target::SunTarget, ex::AbstractArray, args...; + conv = Symbolics.toexpr, + header = false, + fname = :diffeqf, + lhsname = :du, + rhsnames = [Symbol("RHS$i") for i in 1:length(args)]) + @info "Building function _$fname for target $target" + fname = Symbol('_' * string(fname)) + ex = hcat([row for row in eachrow(ex)]...) + varnumbercache = Symbolics.buildvarnumbercache(args...) + equations = Vector{String}() + for col ∈ axes(ex, 2), row ∈ axes(ex, 1) + lhs = string(lhsname, "[", (col-1) * size(ex,1) + row-1, "]") + rhs = Symbolics.numbered_expr(ex[row, col].val, varnumbercache, args...; + lhsname = lhsname, + rhsnames = rhsnames, + offset = -1) |> sunoperators |> sunliterals |> string + push!(equations, string(lhs, " = ", rhs, ";")) + end + + argstrs = join(vcat("sunrealtype* $(lhsname)",[typeof(args[i])<:AbstractArray ? "const sunrealtype* $(rhsnames[i])" : "const sunrealtype $(rhsnames[i])" for i in 1:length(args)]),", ") + + head = """ + #include + #include + + """ + body = "void $fname($(argstrs...))\n{$([string("\n ", eqn) for eqn ∈ equations]...)\n}\n" + if header + return head*body + else + return body + end +end diff --git a/drivers/julia/adaptive_theta/butcher-tables.jl b/drivers/julia/adaptive_theta/butcher-tables.jl new file mode 100644 index 00000000..ddc723b1 --- /dev/null +++ b/drivers/julia/adaptive_theta/butcher-tables.jl @@ -0,0 +1,260 @@ +using DataStructures, PrettyTables +using Symbolics, NLsolve + +include("build_function_sundials.jl") + +struct ButcherTable + A::AbstractMatrix + b::AbstractVector + c::AbstractVector + s::Int +end +ButcherTable(A, b) = ButcherTable(A, b, A*ones(length(b)), length(b)) +ButcherTable(A, b, c) = ButcherTable(A, b, c, length(b)) +function ButcherTable(A::Vector{<:Real}, b::Vector{<:Real}, c::Vector{<:Real}, s::Integer) + return ButcherTable(reshape(A, s, s), b, c, s) +end +function Base.show(io::IO, bt::ButcherTable) + print(io, "ButcherTable: \n") + # pretty table + b = [1.0; bt.b] + data = [bt.c bt.A; b'] + pretty_table( + data; + show_header=false, + body_hlines=[bt.s], + vlines=[:begin, 1, :end] + ) +end + +forest = OrderedDict( + # 2nd order + :{τ} => 1, + # 3rd order + :{{τ}} => 2, + :{τ, τ} => 3, + # 4th order + :{{{τ}}} => 4, + :{{τ, τ}} => 5, + :{τ, {τ}} => 6, + :{τ, τ, τ} => 7, + # 5th order + :{{{{τ}}}} => 8, + :{{{τ, τ}}} => 9, + :{{τ, {τ}}} => 10, + :{{τ, τ, τ}} => 11, + :{τ, {{τ}}} => 12, + :{τ, {τ, τ}} => 13, + :{{τ}, {τ}} => 14, + :{τ, τ, {τ}} => 15, + :{τ, τ, τ, τ} => 16 +) + +elem_weights = [ + # 2nd order + (A, b, c) -> b' * c, + # 3rd order + (A, b, c) -> b' * A * c, + (A, b, c) -> b' * c.^2, + # 4th order + (A, b, c) -> b' * A * A * c, + (A, b, c) -> b' * A * c.^2, + (A, b, c) -> b' * (c .* (A*c)), + (A, b, c) -> b' * c.^3, + # 5th order + (A, b, c) -> b' * A * A * A * c, + (A, b, c) -> b' * A * A * c.^2, + (A, b, c) -> b' * A * (c .* (A*c)), + (A, b, c) -> b' * A * c.^3, + (A, b, c) -> b' * (c .* (A*A*c)), + (A, b, c) -> b' * (c .* (A*c.^2)), + (A, b, c) -> b' * (A * c).^2, + (A, b, c) -> b' * (c .* (c .* (A*c))), + (A, b, c) -> b' * c.^4 +] + +rhs_classical = [ + # 2nd order + 1/2, + # 3rd order + 1/6, 1/3, + # 4th order + 1/24, 1/12, 1/8, 1/4, + # 5th order + 1/120, 1/60, 1/40, 1/20, 1/30, 1/15, 1/20, 1/10, 1/5 +] +num_conditions = [0, 1, 3, 7, 16] + +ψ(B::ButcherTable, tree::Expr) = elem_weights[forest[tree]](B.A, B.b, B.c) + +@variables θ[1:15] + +function gen_lhs_func(B::ButcherTable, order::Integer) + num_conds = num_conditions[order] + fA, fb, fc = [eval(build_function(syms, collect(θ[1:num_conds]); checkbounds=true)[1]) for syms ∈ (B.A, B.b, B.c)] + conds_sym = [expand(ψ(B, t)) for t ∈ forest.keys[1:num_conds]] + conds_jac = Symbolics.jacobian(conds_sym, θ[1:num_conds]) + f = eval(build_function(conds_sym, collect(θ[1:num_conds]); checkbounds=true)[2]) + J = eval(build_function(conds_jac, collect(θ[1:num_conds]); checkbounds=true)[2]) + function fill(θ::AbstractVector) + @assert length(θ) == num_conds + ButcherTable(fA(θ), fb(θ), fc(θ), B.s) + end + return f, J, fill +end + +function solve_order_conditions(f!::Function, J!::Function, fill::Function, order::Integer, rhs::Vector{<:Real}; + guess=zeros(num_conditions[order]), method=:trust_region) + @assert length(rhs) == num_conditions[order] + function g!(G, θ) + f!(G, θ) + G .-= rhs + end + result = nlsolve(g!, J!, guess; method=method) + if !result.f_converged + @warn "Order conditions solver not converged" + @info result + result = nlsolve(g!, J!, guess; method=:newton) + end + fill(result.zero) +end + +function gen_c_lhs_func(B::ButcherTable, order::Integer, name::AbstractString) + rhsnames = [:th] + num_conds = num_conditions[order] + # fill butcher table + fA = build_function( + B.A, collect(θ[1:num_conds]); + target=SunTarget(), + fname=name * "_btable_A", + lhsname=:A, rhsnames=rhsnames + ) + fb = build_function( + B.b, collect(θ[1:num_conds]); + target=SunTarget(), + fname=name * "_btable_b", + lhsname=:b, rhsnames=rhsnames + ) + fc = build_function( + B.c, collect(θ[1:num_conds]); + target=SunTarget(), + fname=name * "_btable_c", + lhsname=:c, rhsnames=rhsnames + ) + # compute order conditions + conds_sym = [expand(ψ(B, t)) for t ∈ forest.keys[1:num_conds]] + conds_jac = Symbolics.jacobian(conds_sym, θ[1:num_conds]) + func = build_function( + conds_sym, collect(θ[1:num_conds]); + target=SunTarget(), header=true, + fname=name * "_lhs", + lhsname=:phi, rhsnames=rhsnames + ) + func_j = build_function( + conds_jac, collect(θ[1:num_conds]); + target=SunTarget(), + fname=name * "_lhs_jac", + lhsname=:phi_J, rhsnames=rhsnames + ) + open("c_funcs/$name.c", "w") do file + println(file, func) + println(file, func_j) + println(file, fA) + println(file, fb) + println(file, fc) + end +end + +beuler = ButcherTable([1.0], [1.0], [1.0]) +sdirk212 = ButcherTable([1. 0.; -1. 1.], [1 / 2, 1 / 2], [1., 0.]) +sdirk212_emb = [1., 0.] + +#sdirk33 +x = 0.4358665215 +sdirk33 = ButcherTable([x 0.0 0.0; (1-x)/2 x 0; (-3.0x^2/2 + 4.0x - 1/4) (3x^2/2 - 5.0x + 5/4) x], [(-3.0x^2/2 + 4.0x - 1/4), (3.0x^2/2 - 5.0x + 5/4), x]) + +#esdirk423 +diag = 1767732205903 / 4055673282236 +A3 = zeros(4, 4) +A3[2, 1] = diag +A3[2, 2] = diag +A3[3, 1] = 2746238789719 / 10658868560708 +A3[3, 2] = -640167445237 / 6845629431997 +A3[3, 3] = diag +A3[4, 1] = 1471266399579 / 7840856788654 +A3[4, 2] = -4482444167858 / 7529755066697 +A3[4, 3] = 11266239266428 / 11593286722821 +A3[4, 4] = diag +esdirk3 = ButcherTable(A3, A3[4, :], [0, 2diag, 3 / 5, 1]) +esdirk3_emb = [2756255671327/12835298489170, -10771552573575/22201958757719, 9247589265047/10645013368117, 2193209047091/5459859503100] + +#sdirk534 +A4 = [ + 1/4 0 0 0 0; + 1/2 1/4 0 0 0; + 17/50 -1/25 1/4 0 0; + 371/1360 -137/2720 15/544 1/4 0; + 25/24 -49/48 125/16 -85/12 1/4 +] +sdirk4 = ButcherTable(A4, A4[5, :]) + + +# θ methods + +# can approximate up to 2nd order +θesdirk2 = ButcherTable([0.0 0.0; 1-θ[1] θ[1]], [1-θ[1], θ[1]]) +θesdirk2_lhs!, θesdirk2_J!, θesdirk2_fill = gen_lhs_func(θesdirk2, 2) +θesdirk2_guess = [1/2] +gen_c_lhs_func(θesdirk2, 2, "theta_esdirk2") +#= + 0 │ 0 0 + 1 │ 1 - θ θ +──────┼─────────── + 1 │ 1 - θ θ +=# + +# can approximate up to 2nd order +θsdirk2 = ButcherTable([θ[1] 0.0; 1-θ[1] θ[1]], [1-θ[1], θ[1]]) +θsdirk2_lhs!, θsdirk2_J!, θsdirk2_fill = gen_lhs_func(θsdirk2, 2) +θsdirk2_guess = [1-√2/2] +gen_c_lhs_func(θsdirk2, 2, "theta_sdirk2") +#= + θ │ θ 0 + 1 │ 1 - θ θ +──────┼─────────── + 1 │ 1 - θ θ +=# + +# can approximate up to 3rd order +θesdirk3 = ButcherTable([0. 0. 0.; θ[2]-θ[1] θ[1] 0; θ[3] (1.0-θ[3]-θ[1]) θ[1]], [θ[3], 1.0-θ[3]-θ[1], θ[1]]) +θesdirk3_lhs!, θesdirk3_J!, θesdirk3_fill = gen_lhs_func(θesdirk3, 3) +gen_c_lhs_func(θesdirk3, 3, "theta_esdirk3") +#= + 0 │ 0 0 0 + θ₂ │ θ₂ - θ₁ θ₁ 0 + 1 │ θ₃ 1 - θ₃ - θ₁ θ₁ +──────┼────────────────────────── + 2 │ θ₃ 1 - θ₃ - θ₁ θ₁ +=# + +θsdirk3 = ButcherTable([θ[1] 0.0 0.0; θ[2]-θ[1] θ[1] 0; (1.0-θ[3]-θ[1]) θ[3] θ[1]], [1.0-θ[3]-θ[1], θ[3], θ[1]]) +θsdirk3_lhs!, θsdirk3_J!, θsdirk3_fill = gen_lhs_func(θsdirk3, 3) +θsdirk3_guess = [0.4358665215, 0.71793326075, -0.6443631706532353] +gen_c_lhs_func(θsdirk3, 3, "theta_sdirk3") +#= + θ₁ │ θ₁ 0 0 + θ₂ │ θ₂ - θ₁ θ₁ 0 + 1 │ 1 - θ₃ - θ₁ θ₃ θ₁ +──────┼────────────────────────── + 2 │ 1 - θ₃ - θ₁ θ₃ θ₁ +=# + +# can approximate up to 4th order +# θsdirk4 = ButcherTable([θ[1] 0 0 0 0; θ[2]-θ[1] θ[1] 0 0; θ[3] (1-θ[3]-θ[1]) θ[1] 0 0; θ[4] 0 (1-θ[4]-θ[3]-θ[1]) θ[1]], [θ[4], 0, 1-θ[4]-θ[3]-θ[1], θ[1]]) +# θsdirk4_lhs, θsdirk4_J = gen_lhs_func(θsdirk4, 4) +# gen_c_lhs_func(θsdirk4, 4, "theta_sdirk4") +# #= +# θ₁ │ θ₁ 0 0 0 +# θ₂ │ θ₂ - θ₁ θ₁ 0 0 +# θ₃ │ θ₃ 1 - θ₃ - θ₁ θ₁ 0 +# 1 │ θ₄ 0 θ₃ θ₁ diff --git a/drivers/julia/adaptive_theta/drive-adaptive-theta.jl b/drivers/julia/adaptive_theta/drive-adaptive-theta.jl new file mode 100644 index 00000000..2e39d52a --- /dev/null +++ b/drivers/julia/adaptive_theta/drive-adaptive-theta.jl @@ -0,0 +1,267 @@ +using ToeplitzMatrices, LinearAlgebra, IterativeSolvers +using Plots +using MPI + +include("../../../braid/braid.jl/XBraid.jl") +include("butcher-tables.jl") +using .XBraid + +mutable struct AdvDifApp + # usual parameters for simulation + nx::Int + Δx::Float64 + A::Circulant + I::SymmetricToeplitz + useTheta::Bool + useRich::Bool + order::Int + cf::Int + max_levels::Int + solTf::Vector{Vector{Float64}} + residuals::Vector{Float64} + times::Vector{Float64} + spatial_tol::Float64 + + # parameters for adaptive theta + skip_downcycle::Bool + cgorder::Int + flag_refine_downcycle::Bool + fine_btable::ButcherTable + cg_btables::Vector{Vector{ButcherTable}} # butcher tables for each level/step +end + +fine_btables = [beuler, sdirk212, esdirk3, sdirk4] + +# default constructor +function AdvDifApp(nx::Integer, Δx::Real, ν::Real, α::Real, useTheta::Bool, useRich::Bool, order::Integer, cf::Integer, ml::Integer; spatial_tol=1e-3, cgorder=order+1, skip=true) + I = SymmetricToeplitz([1, zeros(nx-1)...]) + Δ = (1/Δx^2)Circulant([-2, 1, zeros(nx-3)..., 1]) + ∇ = (1/Δx)Circulant([0, -1/2, zeros(nx-3)..., 1/2]) + # ∇ = (1/Δx)Circulant([-1, 1, zeros(nx-2)...]) + A = ν*Δ - α*∇ + AdvDifApp(nx, Δx, A, I, useTheta, useRich, order, cf, ml, [], [], [], spatial_tol, skip, cgorder, false, fine_btables[order], []) +end + +mutable struct ThetaVector + u::Vector{Float64} + ψ::Vector{Float64} + t_prior::Float64 +end +function ThetaVector(u, order::Integer) + numconditions = [0, 1, 3, 7, 16] + order > 5 && return ThetaVector(u, zeros(numconditions[5]), 0.) + return ThetaVector(u, zeros(numconditions[order]), 0.) +end + +function dirk_step!(app::AdvDifApp, u::Vector{<:Real}, Δt::Real, btable::ButcherTable; embedding=nothing) + k = zeros(app.nx, btable.s) + for i in 1:btable.s + k[:, i] .= Δt * app.A * (u .+ sum(btable.A[i, j]*k[:, j] for j ∈ 1:i-1; init=zeros(app.nx))) + if btable.A[i, i] != 0 + k[:, i] .= (app.I - Δt * btable.A[i, i] * app.A) \ k[:, i] + end + end + u .= u + sum(btable.b[j] * k[:, j] for j ∈ 1:btable.s) + if embedding !== nothing + @assert length(embedding) == btable.s + err_est = sum(embedding[j] * k[:, j] for j ∈ 1:btable.s) - sum(btable.b[j] * k[:, j] for j ∈ 1:btable.s) + return norm(err_est) + end + return 0. +end + +function get_btable(app::AdvDifApp, status::XBraid.Status.StepStatus, ti::Integer) + level = XBraid.Status.getLevel(status) + if level == 0 || !app.useTheta + return app.fine_btable + end + iu, il = XBraid.Status.getTIUL(status, level) + i = ti - il + 1 + return app.cg_btables[level][i] +end + +#===================== +XBraid functions +=====================# + +function my_init(app, t) + t == 0.0 && return ThetaVector(sin.(range(0., 2π-app.Δx, app.nx)), app.cgorder) + u = randn(app.nx) + # u = zeros(app.nx) + return ThetaVector(u, app.cgorder) +end + +T = forest +function my_step!(app::AdvDifApp, status::XBraid.Status.StepStatus, u::ThetaVector, ustop::ThetaVector, tstart, tstop) + Δt = tstop - tstart + ti = XBraid.Status.getTIndex(status) + caller = XBraid.Status.getCallingFunction(status) + level = XBraid.Status.getLevel(status) + + # get Butcher table for this step + btable = get_btable(app, status, ti) + + # f-relax + if app.flag_refine_downcycle && caller ∈ [XBraid.CallerFRestrict, XBraid.CallerFASResidual] + cfactor = XBraid.Status.getCFactor(status) + # first step of f-interval + if XBraid.isCPoint(ti, cfactor) + u.ψ .= 0. + u.t_prior = tstart + end + Δt_prior = tstart - u.t_prior + ψ_old = deepcopy(u.ψ) + ψ_step = [ψ(btable, t) for t ∈ T.keys[1:length(u.ψ)]] + + # second order + u.ψ[T[:{τ}]] += Δt^2 * ψ_step[T[:{τ}]] + Δt * Δt_prior + + # third order + if app.cgorder >= 3 + u.ψ[T[:{{τ}}]] += Δt^3 * ψ_step[T[:{{τ}}]] + Δt * ψ_old[T[:{τ}]] + Δt^2 * Δt_prior * ψ_step[T[:{τ}]] + u.ψ[T[:{τ, τ}]] += Δt^3 * ψ_step[T[:{τ, τ}]] + 2Δt^2 * Δt_prior * ψ_step[T[:{τ}]] + Δt * Δt_prior^2 + end + + # last step of f-interval + if XBraid.isCPoint(ti+1, cfactor) + # normalize order conditions + Δt_c = tstop - u.t_prior + # 2nd order + u.ψ[T[:{τ}]] /= Δt_c^2 + f!, J!, fill, guess = θsdirk2_lhs!, θsdirk2_J!, θsdirk2_fill, θsdirk2_guess + # 3rd order + if app.cgorder >= 3 + u.ψ[T[:{{τ}}]] /= Δt_c^3 + u.ψ[T[:{τ, τ}]] /= Δt_c^3 + f!, J!, fill, guess = θsdirk3_lhs!, θsdirk3_J!, θsdirk3_fill, θsdirk3_guess + end + + # solve order conditions: + # @info """ + # Solving order conditions at t = $tstop + # ψ = $(u.ψ) + # """ + iu, il = XBraid.Status.getTIUL(status, level+1) + ic = XBraid.mapFineToCoarse(ti, cfactor) - il + 1 + app.cg_btables[level+1][ic] = solve_order_conditions(f!, J!, fill, app.cgorder, u.ψ; guess=guess) + end + end + + # turn off order conditions once upcycle starts + if app.flag_refine_downcycle && caller == XBraid.Status.CallerFInterp + @info "done computing coarse grid butcher tables" + app.flag_refine_downcycle = false + end + + # finally, take the step + embed = nothing + if level == 0 && app.order == 2 + embed = sdirk212_emb + end + # println("level: $level, ti: $ti") + # display(btable) + # println("emb: $embed") + err_est = dirk_step!(app, u.u, Δt, btable; embedding=embed) + if err_est >= app.spatial_tol && caller != XBraid.CallerWarmup + XBraid.Status.setRFactor(status, 2) + end +end + +function my_sync!(app::AdvDifApp, status::XBraid.SyncStatus) + app.useTheta || return + caller = XBraid.Status.getCallingFunction(status) + iter = XBraid.Status.getIter(status) + # println("sync! caller: $caller, iter: $(XBraid.Status.getIter(status))") + + if caller == XBraid.CallerDrive_AfterInit || caller == XBraid.Status.CallerFRefine_AfterInitHier + app.cg_btables = Vector{ButcherTable}[] + # initialize storage for coarse grid butcher tables + nlevels = XBraid.Status.getNLevels(status) + for l ∈ 1:nlevels-1 + iu, il = XBraid.Status.getTIUL(status, l) + ncpoints = iu - il + 1 + push!(app.cg_btables, [app.fine_btable for i ∈ 1:ncpoints]) + end + + # turn on computation of coarse grid butcher tables + app.flag_refine_downcycle = true + end + + # make sure we still compute these when we skip the first downcycle + if app.skip_downcycle && caller == XBraid.CallerDrive_TopCycle && iter == 0 + app.flag_refine_downcycle = true + end + app.flag_refine_downcycle && @info "Recomputing coarse grid butcher tables" + return +end + +function my_norm(app::AdvDifApp, u::ThetaVector) + LinearAlgebra.normInf(u.u) +end + +function my_sum!(app::AdvDifApp, α::Real, x::ThetaVector, β::Real, y::ThetaVector) + y.u .= α * x.u + β * y.u +end + +function my_access(app::AdvDifApp, status, u) + XBraid.Status.getWrapperTest(status) && return + t = XBraid.Status.getT(status) + ti = XBraid.Status.getTIndex(status) + ntime = XBraid.Status.getNTPoints(status) + done = XBraid.Status.getDone(status) + if ti == ntime + push!(app.solTf, deepcopy(u.u)) + end + if done + push!(app.times, t) + end +end + +function test(;ν=1., α=1., order=1) + MPI.Init() + comm = MPI.COMM_WORLD + + my_app = AdvDifApp(512, 2π/513, ν, α, false, false, order, 2, 2) + app = XBraid.BraidApp(my_app, comm, my_step!, my_init, my_access; sum=my_sum!, spatialnorm=my_norm) + + XBraid.testInitAccess(app, 0.) + XBraid.testSum(app, 0.) + XBraid.testSpatialNorm(app, 0.) + u = my_init(my_app, 0.) + println(my_norm(my_app, u)) + plot(u.u, label="u0", legend=:bottomright) + my_step!(my_app, XBraid.StepStatus(), u, u, 0., .05) + println(my_norm(my_app, u)) + plot!(u.u, label="u1") +end + +function main(;tstop=2π, ntime=512, nx=512, ν=1., α=1., useTheta=false, useRich=false, cf=4, ml=2, maxiter=10, order=1, skip=false, refine=false, maxrefine=2, finefcf=true, tol=1e-3) + MPI.Init() + comm = MPI.COMM_WORLD + + Δx = 2π / nx + app = AdvDifApp(nx, Δx, ν, α, useTheta, useRich, order, cf, ml; skip=skip, spatial_tol=tol) + + core = XBraid.Init(comm, 0., tstop, ntime, my_step!, my_init, my_access; + app=app, sync=my_sync!, sum=my_sum!, spatialnorm=my_norm) + + XBraid.setPrintLevel(core, 2) + XBraid.setMaxLevels(core, ml) + XBraid.setCFactor(core, -1, app.cf) + XBraid.setAccessLevel(core, 1) + XBraid.setNRelax(core, -1, 1) + XBraid.setAbsTol(core, 1e-8) + XBraid.setMaxIter(core, maxiter) + XBraid.setSkip(core, skip) + XBraid.setRefine(core, refine) + XBraid.setMaxRefinements(core, maxrefine) + if !finefcf + XBraid.setNRelax(core, 0, 0) + end + + XBraid.Drive(core) + + append!(app.residuals, XBraid.getRNorms(core)) + + return app +end \ No newline at end of file diff --git a/drivers/julia/drive-TaylorGreen.jl b/drivers/julia/drive-TaylorGreen.jl new file mode 100644 index 00000000..e1266e09 --- /dev/null +++ b/drivers/julia/drive-TaylorGreen.jl @@ -0,0 +1,329 @@ +using ForwardDiff, DiffResults, LinearAlgebra, PreallocationTools +using IterativeSolvers, LinearMaps, SparseArrays, Interpolations +using MPI, BenchmarkTools +using Plots + +include("../../braid/braid.jl/XBraid.jl") +using .XBraid + +MPI.Init() +comm = MPI.COMM_WORLD + +Float = Float64 + +""" +struct to package preallocated caches for storing temp coords and velocity +as well as the sparse poisson matrix needed by project_incompressible!() +""" +struct TGApp + x_d::DiffCache + u_d::DiffCache + ϕ_d::DiffCache + Δ_s::SparseMatrixCSC + solution + lyapunov_vecs + lyapunov_exps + times +end + +# default constructor +function TGApp(x::AbstractArray, u::AbstractArray, ϕ::AbstractArray, Δ::SparseMatrixCSC) + TGApp(DiffCache(x), DiffCache(u), DiffCache(ϕ), Δ, [], [], [], []) +end + +""" +reshapes 1D array into vector of 3D arrays, with no allocation +""" +function get_views(u) + @views begin + u_x = reshape(u[1:nₓ^3], (nₓ, nₓ, nₓ)) + u_y = reshape(u[nₓ^3+1:2*nₓ^3], (nₓ, nₓ, nₓ)) + u_z = reshape(u[2*nₓ^3+1:end], (nₓ, nₓ, nₓ)) + end + return [u_x, u_y, u_z] +end + +# some helpers for doing modulo arithmetic with CartesianIndex +Base.mod1(I::CartesianIndex, J::CartesianIndex) = CartesianIndex(broadcast(mod1, Tuple(I), Tuple(J))) +δ(I::CartesianIndex{N}, dim) where N = CartesianIndex(ntuple(i -> i == dim ? 1 : 0, N)) + +function TaylorGreen!(u, k = 1) + kx = trunc(Int64, k/2) + k%2 + ky = trunc(Int64, k/2) + 1 + kz = trunc(Int64, k/2) + 1 + u_x, u_y, u_z = u + x, y, z = coordinates + @. u_x = sin(kx * x) * cos(ky * y) * cos(kz * z) + @. u_y = -cos(kx * x) * sin(ky * y) * cos(kz * z) + @. u_z = 0.0 + return +end + +""" +finite difference approx first derivative +of A wrt dimension *dim* +""" +function ∂(A, dim; h = Δx) + # This works for A of arbitrary dimension + out = similar(A) + R = CartesianIndices(A) + bound = last(R) + @views @inbounds for I ∈ R + # second order + I⁻ = mod1(I - δ(I, dim), bound) + I⁺ = mod1(I + δ(I, dim), bound) + out[I] = 1 / 2h * (A[I⁺] - A[I⁻]) + end + return out +end + +# divergence, curl, gradient, and laplacian +∇_dot(V) = +sum(enumerate(V)) do (i, V_i) + ∂(V_i, i) +end + +function ∇X(V) + out_arr = zeros(3 * nₓ^3) + out = get_views(out_arr) + Vx, Vy, Vz = V + x, y, z = 1:3 + out[x] .= ∂(Vz, y) - ∂(Vy, z) + out[y] .= ∂(Vx, z) - ∂(Vz, x) + out[z] .= ∂(Vy, x) - ∂(Vx, y) + return out +end + +∇(F) = [∂(F, i) for i ∈ 1:3] +# Δ(F) = ∇_dot(∇(F)) # this is asymmetric... + +function my_poisson(f_arr) + out_arr = similar(f_arr) + F = reshape(f_arr, (nₓ, nₓ, nₓ)) + out = reshape(out_arr, (nₓ, nₓ, nₓ)) + R = CartesianIndices(F) + bound = last(R) + @inbounds for I ∈ R + out[I] = -6 * F[I] + @inbounds for dim ∈ 1:3 + I⁺ = mod1(I + δ(I, dim), bound) + I⁻ = mod1(I - δ(I, dim), bound) + out[I] += F[I⁺] + F[I⁻] + end + out[I] *= 1 / Δx^2 + end + # need a single point condition u(0,0,0) = 0., else + # this system is singular (ones(nₓ^3) nullspace) + out[first(R)] += F[first(R)] + return out_arr +end + +function my_poisson_mat(nₓ) + flat_index(I::CartesianIndex) = I[1] + (I[2] - 1) * nₓ + (I[3] - 1) * nₓ^2 + rowinds = Array{Int}([]) + colinds = Array{Int}([]) + nzs = Array{Float}([]) + function make_connection!(I_r, I_c, nz) + push!(rowinds, flat_index(I_r)) + push!(colinds, flat_index(I_c)) + push!(nzs, nz) + end + diag = -6 / Δx^2 + offd = 1 / Δx^2 + bound = CartesianIndex((nₓ, nₓ, nₓ)) + for I in CartesianIndices((1:nₓ, 1:nₓ, 1:nₓ)) + make_connection!(I, I, diag) + for dim ∈ 1:3 + I⁺ = mod1(I + δ(I, dim), bound) + I⁻ = mod1(I - δ(I, dim), bound) + make_connection!(I, I⁺, offd) + make_connection!(I, I⁻, offd) + end + end + # point condition u(0,0,0) = 0., else system is singular + nzs[1] += 1.0 + return sparse(rowinds, colinds, nzs) +end + +""" +compute self advection of u +""" +function advect_semi_lagrangian!(app::TGApp, u_arr, Δt) + # get cached arrays (either real or dual, depending on typeof(u)) + coords_new_arr = get_tmp(app.x_d, u_arr) + u_new_arr = get_tmp(app.u_d, u_arr) + + u = get_views(u_arr) + u_new = get_views(u_new_arr) + + # (1) trace each grid point backwards along u + @. coords_new_arr = coords_arr - Δt * u_arr + coords_new = get_views(coords_new_arr) + + # (2) interpolate the velocity field at the new grid points + x_range = range(0., 2π - Δx, nₓ) + for i in 1:3 + itp = interpolate(u[i], BSpline(Linear(Periodic()))) + sitp = scale(itp, x_range, x_range, x_range) + extp = extrapolate(sitp, Periodic()) + + u_new[i] .= extp.(coords_new...) + end + u_arr .= u_new_arr + return u_arr +end + +""" +Solve Poisson problem for incompressible velocity +Δϕ = ∇⋅u +u = u - ∇ϕ +""" +function project_incompressible!(app::TGApp, u_arr) + u = get_views(u_arr) + ϕ_arr = get_tmp(app.ϕ_d, u_arr) + ϕ_arr .= 0. + rhs = vec(∇_dot(u)) + cg!(ϕ_arr, app.Δ_s, rhs; abstol=1/2*Δx^2) + # cg!(ϕ_arr, app.Δ_s, rhs) + + ϕ = reshape(ϕ_arr, (nₓ, nₓ, nₓ)) + for i in 1:3 + u[i] .-= ∂(ϕ, i) + end + return u_arr +end + +function base_step(app::TGApp, u_arr, Δt) + # print("Advection : ") + advect_semi_lagrangian!(app, u_arr, Δt) + # diffuse_backward_Euler!(u_arr, Δt) # numerical diffusion may be enough + # print("Projection: ") + project_incompressible!(app, u_arr) + return u_arr +end + +nₓ = 64 +Δx = 2π / nₓ + +function my_init(app, t) + u_arr = zeros(Float, 3 * nₓ^3) + u = get_views(u_arr) + TaylorGreen!(u) + return u_arr +end + +function my_basis_init(app, t, i) + ψ_arr = zeros(Float, 3 * nₓ^3) + ψ = get_views(ψ_arr) + TaylorGreen!(ψ, i + 1) + ψ_arr .+= Δx^3*randn(Float, 3 * nₓ^3) + return ψ_arr +end + +function my_step!(app, status, u, ustop, tstart, tstop) + u = base_step(app, u, tstop - tstart) + return +end + +function my_step!(app, status, u, ustop, tstart, tstop, Ψ) + Δt = tstop - tstart + rank = length(Ψ) + Ψ_new = reduce(hcat, Ψ) + perturb(r) = base_step(app, u + r' * Ψ, Δt) + + result = DiffResults.DiffResult(u, Ψ_new) + result = ForwardDiff.jacobian!(result, perturb, zeros(rank)) + for i in 1:rank + Ψ[i] .= Ψ_new[:, i] + end + return +end + +function my_sum!(app, a, x, b, y) + @. y = a * x + b * y +end + +function my_access(app, status, u) + index = XBraid.Status.getTIndex(status) + lyap_exps = XBraid.Status.getLocalLyapExponents(status) + lyap_vecs = XBraid.Status.getBasisVectors(status) + if length(lyap_vecs) > 0 + push!(app.lyapunov_vecs, deepcopy(reduce(hcat, lyap_vecs))) + push!(app.lyapunov_exps, lyap_exps) + end + + push!(app.solution, deepcopy(u)) + push!(app.times, index) +end + +my_norm(app, u) = Δx^3 * LinearAlgebra.norm2(u) +my_innerprod(app, u, v) = u' * v + +coords_arr = zeros(Float, 3 * nₓ^3) +coordinates = get_views(coords_arr) +x_bound = 2π - Δx +x_range = range(0, x_bound, nₓ) +coordinates[1] .= [x for x ∈ x_range, y ∈ x_range, z ∈ x_range] +coordinates[2] .= [y for x ∈ x_range, y ∈ x_range, z ∈ x_range] +coordinates[3] .= [z for x ∈ x_range, y ∈ x_range, z ∈ x_range]; + +function test() + # preallocations + x_d = zeros(Float, 3 * nₓ^3) + u_d = zeros(Float, 3 * nₓ^3) + ϕ_d = zeros(Float, nₓ^3) + Δ = my_poisson_mat(nₓ) + my_app = TGApp(x_d, u_d, ϕ_d, Δ) + + # test user routines: + test_app = XBraid.BraidApp( + my_app, comm, comm, + my_step!, my_init, + my_sum!, my_norm, my_access, + my_basis_init, my_innerprod) + + XBraid.testInitAccess(test_app, 0.0) + XBraid.testClone(test_app, 0.0) + XBraid.testSpatialNorm(test_app, 0.0) + XBraid.testBuf(test_app, 0.0) + @time XBraid.testDelta(test_app, 0.0, 0.1, 3) + + curlz = ∇X(get_views(my_app.solution[1]))[3] + heatmap(curlz[:, :, 1]) +end + +function main(;ml=1, tstop=4., ntime=32, delta_rank=0) + # preallocations + x_d = zeros(Float, 3 * nₓ^3) + u_d = zeros(Float, 3 * nₓ^3) + ϕ_d = zeros(Float, nₓ^3) + Δ = my_poisson_mat(nₓ) + my_app = TGApp(x_d, u_d, ϕ_d, Δ) + + # theme(:dracula) + tstart = 0.0 + + core = XBraid.Init( + comm, comm, tstart, tstop, ntime, + my_step!, my_init, my_sum!, my_norm, my_access; app = my_app, + )::XBraid.BraidCore + + if delta_rank > 0 + XBraid.SetDeltaCorrection(core, delta_rank, my_basis_init, my_innerprod) + XBraid.SetLyapunovEstimation(core; exponents=true) + end + XBraid.SetMaxLevels(core, ml) + XBraid.SetMaxIter(core, 10) + XBraid.SetCFactor(core, -1, 2) + XBraid.SetAccessLevel(core, 1) + XBraid.SetNRelax(core, -1, 0) + XBraid.SetAbsTol(core, 1e-6) + + @time XBraid.Drive(core) + + p = sortperm(my_app.times) + curlz_start = ∇X(get_views(my_app.solution[p][1]))[3] + curlz_end = ∇X(get_views(my_app.solution[p][end]))[3] + plot(heatmap(curlz_start[1, :, :]), heatmap(curlz_end[1, :, :]), size=(900,400)) + return my_app +end \ No newline at end of file diff --git a/drivers/julia/drive-advdiff-theta.jl b/drivers/julia/drive-advdiff-theta.jl new file mode 100644 index 00000000..d79b7b2c --- /dev/null +++ b/drivers/julia/drive-advdiff-theta.jl @@ -0,0 +1,151 @@ +using ToeplitzMatrices, LinearAlgebra +using Plots +using MPI +using IterativeSolvers + +include("../../braid/braid.jl/XBraid.jl") +using .XBraid + +struct AdvDifApp + nx::Int + Δx::Float64 + A::Circulant + I::SymmetricToeplitz + useTheta::Bool + useRich::Bool + cf::Int + order::Int + solTf::Vector{Vector{Float64}} + residuals::Vector{Float64} +end +# default constructor +function AdvDifApp(nx::Integer, Δx::Real, ν::Real, α::Real, useTheta::Bool, useRich::Bool, cf::Integer, order::Integer) + I = SymmetricToeplitz([1, zeros(nx-1)...]) + # Δ = (1/Δx^2)SymmetricToeplitz([-2, 1, zeros(nx-2)...]) + # ∇ = (1/Δx)Toeplitz([0, -1/2, zeros(nx-2)...], [0, 1/2, zeros(nx-2)...]) + Δ = (1/Δx^2)Circulant([-2, 1, zeros(nx-3)..., 1]) + ∇ = (1/Δx)Circulant([0, -1/2, zeros(nx-3)..., 1/2]) + # ∇ = (1/Δx)Circulant([-1, 1, zeros(nx-2)...]) + A = ν*Δ - α*∇ + AdvDifApp(nx, Δx, A, I, useTheta, useRich, cf, order, [], []) +end + +function my_init(app, t) + t == 0.0 && return sin.(range(0., 2π-app.Δx, app.nx)) + return randn(app.nx) +end + +function backward_euler!(app, u, Δt) + u .= (app.I - Δt * app.A) \ u + # u .= cg(app.I - Δt * app.A, u) + return u +end + +function θ_euler!(app, u, Δt, m) + θ = (m - 1)/2m + u .= (app.I - (1-θ)Δt * app.A) \ ((app.I + θ*Δt * app.A) * u) + return u +end + +function SDIRK2!(app, u, Δt) + a = 1/√2 + k1 = Δt*((app.I - (1-a) * Δt * app.A) \ (app.A*u)) + k2 = Δt*((app.I - (1-a) * Δt * app.A) \ (app.A*(u + (2a-1)*k1))) + u .+= 1/2*(k1 + k2) + return u +end + +function θSDIRK2!(app, u, Δt, m) + α = √2√(m*(m-1))/2m + θ = (m^2 -3m - √2√(m*(m-1)))/(2m^2 - 4m) + k1 = Δt*((app.I - (1-α) * Δt * app.A) \ (app.A*u)) + k2 = Δt*((app.I - (1-α) * Δt * app.A) \ (app.A*(u + (2α-1)*k1))) + u .+= (θ)k1 + (1-θ)k2 + return u +end + +function base_step!(app, u, Δt, level; step_scaling=1) + if app.order == 2 + if app.useTheta + throw("Theta method not implemented for order 2") + end + return SDIRK2!(app, u, Δt) + end + + if app.useTheta && level > 0 + return θSDIRK2!(app, u, Δt, app.cf^level ÷ step_scaling) + # return θ_euler!(app, u, Δt, app.cf^level) + end + + return backward_euler!(app, u, Δt) +end + +function my_step!(app, status, u, ustop, tstart, tstop) + Δt = tstop - tstart + level = XBraid.Status.getLevel(status) + if !app.useRich || level == 0 + return base_step!(app, u, Δt, level) + end + m = app.cf ^ level + p = app.order + if app.useTheta + p += 1 + end + θ = 2^p * (m^p - 1) / (m^p * (2^p - 1)) + u_sub = deepcopy(u) + base_step!(app, u_sub, Δt/2, level; step_scaling=2) + base_step!(app, u_sub, Δt/2, level; step_scaling=2) + base_step!(app, u, Δt, level) + @. u = θ*u_sub + (1-θ)*u + return u +end + +function my_access(app, status, u) + XBraid.Status.getWrapperTest(status) && return + ti = XBraid.Status.getTIndex(status) + ntime = XBraid.Status.getNTPoints(status) + if ti == ntime + push!(app.solTf, deepcopy(u)) + end +end + +function test(;ν=1., α=1., order=1) + MPI.Init() + comm = MPI.COMM_WORLD + + my_app = AdvDifApp(512, 2π/513, ν, α, false, false, 2, order) + app = XBraid.BraidApp(my_app, comm, my_step!, my_init, my_access) + + XBraid.testInitAccess(app, 0.) + XBraid.testSpatialNorm(app, 0.) + u = my_init(my_app, 0.) + plot(u, label="u0", legend=:bottomright) + my_step!(my_app, nothing, u, nothing, 0., .1) + plot!(u, label="u1") +end + + +function main(;tstop=2π, ntime=512, nx=512, ν=1., α=1., useTheta=false, useRich=false, cf=2, ml=2, order=1) + MPI.Init() + comm = MPI.COMM_WORLD + + Δx = 2π / nx + app = AdvDifApp(nx, Δx, ν, α, useTheta, useRich, cf, order) + + core = XBraid.Init(comm, 0.0, tstop, ntime, my_step!, my_init, my_access; app=app) + + XBraid.SetPrintLevel(core, 2) + XBraid.SetMaxLevels(core, ml) + XBraid.SetCFactor(core, -1, app.cf) + XBraid.SetAccessLevel(core, 1) + XBraid.SetNRelax(core, -1, 1) + XBraid.SetAbsTol(core, 1e-8) + XBraid.SetMaxIter(core, 40) + XBraid.SetSkip(core, false) + + XBraid.Drive(core) + + append!(app.residuals, XBraid.GetRNorms(core)) + + return app +end \ No newline at end of file diff --git a/drivers/julia/drive-burgers.jl b/drivers/julia/drive-burgers.jl new file mode 100644 index 00000000..d0c05d96 --- /dev/null +++ b/drivers/julia/drive-burgers.jl @@ -0,0 +1,306 @@ +using LinearAlgebra, PreallocationTools, ForwardDiff, DiffResults +using MPI, Interpolations, IterativeSolvers +using SparseArrays, ToeplitzMatrices +using FFTW +using Plots +using LinearAlgebra: norm2 +using Random: seed! +# theme(:dracula) + +include("../../braid/braid.jl/XBraid.jl") +using .XBraid + +MPI.Init() +comm = MPI.COMM_WORLD + +struct BurgerApp + x::Vector{Float64} + κ::Frequencies{Float64} + μ::Float64 + cf::Int + useTheta::Bool + # preallocated caches that support ForwardDiff: + x_d::DiffCache + y_d::DiffCache + # storage for solution values + solution::Vector{Vector{Float64}} + lyap_vecs::Vector{Union{Matrix{Float64}, Vector{Float64}}} + lyap_exps::Vector{Vector{Float64}} + times::Vector{Int} + # pre-planned fourier transform + P̂ +end + +function BurgerApp(x::Vector{<:Real}, κ::Frequencies{<:Real}, μ::Real, cf::Integer, useTheta::Bool, x_cache::AbstractArray, y_cache::AbstractArray) + BurgerApp(x, κ, μ, cf, useTheta, DiffCache(x_cache), DiffCache(y_cache), [], [], [], [], plan_fft(x)) +end + +# system parameters (globally scoped) +const lengthScale = 2π +const nₓ = 1024 +const Δx = lengthScale / nₓ + +function stencil_to_circulant(stencil::Vector{T}, nx::Integer) where T + @assert isodd(length(stencil)) "stencil should have odd number of entries" + stencilWidth = length(stencil) + center = ceil(Int, length(stencil)/2) + columnView = @view(stencil[end:-1:1]) + v = [columnView[center:end]; zeros(T, nx-stencilWidth); columnView[1:center-1]] + return Circulant(v) +end + +function euler_mat(Δt::Float64; μ::Float64=.00001) + # diffusion + # A = spdiagm(-1 => ones(nₓ-1), 0 => -2*ones(nₓ), 1 => ones(nₓ-1)) + # A[1, end] = 1 + # A[end, 1] = 1 + # A = sparse(I, (nₓ, nₓ)) - Δt/Δx^2 * μ * A + + # KS operator + ∇⁴ = [1, -4, 6, -4, 1] + ∇² = [0, 1, -2, 1, 0] + I = [0, 0, 1, 0, 0] + stencil = @. I - Δt*(-1/Δx^4 * ∇⁴ - 1/Δx^2 * ∇²) + # stencil = @. I - Δt(-μ/Δx^4 * ∇⁴) + stencil_to_circulant(stencil, nₓ) +end + +function semi_lagrangian!(burger::BurgerApp, y, v, Δt) + x_back = get_tmp(burger.x_d, y) + y_intp = get_tmp(burger.y_d, y) + + # initialize interpolation + itp = interpolate(y, BSpline(Linear(Periodic()))) + sitp = scale(itp, range(0., lengthScale - Δx, nₓ)) + extp = extrapolate(sitp, Periodic()) + + x_back .= burger.x .- Δt * v + + # y_intp .= interp_periodic.(x_back, Ref(y)) + y_intp .= extp.(x_back) + y .= y_intp + return y +end + +function diffuse_beuler!(burger::BurgerApp, y, Δt::Float64; init_guess=nothing) + y_tmp = get_tmp(burger.y_d, y) + y_tmp .= y + + if init_guess !== nothing + y .= init_guess + end + + cg!(y, euler_mat(Δt), y_tmp) + return y +end + +function diffuse_fft!(burger::BurgerApp, y::AbstractArray, Δt::Real) + P̂ = burger.P̂ + κ = burger.κ + # y .= real(P̂ \ (exp.(Δt*(κ.^2 - κ.^4)))) + ŷ = P̂ * y + @. ŷ *= exp(-burger.μ * Δt * κ^2) # standard diffusion + # @. ŷ *= exp(Δt*(κ^2 - κ^4)) # KS-equation operator + y .= real(P̂ \ ŷ) + return y +end + +function extractPartials(y::Vector{ForwardDiff.Dual{T,V,P}}) where {T,V,P} + ps = zeros(V, size(y)..., P) + for i ∈ eachindex(y), j ∈ 1:P + @inbounds ps[i, j] = ForwardDiff.partials(y[i], j) + end + return ps +end + +function fillDualArray!(y::Vector{ForwardDiff.Dual{T,V,P}}, vs, ps) where {T,V,P} + checkbounds(ps, firstindex(y), 1:P) + for i ∈ eachindex(y) + @inbounds y[i] = ForwardDiff.Dual{T}(vs[i], ntuple(j -> @inbounds(ps[i, j]), P)) + end +end + +# this enables ForwardDiff through the FFT where it normally doesn't work +function diffuse_fft!(burger::BurgerApp, y::Vector{ForwardDiff.Dual{T,V,P}}, Δt::Real) where {T, V, P} + vs = ForwardDiff.value.(y) + ps = extractPartials(y) + diffuse_fft!(burger, vs, Δt) + map(eachcol(ps)) do p diffuse_fft!(burger, p, Δt) end + fillDualArray!(y, vs, ps) + return y +end + +# user routines: +function my_init(burger::BurgerApp, t::Float64) + # sin + u = sin.(2π/lengthScale * burger.x) + # gaussian + # u = exp.(-(burger.x .- lengthScale/2).^2 / (lengthScale/5)^2) + # if t == 0 + # seed!(1234) + # u .+= 1e-1randn(nₓ) .* u + # end + # @. u = exp(-(burger.x - lengthScale/2)^2) + return u +end + +function my_basis_init(burger::BurgerApp, t::Float64, k::Int32) + x = burger.x + ψ = similar(x) + if k % 2 == 0 + @. ψ = cos(k/2*x) + else + @. ψ = sin((k+1)/2*x) + end + return ψ +end + +function base_step!(burger::BurgerApp, u, Δt::Float64; init_guess=nothing) + u_mid = deepcopy(u) + # semi_lagrangian!(burger, u_mid, u, Δt/2) + # semi_lagrangian!(burger, u, u_mid, Δt) # u(∇ u) + # semi_lagrangian!(burger, u, sin.(burger.x), Δt) # ∇ u + semi_lagrangian!(burger, u, u, Δt) # u(∇ u) + if burger.μ > 0. + diffuse_fft!(burger, u, Δt) # Δu + end +end + +function my_step!( + burger::BurgerApp, status::XBraid.Status.StepStatus, + u, ustop, tstart::Float64, tstop::Float64 +) + Δt = tstop - tstart + level = XBraid.Status.getLevel(status) + if !burger.useTheta || level == 0 + base_step!(burger, u, Δt; init_guess=ustop) + else + # richardson based θ method + m = burger.cf^level + θ = 2(m - 1)/m + # p = 0.86 + # p = 1. + # θ = 2^p * (m^p - 1) / (m^p * (2^p - 1)) + # θ = 2 + u_sub = deepcopy(u) + base_step!(burger, u_sub, Δt/2; init_guess=(ustop .- u)./2) + base_step!(burger, u_sub, Δt/2; init_guess=ustop) + base_step!(burger, u, Δt; init_guess=u_sub) + @. u = θ*u_sub + (1-θ)*u + end + return u +end + +function my_step!(burger::BurgerApp, status::XBraid.Status.StepStatus, u, ustop, tstart::Float64, tstop::Float64, Ψ) + rank = length(Ψ) + Ψ_new = reduce(hcat, Ψ) + # perturb(r) = base_step!(burger, u + r' * Ψ, Δt) + perturb(r) = my_step!(burger, status, u + r' * Ψ, ustop, tstart, tstop) + + result = DiffResults.DiffResult(u, Ψ_new) + result = ForwardDiff.jacobian!(result, perturb, zeros(rank)) + for i in eachindex(Ψ) + Ψ[i] .= Ψ_new[:, i] + end +end + +function my_sum!(burger, a, x, b, y) + @. y = a*x + b*y +end + +function my_access(burger::BurgerApp, status::XBraid.Status.AccessStatus, u) + XBraid.Status.getWrapperTest(status) && return + ti = XBraid.Status.getTIndex(status) + push!(burger.solution, deepcopy(u)) + push!(burger.times, ti) + if XBraid.Status.getDeltaRank(status) > 0 + Ψ = XBraid.Status.getBasisVectors(status) + λ = XBraid.Status.getLocalLyapExponents(status) + push!(burger.lyap_vecs, deepcopy(reduce(hcat, Ψ))) + push!(burger.lyap_exps, deepcopy(λ)) + end +end + +# test user routines: +function test() + x_new = zeros(nₓ) + y_new = zeros(nₓ) + x = Array(range(0., lengthScale-Δx, nₓ)) + κ = 2π/lengthScale .* fftfreq(nₓ, nₓ) + burger = BurgerApp(x, κ, 1e-3, 4, false, x_new, y_new); + test_app = XBraid.BraidApp(burger, comm, my_step!, my_init, my_access; basis_init=my_basis_init) + + open("drive-burgers.test.out", "w") do file + cfile = Libc.FILE(file) + XBraid.testBuf(test_app, 0.0, cfile) + XBraid.testSpatialNorm(test_app, 0.0, cfile) + XBraid.testDelta(test_app, 0.0, 0.1, 3, cfile) + end + # plot!(burger.lyap_vecs[1][1]) + # plot(burger.solution[1]) +end + +function main(;tstop=π, ntime=nₓ, deltaRank=0, useTheta=false, fmg=false, ml=1, cf=4, saveGif=false, maxiter=30, μ=0.) + x_new = zeros(nₓ) + y_new = zeros(nₓ) + x = Array(range(0., lengthScale-Δx, nₓ)) + κ = 2π/lengthScale .* fftfreq(nₓ, nₓ) + burger = BurgerApp(x, κ, μ, 4, useTheta, x_new, y_new); + + tstart = 0.0 + core = XBraid.Init(comm, tstart, tstop, ntime, my_step!, my_init, my_access; app=burger) + + if deltaRank > 0 + XBraid.setDeltaCorrection(core, deltaRank, my_basis_init) + XBraid.setLyapunovEstimation(core; exponents=true) + end + + # println("Wrapper test:") + # @time test() + + XBraid.setMaxIter(core, maxiter) + XBraid.setMaxLevels(core, ml) + XBraid.setCFactor(core, -1, cf) + XBraid.setAccessLevel(core, 1) + XBraid.setNRelax(core, -1, 1) + XBraid.setAbsTol(core, 1e-6) + XBraid.setSkip(core, false) + if fmg + XBraid.setFMG(core) + XBraid.setNFMG(core, 1) + end + + XBraid.setTimings(core, 2) + XBraid.Drive(core) + XBraid.printTimers(core) + + if deltaRank > 0 + exponents = sum(burger.lyap_exps) + exponents = MPI.Allreduce(exponents, (+), comm) + exponents ./= tstop + if MPI.Comm_rank(comm) == 0 + println("exponents: ", exponents) + end + end + + if saveGif + for (ti, u) in zip(burger.times, burger.solution) + plt = plot(burger.x, u; ylim=(-1.5, 1.5)) + savefig(plt, "burgers_gif/$(lpad(ti, 6, "0")).png") + end + MPI.Barrier(comm) + + if MPI.Comm_rank(comm) == 0 + println("animating...") + fnames = [lpad(i, 6, "0") for i in 0:ntime] + anim = Animation("burgers_gif", fnames) + Plots.buildanimation(anim, "burgers_gif/anim.gif", fps=30) + end + end + + return burger, XBraid.getRNorms(core) +end + +# test() +# main(); +nothing \ No newline at end of file diff --git a/drivers/julia/drive-kflow.jl b/drivers/julia/drive-kflow.jl new file mode 100644 index 00000000..9c61f738 --- /dev/null +++ b/drivers/julia/drive-kflow.jl @@ -0,0 +1,431 @@ +using ForwardDiff, DiffResults, LinearAlgebra, PreallocationTools +using Interpolations, FFTW +using MPI, Base.Threads +using BenchmarkTools, Plots, LaTeXStrings +using Statistics: mean +using Random: seed! +using LinearAlgebra: norm, normInf + +include("../../braid/braid.jl/XBraid.jl") +using .XBraid + + +Float = Float64 + +""" +struct containing everything needed internally for the simulation +""" +struct KFlowApp + cf::Int + useTheta::Bool + deltaRank::Int + coords::Array{Float, 3} + κ::Frequencies{Float} + k_num::Int + ℜ::Float + solution::Vector{Array{Float, 3}} + solTf::Vector{Array{Float, 3}} + lyapunov_vecs::Vector{Vector{Array{Float, 3}}} + lyapunov_exps::Vector{Vector{Float}} + times::Vector{Int} + x_d::DiffCache{Array{Float, 3}} + u_d::DiffCache{Array{Float, 3}} + ϕ_d::DiffCache{Array{Complex{Float}, 2}} + P̂::FFTW.FFTWPlan +end + +# default constructor +function KFlowApp(cf::Integer, useTheta::Bool, deltaRank::Integer, k_num::Integer, ℜ::Real) + coords = zeros(Float, nₓ, nₓ, 2) + x_range = range(0, lengthScale - Δx, nₓ) + @views @inbounds for (i, x) ∈ enumerate(x_range), (j, y) ∈ enumerate(x_range) + coords[i, j, 1] = x + coords[i, j, 2] = y + end + κ = 2π / lengthScale .* fftfreq(nₓ, nₓ) + # preallocations + x_d = zeros(Float, nₓ, nₓ, 2) + u_d = zeros(Float, nₓ, nₓ, 2) + ϕ_d = zeros(Complex{Float}, nₓ, nₓ) + KFlowApp(cf, useTheta, deltaRank, coords, κ, k_num, ℜ, [], [], [], [], [], DiffCache(x_d), DiffCache(u_d), DiffCache(ϕ_d), plan_fft(coords, [1, 2])) +end + +oneball(n) = [(n + 1 - i, i) for i ∈ 1:n] +wavenumbers(shell) = reduce(hcat, [oneball(i) for i ∈ 1:shell]) +is_in_shell(i, shell) = i <= trunc(Int, shell * (shell + 1) / 2) + +""" +compute ith wavenumber for a 2D scalar field +""" +function get_wavenumber2D(i) + shell = 1 + for _ ∈ 1:i + is_in_shell(i, shell) && break + shell += 1 + end + shell_start = trunc(Int, shell * (shell - 1) / 2) + rel_i = i - shell_start + return oneball(shell)[rel_i] +end + +""" +compute the ith Fourier mode of a 1D scalar field +""" +function fourierMode1D(x::Real, k::Integer) + k % 2 == 1 && return cos(trunc(k / 2) * 2π / lengthScale * x) + k % 2 == 0 && return sin(trunc((k + 1) / 2) * 2π / lengthScale * x) +end + +""" +compute the ith Fourier mode of a 2D scalar field +""" +function fourierMode2D!(app::KFlowApp, u_field, i::Integer) + kx, ky = get_wavenumber2D(i) + @views x, y = app.coords[:, :, 1], app.coords[:, :, 2] + @. u_field = fourierMode1D(x, kx) * fourierMode1D(y, ky) +end + +""" +compute the ith Fourier mode of a 2D vector field +""" +function fourierMode2DVec!(app::KFlowApp, u, i::Integer) + @views ux, uy = u[:, :, 1], u[:, :, 2] + kx, ky = get_wavenumber2D(i) + fourierMode2D!(app, ux, kx) + fourierMode2D!(app, uy, ky) +end + +""" +set the initial condition to a Taylor-Green vortex +""" +function TaylorGreen!(app::KFlowApp, u, k = 1) + coords = app.coords + kx = trunc(Int, k / 2) + k % 2 + ky = trunc(Int, k / 2) + 1 + @views u_x, u_y = u[:, :, 1], u[:, :, 2] + @views x, y = coords[:, :, 1], coords[:, :, 2] + @. u_x = sin(kx * x) * cos(ky * y) + @. u_y = -cos(kx * x) * sin(ky * y) + return +end + +""" +compute the forcing term +""" +function kolmogorovForce!(app::KFlowApp, u, Δt; μ = 32.) + k = app.k_num + # Fₖ(x, y) = (sin(ky), 0) + @views @. u[:, :, 1] += μ * Δt * sin(k * app.coords[:, :, 2]) + return u +end + +""" +compute self advection of u +""" +function advect_semi_lagrangian!(app::KFlowApp, u::AbstractArray, v::AbstractArray, Δt::Real) + # get cached arrays (either real or dual, depending on typeof(u)) + coords_new = get_tmp(app.x_d, u) + u_new = get_tmp(app.u_d, u) + + # (1) trace each grid point backwards along u + @. coords_new = app.coords - Δt * v + + # (2) interpolate the velocity field at the new grid points + x_range = range(0.0, lengthScale - Δx, nₓ) + for i in 1:2 + itp = interpolate(u[:, :, i], BSpline(Linear(Periodic()))) + sitp = scale(itp, x_range, x_range) + extp = extrapolate(sitp, Periodic()) + + u_new[:, :, i] .= @views extp.(coords_new[:, :, 1], coords_new[:, :, 2]) + end + u .= u_new + return u +end + +""" +Solve Poisson problem for incompressible velocity field +Δϕ = ∇⋅u +u = u - ∇ϕ +""" +function project_incompressible!(app::KFlowApp, u::AbstractArray, Δt::Real) + κ = get_tmp(app.x_d, u) + ϕ = get_tmp(app.ϕ_d, u) + P̂ = app.P̂ + ℜ = app.ℜ + for (i, κ_x) ∈ enumerate(app.κ), (j, κ_y) ∈ enumerate(app.κ) + κ[i, j, 1] = κ_x + κ[i, j, 2] = κ_y + end + û = P̂ * u + @views κ_x, κ_y = κ[:, :, 1], κ[:, :, 2] + @views û_x, û_y = û[:, :, 1], û[:, :, 2] + # diffusion + @. û_x *= exp(-Δt / ℜ * (κ_x^2 + κ_y^2)) + @. û_y *= exp(-Δt / ℜ * (κ_x^2 + κ_y^2)) + + # KS equation: uₜ + u(∇u) = - Δu - Δ²u + # @. û_x *= exp(Δt*(κ_x^2 + κ_y^2 - (κ_x^2 + κ_y^2)^2)) + # @. û_y *= exp(Δt*(κ_x^2 + κ_y^2 - (κ_x^2 + κ_y^2)^2)) + + # incompressible projection + # ϕ = inv(Δ)∇⋅u + @. ϕ = -(1.0im * κ_x * û_x + 1.0im * κ_y * û_y) / (κ_x^2 + κ_y^2) + ϕ[1, 1] = 0.0 + 0.0im # pressure is unique up to a constant + # u = u - ∇ϕ + @. û_x -= 1.0im * κ_x * ϕ + @. û_y -= 1.0im * κ_y * ϕ + + u .= real(P̂ \ û) + return u +end + +function extractPartials(u::AbstractArray{ForwardDiff.Dual{T, V, P}}) where {T, V, P} + ps = zeros(V, size(u)..., P) + for I ∈ CartesianIndices(u), j ∈ 1:P + @inbounds ps[I, j] = ForwardDiff.partials(u[I], j) + end + return ps +end + +function fillArrayOfDuals!(u::AbstractArray{ForwardDiff.Dual{T, V, P}}, vs::AbstractArray{V}, ps::AbstractArray{V}) where {T, V, P} + checkbounds(ps, first(CartesianIndices(u)), 1:P) # make inbounds safe + for I ∈ CartesianIndices(u) + @inbounds u[I] = ForwardDiff.Dual{T}(vs[I], ntuple(j -> @inbounds(ps[I, j]), P)) + end +end + +# this enables ForwardDiff through the FFT where it normally doesn't work +function project_incompressible!(app::KFlowApp, u::AbstractArray{ForwardDiff.Dual{T, V, P}}, Δt::Real) where {T, V, P} + vs = ForwardDiff.value.(u) + ps = extractPartials(u) + # @info "size(ps)" + project_incompressible!(app, vs, Δt) + map(eachslice(ps, dims = 4)) do p + project_incompressible!(app, p, Δt) + end + fillArrayOfDuals!(u, vs, ps) + return u +end + +function ∇_dot(app, u) + κ = get_tmp(app.x_d, u) + for (i, κ_x) ∈ enumerate(app.κ), (j, κ_y) ∈ enumerate(app.κ) + κ[i, j, 1] = κ_x + κ[i, j, 2] = κ_y + end + @views κx, κy = κ[:, :, 1], κ[:, :, 2] + û = app.P̂ * u + out = @. 1.0im * κx * û[:, :, 1] + 1.0im * κy * û[:, :, 2] + return real(ifft(out)) +end + +function ∇X(app::KFlowApp, V) + κ = get_tmp(app.x_d, V) + for (i, κ_x) ∈ enumerate(app.κ), (j, κ_y) ∈ enumerate(app.κ) + κ[i, j, 1] = κ_x + κ[i, j, 2] = κ_y + end + f = get_tmp(app.ϕ_d, V) + V̂ = app.P̂ * V + @views V̂x, V̂y = V̂[:, :, 1], V̂[:, :, 2] + @views κx, κy = κ[:, :, 1], κ[:, :, 2] + out = @. 1.0im * κx * V̂y - 1.0im * κy * V̂x + return real(ifft(out)) +end + +function spectralInterpolation(f, targetSize) + size_f = size(f)[1] # assume square + targetSize > size_f || return f + npad = floor(Int, targetSize / 2 - size_f / 2) + f̂ = fftshift(fft(f)) + C = eltype(f̂) + f̂_interp = [ + zeros(C, npad, npad) zeros(C, npad, size_f) zeros(C, npad, npad) + zeros(C, size_f, npad) f̂ zeros(C, size_f, npad) + zeros(C, npad, npad) zeros(C, npad, size_f) zeros(C, npad, npad) + ] + return real(ifft(ifftshift(f̂_interp))) +end + +function base_step!(app::KFlowApp, u::AbstractArray, Δt::Float) + kolmogorovForce!(app, u, Δt) + advect_semi_lagrangian!(app, u, u, Δt) + project_incompressible!(app, u, Δt) + return u +end + +function my_init(app::KFlowApp, t::Float) + seed!(1) + u = zeros(Float, nₓ, nₓ, 2) + # u = randn(Float, nₓ, nₓ, 2) + TaylorGreen!(app, u) + project_incompressible!(app, u, 1e-1app.ℜ) + u[:, :, 1] .-= mean(u[:, :, 1]) + u[:, :, 2] .-= mean(u[:, :, 2]) + u ./= my_norm(app, u)/2 + return u +end + +function my_basis_init(app::KFlowApp, t::Float, i::Integer) + ψ = zeros(Float, nₓ, nₓ, 2) + fourierMode2DVec!(app, ψ, i) + return ψ +end + +function my_step!( + app::KFlowApp, status::XBraid.StepStatus, + u::AbstractArray, ustop::AbstractArray, + tstart::Real, tstop::Real, +) + Δt = tstop - tstart + level = XBraid.Status.getLevel(status) + iter = XBraid.Status.getIter(status) + if !app.useTheta || level == 0 + u = base_step!(app, u, Δt) + else + # richardson based θ method + m = app.cf ^ level + p = 1 + θ = 2^p * (m^p - 1) / (m^p * (2^p - 1)) + # θ = 2 + u_sub = deepcopy(u) + base_step!(app, u_sub, Δt / 2) + base_step!(app, u_sub, Δt / 2) + base_step!(app, u, Δt) + @. u = θ * u_sub + (1 - θ) * u + end + return u +end + +function my_step!( + app::KFlowApp, status::XBraid.StepStatus, + u::AbstractArray, ustop::AbstractArray, + tstart::Real, tstop::Real, + Ψ::Vector{Any} +) + rank = length(Ψ) + Ψ_new = reduce((a, b) -> cat(a, b, dims = 4), Ψ) + perturb(r) = my_step!(app, status, u + r' * Ψ, ustop, tstart, tstop) + + result = DiffResults.DiffResult(u, Ψ_new) + result = ForwardDiff.jacobian!(result, perturb, zeros(rank)) + for i in 1:rank + Ψ[i] .= Ψ_new[:, :, :, i] + end + return +end + +function my_access(app::KFlowApp, status::XBraid.AccessStatus, u) + XBraid.Status.getWrapperTest(status) && return + ntime = XBraid.Status.getNTPoints(status) + index = XBraid.Status.getTIndex(status) + level, done = XBraid.Status.getLevel(status), XBraid.Status.getDone(status) + + if level == 0 && done && index % app.cf == 0 + push!(app.solution, deepcopy(u)) + push!(app.times, index) + rank = XBraid.Status.getDeltaRank(status) + if rank > 0 + exps = XBraid.Status.getLocalLyapExponents(status) + vecs = XBraid.Status.getBasisVectors(status) + push!(app.lyapunov_exps, exps) + push!(app.lyapunov_vecs, deepcopy(vecs)) + end + end + + if level == 0 && index == ntime-1 + # last time step + push!(app.solTf, deepcopy(u)) + end +end + +my_norm(app, u) = normInf(u) + +const lengthScale = 2π +const nₓ = 256 +const Δx = lengthScale / nₓ + +MPI.Init() +comm = MPI.COMM_WORLD + +function test() + my_app = KFlowApp(2, false, 3, 2, 1600) + + test_app = XBraid.BraidApp(my_app, comm, my_step!, my_init, my_access; spatialnorm=my_norm, basis_init=my_basis_init) + + XBraid.testInitAccess(test_app, 0.0) + XBraid.testClone(test_app, 0.0) + XBraid.testSpatialNorm(test_app, 0.0) + XBraid.testBuf(test_app, 0.0) + XBraid.testDelta(test_app, 0.0, 0.1, my_app.deltaRank) + + # curl = ∇X(my_app, my_app.solution[1]) + # heatmap(curl') +end + +function main(;tstop=10.0, ntime=1024, deltaRank=0, ml=1, cf=2, maxiter=10, fcf=1, relaxLyap=false, savegif=false, expPlot=false, useTheta=false, k=4, ℜ=100, deferDelta=(1, 1)) + my_app = KFlowApp(cf, useTheta, deltaRank, k, ℜ) + + tstart = 0.0 + + core = XBraid.Init(comm, tstart, tstop, ntime, my_step!, my_init, my_access; spatialnorm=(app, u)->normInf(u), app=my_app) + + if deltaRank > 0 + XBraid.setDeltaCorrection(core, deltaRank, my_basis_init) + # XBraid.setDeferDelta(core, deferDelta...) + XBraid.setLyapunovEstimation(core; relax = relaxLyap, exponents = true) + end + XBraid.setMaxLevels(core, ml) + XBraid.setMaxIter(core, maxiter) + XBraid.setCFactor(core, -1, cf) + XBraid.setNRelax(core, -1, fcf) + XBraid.setAccessLevel(core, 2) + XBraid.setAbsTol(core, 1e-6) + + XBraid.setTimings(core, 2) + XBraid.Drive(core) + XBraid.printTimers(core) + + p = sortperm(my_app.times) + + if deltaRank > 0 + exponents = sum(my_app.lyapunov_exps) + exponents = MPI.Allreduce(exponents, (+), comm) + exponents ./= tstop - tstart + if MPI.Comm_rank(comm) == 0 + println("exponents: ", exponents) + end + end + + if deltaRank > 0 && expPlot + movingaverage(g, n) = [i < n ? mean(g[begin:i]) : mean(g[i-n+1:i]) for i in eachindex(g)] + exps = reduce(hcat, my_app.lyapunov_exps[p])' + nt = size(exps)[1] + Δt = tstop / nt + exps = movingaverage.([exps[:, i] ./ Δt for i ∈ 1:size(exps)[2]], length(exps[:, 1])) + exps_plot = plot(exps; legend = false) + savefig(exps_plot, "lyapunov_exps.png") + end + + if savegif + preprocess(app, u) = spectralInterpolation(∇X(app, u), 128)' + heatmapArgs = Dict(:ticks => false, :colorbar => false, :aspect_ratio => :equal) + count = 1 + anim = @animate for i in p[1:cf:end] + # for i in p + u = my_app.solution[i] + plots = [heatmap(preprocess(my_app, u); heatmapArgs...)] + for j in 1:min(8, deltaRank) + ψⱼ = my_app.lyapunov_vecs[i][j] + push!(plots, heatmap(preprocess(my_app, ψⱼ); heatmapArgs...)) + end + fig = plot(plots...; size = (600, 600)) + # savefig(fig, "kflow_gif/beamer/kolmo_$(nₓ)_$(ntime)_ml$(ml)-$(count).png") + count += 1 + end + gif(anim, "kflow_gif/kolmo_$(nₓ)_$(ntime)_ml$(ml).gif", fps = 20) + end + + return my_app, core +end diff --git a/examples/README.md b/examples/README.md index 66242155..acb2b0b2 100644 --- a/examples/README.md +++ b/examples/README.md @@ -80,6 +80,14 @@ For the Cython examples, see the corresponding *.pyx file. examples/ex-01-cython-alt/ex_01_alt-setup.py + + *ex-01-julia/*: is a directory containing an example using the Braid-Julia + interface defined in braid.jl ( braid/braid.jl ). It solves the same scalar + ODE equation as the ex-01 series described above. + + For instructions on running see + + examples/ex-01-julia/README.md + 2. ex-02 implements the 1D heat equation on a regular grid, using a very simple implementation. This is the next example to read after the various ex-01 cases. diff --git a/examples/ex-01-julia/README.md b/examples/ex-01-julia/README.md new file mode 100644 index 00000000..ccf7b16d --- /dev/null +++ b/examples/ex-01-julia/README.md @@ -0,0 +1,39 @@ + + +To run examples in this directory: +1. build braid as a shared library: + + shell> make braid shared=yes + +2. install and configure MPI.jl: https://juliaparallel.org/MPI.jl/v0.20/ + + julia> using Pkg; Pkg.add(["MPI", "MPIPreferences"]) + julia> using MPIPreferences; MPIPreferences.use_system_binary() + + NOTE: It might be necessary to give the path to the system MPI library, e.g.: + + julia> MPIPreferences.use_system_binary(; library_names="/opt/homebrew/lib/libmpi") + +3. run from the command line: + + shell> mpirun -n 4 julia ex-01.jl diff --git a/examples/ex-01-julia/ex-01.jl b/examples/ex-01-julia/ex-01.jl new file mode 100644 index 00000000..f53a5489 --- /dev/null +++ b/examples/ex-01-julia/ex-01.jl @@ -0,0 +1,85 @@ +#=BHEADER********************************************************************** + * Copyright (c) 2013, Lawrence Livermore National Security, LLC. + * Produced at the Lawrence Livermore National Laboratory. + * + * This file is part of XBraid. For support, post issues to the XBraid Github page. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License (as published by the Free Software + * Foundation) version 2.1 dated February 1999. + * + * This program is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the IMPLIED WARRANTY OF MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the terms and conditions of the GNU General Public + * License for more details. + * + * You should have received a copy of the GNU Lesser General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + ***********************************************************************EHEADER=# + +# main ex-01 +# include("XBraid.jl") +include("../../braid/braid.jl/XBraid.jl") +using .XBraid +using MPI + +MPI.Init() +comm = MPI.COMM_WORLD + +#= +using 1-element vector here since standard Float64 +values are immutable and don't have stable addresses. +=# +function my_init(app, t) + t == 0.0 && return [1.0] + return [0.456] +end + +# This must mutate u in place +function my_step!(app, status, u, ustop, tstart, tstop) + # backward Euler + u[] = 1.0 / (1.0 + tstop - tstart) * u[] +end + +#= +called by the solver to give access to +solution values. +=# +function my_access(app, status, u) + XBraid.Status.getWrapperTest(status) && return + t = XBraid.Status.getT(status) + ti = XBraid.Status.getTIndex(status) + print("t: $(t[]),\tu: $(u[])\n") +end + +#= +all other usual wrapper functions are either not required, +or are set to reasonable defaults for operating on julia Arrays. 😎 +=# + +test = false +if test + test_app = XBraid.BraidApp(nothing, comm, my_step!, my_init, my_access) + + open("ex_01.test.out", "w") do file + cfile = Libc.FILE(file) + XBraid.testInitAccess(test_app, 0.0, cfile) + XBraid.testClone(test_app, 0.0, cfile) + XBraid.testSum(test_app, 0.0, cfile) + XBraid.testSpatialNorm(test_app, 0.0, cfile) + XBraid.testBuf(test_app, 0.0, cfile) + end + MPI.Barrier(comm) +end + +ntime = 10 +tstart = 0.0 +tstop = tstart + ntime / 2.0; +core = XBraid.Init(comm, tstart, tstop, ntime, my_step!, my_init, my_access; + print_level=2, max_levels=2, abs_tol=1.e-6, c_factor=2) + +XBraid.Drive(core) + +# no need for braid_Destroy(core), julia will take care of it :) diff --git a/makefile.inc b/makefile.inc index 0b246039..fbeff554 100644 --- a/makefile.inc +++ b/makefile.inc @@ -22,10 +22,12 @@ # #EHEADER********************************************************************** -# Three compile time options +# five compile time options # make debug=yes|no # make valgrind=yes|no # make sequential=yes|no +# make shared=yes|no +# make fortran=yes|no # Was DEBUG specified? ifeq ($(debug),no) @@ -60,6 +62,7 @@ ifeq ($(findstring tux408,$(HOSTNAME)),tux408) MPICXX = mpicxx MPIF90 = mpif90 LFLAGS = -lm + SHAREDFLAGS = -shared ifeq ($(optlevel),DEBUG) CFLAGS = -g -Wall -lm -fPIC CXXFLAGS = -g -Wall -Wno-reorder -lm -fPIC @@ -80,6 +83,7 @@ else ifeq ($(shell uname -s), Darwin) MPICXX = mpicxx MPIF90 = mpif90 LFLAGS = -lm -lstdc++ + SHAREDFLAGS = -shared ifeq ($(optlevel),DEBUG) CFLAGS = -g -Wall -fPIC CXXFLAGS = -g -Wall -fPIC @@ -95,6 +99,7 @@ else ifeq ($(findstring cab,$(HOSTNAME)),cab) MPICXX = mpiicpc MPIF90 = mpif90 LFLAGS = -lm + SHAREDFLAGS = -shared ifeq ($(optlevel),DEBUG) CFLAGS = -g -Wall -fPIC CXXFLAGS = -g -Wall -fPIC @@ -110,6 +115,7 @@ else ifeq ($(findstring vulcan,$(HOSTNAME)),vulcan) MPICXX = mpixlcxx MPIF90 = mpixlf90 LFLAGS = -lm + SHAREDFLAGS = -shared ifeq ($(optlevel),DEBUG) CFLAGS = -g -Wall -fPIC CXXFLAGS = -g -Wall -fPIC @@ -126,6 +132,7 @@ else ifeq ($(findstring cadaverous,$(HOSTNAME)),cadaverous) MPICXX = /usr/bin/mpicxx MPIF90 = /usr/bin/mpif90 LFLAGS = -lm + SHAREDFLAGS = -shared ifeq ($(optlevel),DEBUG) CFLAGS = -g -Wall -fPIC CXXFLAGS = -g -Wall -fPIC @@ -140,6 +147,7 @@ else ifeq ($(shell uname -s),Linux) MPICXX = mpicxx MPIF90 = mpif90 LFLAGS = -lm + SHAREDFLAGS = -shared ifeq ($(optlevel),DEBUG) CFLAGS = -g -Wall -fPIC CXXFLAGS = -g -Wall -fPIC @@ -157,6 +165,7 @@ else MPICXX = mpicxx MPIF90 = mpif90 LFLAGS = -lm + SHAREDFLAGS = -shared ifeq ($(optlevel),DEBUG) CFLAGS = -g -Wall -fPIC CXXFLAGS = -g -Wall -fPIC @@ -174,6 +183,7 @@ ifeq ($(sequential),YES) MPICXX = g++ MPIF90 = gfortran LFLAGS = -lm + SHAREDFLAGS = -shared ifeq ($(optlevel),DEBUG) CFLAGS = -g -Wall -D braid_SEQUENTIAL CXXFLAGS = -g -Wall -D braid_SEQUENTIAL