diff --git a/.github/workflows/build_cuda.yml b/.github/workflows/build_cuda.yml
new file mode 100644
index 0000000000..fdc5690d89
--- /dev/null
+++ b/.github/workflows/build_cuda.yml
@@ -0,0 +1,149 @@
+# TODO: Fix and then move this as a matrix arch into build.yml
+
+name: Install and test Firedrake (CUDA)
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+
+concurrency:
+  # Cancels jobs running if new commits are pushed
+  group: >
+    ${{ github.workflow }}-
+    ${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  test:
+    name: Install and test Firedrake (CUDA)
+    strategy:
+      # We want to know all of the tests which fail, so don't cancel the
+      # remaining matrix jobs if one of them fails
+      fail-fast: false
+      matrix:
+        arch: [default]
+    runs-on: [self-hosted, Linux, gpu]
+    container:
+      image: nvidia/cuda:12.8.1-cudnn-devel-ubuntu24.04
+      options: --gpus all
+    env:
+      OMPI_ALLOW_RUN_AS_ROOT: 1
+      OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
+      OMP_NUM_THREADS: 1
+      OPENBLAS_NUM_THREADS: 1
+      FIREDRAKE_CI: 1
+      PYOP2_CI_TESTS: 1
+      PYOP2_SPMD_STRICT: 1
+      EXTRA_PYTEST_ARGS: --use_gpu_aware_mpi=0 --splitting-algorithm least_duration --timeout=600 --timeout-method=thread -o faulthandler_timeout=660 firedrake-repo/tests/firedrake
+    steps:
+      - name: Fix HOME
+        # For unknown reasons GitHub Actions overwrites HOME to /github/home,
+        # which will break everything unless fixed
+        # (https://github.com/actions/runner/issues/863)
+        run: echo "HOME=/root" >> "$GITHUB_ENV"
+
+      - name: Pre-run cleanup
+        # Make sure the current directory is empty
+        run: find . -delete
+
+      - uses: actions/checkout@v4
+        with:
+          path: firedrake-repo
+
+      - name: Install system dependencies
+        run: |
+          apt-get update
+          apt-get install -y curl
+          apt-get install -y git
+          apt-get install -y python3
+          apt-get install -y python3.12-venv
+          apt-get install -y parallel
+          apt-get install -y bison
+          apt-get install -y cmake
+          apt-get install -y flex
+          apt-get install -y gfortran
+          apt-get install -y libopenblas-dev
+          apt-get install -y ninja-build
+          apt-get install -y pkg-config
+          apt-get install -y python3-dev
+          apt-get install -y libfftw3-dev
+          apt-get install -y libhwloc-dev
+
+          # Missing for now:
+          # libfftw3-mpi-dev
+          # libopenmpi-dev
+          # libhdf5-mpi-dev
+
+          # apt-get -y install \
+          #   $(python3 ./firedrake-repo/scripts/firedrake-configure --arch ${{ matrix.arch }} --show-system-packages)
+          # TODO: list the installed apt packages and fail if openmpi is among them (PETSc builds its own MPI below)
+
+      - name: Install PETSc
+        run: |
+          git clone --depth 1 --branch $(python3 ./firedrake-repo/scripts/firedrake-configure --show-petsc-version) https://gitlab.com/petsc/petsc.git
+          cd petsc
+          # TODO update configure file
+          ./configure --with-make-np=8 --with-c2html=0 --with-debugging=0 --with-fortran-bindings=0 --with-shared-libraries=1 --with-strict-petscerrorcode PETSC_ARCH=arch-firedrake-default --COPTFLAGS='-O3 -march=native -mtune=native' --CXXOPTFLAGS='-O3 -march=native -mtune=native' --FOPTFLAGS='-O3 -march=native -mtune=native' --download-bison --download-fftw --download-hdf5 --download-hwloc --download-metis --download-mumps --download-netcdf --download-pnetcdf --download-ptscotch --download-scalapack --download-suitesparse --download-superlu_dist --download-zlib --with-cuda --with-cuda-dir=/usr/local/cuda CUDAPPFLAGS=-Wno-deprecated-gpu-targets --download-openmpi --download-slepc
+          make PETSC_DIR=/__w/firedrake/firedrake/petsc PETSC_ARCH=arch-firedrake-default all
+          export PETSC_DIR=/__w/firedrake/firedrake/petsc
+          export PETSC_ARCH=arch-firedrake-default
+          export MPI_HOME=$PETSC_DIR/$PETSC_ARCH
+          export CC=$PETSC_DIR/$PETSC_ARCH/bin/mpicc
+          export CXX=$PETSC_DIR/$PETSC_ARCH/bin/mpicxx
+          export MPICC=$CC
+          export PATH=$PETSC_DIR/$PETSC_ARCH/bin:$PATH
+          export SLEPC_DIR=$PETSC_DIR/$PETSC_ARCH
+          export HDF5_DIR=$PETSC_DIR/$PETSC_ARCH
+          export HDF5_MPI=ON
+          make PETSC_DIR=/__w/firedrake/firedrake/petsc PETSC_ARCH=arch-firedrake-default check MPIEXEC="mpiexec --allow-run-as-root"
+
+      - name: Install Firedrake
+        id: install
+        run: |
+          # TODO update configure file for the exports
+          # export $(python3 ./firedrake-repo/scripts/firedrake-configure --arch ${{ matrix.arch }} --show-env)
+          export PETSC_DIR=/__w/firedrake/firedrake/petsc
+          export PETSC_ARCH=arch-firedrake-default
+          export MPI_HOME=$PETSC_DIR/$PETSC_ARCH
+          export CC=$PETSC_DIR/$PETSC_ARCH/bin/mpicc
+          export CXX=$PETSC_DIR/$PETSC_ARCH/bin/mpicxx
+          export MPICC=$CC
+          export PATH=$PETSC_DIR/$PETSC_ARCH/bin:$PATH
+          export SLEPC_DIR=$PETSC_DIR/$PETSC_ARCH
+          export HDF5_DIR=$PETSC_DIR/$PETSC_ARCH
+          export HDF5_MPI=ON
+          python3 -m venv venv
+          . venv/bin/activate
+          : # Force a rebuild of petsc4py as the cached one will not link to the fresh
+          : # install of PETSc. A similar trick may be needed for compiled dependencies
+          : # like h5py or mpi4py if changing HDF5/MPI libraries.
+          pip cache remove petsc4py
+          pip cache remove slepc4py
+          pip cache remove h5py
+          # python -c "import petsc4py; print(petsc4py.get_config())"
+          pip install --verbose --no-binary h5py './firedrake-repo[ci]'
+          firedrake-clean
+          : # Extra test dependencies
+          pip install matplotlib vtk
+          pip list
+
+      - name: Run smoke tests
+        run: |
+          . venv/bin/activate
+          firedrake-check
+        timeout-minutes: 10
+
+      - name: Run tests (nprocs = 1)
+        # Run even if earlier tests failed
+        if: ${{ success() || steps.install.conclusion == 'success' }}
+        run: |
+          . venv/bin/activate
+          : # Use pytest-xdist here so we can have a single collated output (not possible
+          : # for parallel tests)
+          firedrake-run-split-tests 1 1 "-n 8 $EXTRA_PYTEST_ARGS" firedrake-repo/tests/firedrake
+        timeout-minutes: 60
+
+      - name: Post-run cleanup
+        if: always()
+        run: find . -delete
diff --git a/demos/helmholtz/helmholtz.txt b/demos/helmholtz/helmholtz.txt
new file mode 100644
index 0000000000..95334ccb6a
--- /dev/null
+++ b/demos/helmholtz/helmholtz.txt
@@ -0,0 +1,57 @@
+Main Stage 366614
+Main Stage;firedrake 44369
+Main Stage;firedrake;firedrake.solving.solve 86
+Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve 196
+Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve 140
+Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESFunctionEval 736
+Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESFunctionEval;ParLoopExecute 212
+Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESFunctionEval;ParLoopExecute;Parloop_Cells_wrap_form0_cell_integral 112
+Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESFunctionEval;ParLoopExecute;Parloop_Cells_wrap_form0_cell_integral;pyop2.global_kernel.GlobalKernel.compile 415552
+Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESFunctionEval;firedrake.tsfc_interface.compile_form 42597
+Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESJacobianEval 866
+Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESJacobianEval;ParLoopExecute 149
+Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESJacobianEval;ParLoopExecute;Parloop_Cells_wrap_form00_cell_integral 136
+Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESJacobianEval;ParLoopExecute;Parloop_Cells_wrap_form00_cell_integral;pyop2.global_kernel.GlobalKernel.compile 407506
+Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.__init__ 1771
+Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.__init__;firedrake.tsfc_interface.compile_form 56423
+Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.__init__;firedrake.tsfc_interface.compile_form;firedrake.formmanipulation.split_form 1907
+Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.__init__;firedrake.solving_utils._SNESContext.__init__ 618
+Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.LinearVariationalProblem.__init__ 145
+Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.LinearVariationalProblem.__init__;firedrake.ufl_expr.action 4387
+Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.LinearVariationalProblem.__init__;firedrake.variational_solver.NonlinearVariationalProblem.__init__ 332
+Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.LinearVariationalProblem.__init__;firedrake.variational_solver.NonlinearVariationalProblem.__init__;firedrake.ufl_expr.adjoint 2798
+Main Stage;firedrake;firedrake.function.Function.interpolate 342
+Main Stage;firedrake;firedrake.function.Function.interpolate;firedrake.assemble.assemble 5644
+Main Stage;firedrake;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate 29
+Main Stage;firedrake;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate;ParLoopExecute 298
+Main Stage;firedrake;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate;ParLoopExecute;Parloop_Cells_wrap_expression_kernel 204
+Main Stage;firedrake;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate;ParLoopExecute;Parloop_Cells_wrap_expression_kernel;pyop2.global_kernel.GlobalKernel.compile 682292
+Main Stage;firedrake;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.make_interpolator 40658
+Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write 2473
+Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write;firedrake.function.Function.interpolate 303
+Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write;firedrake.function.Function.interpolate;firedrake.assemble.assemble 1080
+Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate 23
+Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate;ParLoopExecute 328
+Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate;ParLoopExecute;Parloop_Cells_wrap_expression_kernel 165
+Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate;ParLoopExecute;Parloop_Cells_wrap_expression_kernel;pyop2.global_kernel.GlobalKernel.compile 663410
+Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.make_interpolator 55147
+Main Stage;firedrake;firedrake.__init__ 495196
+Main Stage;firedrake;firedrake.assemble.assemble 949
+Main Stage;firedrake;firedrake.assemble.assemble;ParLoopExecute 310
+Main Stage;firedrake;firedrake.assemble.assemble;ParLoopExecute;Parloop_Cells_wrap_form_cell_integral 95
+Main Stage;firedrake;firedrake.assemble.assemble;ParLoopExecute;Parloop_Cells_wrap_form_cell_integral;pyop2.global_kernel.GlobalKernel.compile 355507
+Main Stage;firedrake;firedrake.assemble.assemble;firedrake.tsfc_interface.compile_form 20219
+Main Stage;firedrake;CreateFunctionSpace 919
+Main Stage;firedrake;CreateFunctionSpace;CreateFunctionSpace 79
+Main Stage;firedrake;CreateFunctionSpace;CreateFunctionSpace;firedrake.functionspaceimpl.FunctionSpace.__init__ 165
+Main Stage;firedrake;CreateFunctionSpace;CreateFunctionSpace;firedrake.functionspaceimpl.FunctionSpace.__init__;firedrake.functionspacedata.get_shared_data 13
+Main Stage;firedrake;CreateFunctionSpace;CreateFunctionSpace;firedrake.functionspaceimpl.FunctionSpace.__init__;firedrake.functionspacedata.get_shared_data;firedrake.functionspacedata.FunctionSpaceData.__init__ 825
+Main Stage;firedrake;CreateFunctionSpace;CreateFunctionSpace;firedrake.functionspaceimpl.FunctionSpace.__init__;firedrake.functionspacedata.get_shared_data;firedrake.functionspacedata.FunctionSpaceData.__init__;FunctionSpaceData: CreateElement 1274
+Main Stage;firedrake;CreateFunctionSpace;CreateFunctionSpace;firedrake.functionspaceimpl.FunctionSpace.__init__;firedrake.functionspacedata.get_shared_data;firedrake.functionspacedata.FunctionSpaceData.__init__;firedrake.mesh.MeshTopology._facets 789
+Main Stage;firedrake;CreateFunctionSpace;CreateMesh 147
+Main Stage;firedrake;CreateFunctionSpace;CreateMesh;Mesh: numbering 376
+Main Stage;firedrake;firedrake.utility_meshes.UnitSquareMesh 12
+Main Stage;firedrake;firedrake.utility_meshes.UnitSquareMesh;firedrake.utility_meshes.SquareMesh 11
+Main Stage;firedrake;firedrake.utility_meshes.UnitSquareMesh;firedrake.utility_meshes.SquareMesh;firedrake.utility_meshes.RectangleMesh 834
+Main Stage;firedrake;firedrake.utility_meshes.UnitSquareMesh;firedrake.utility_meshes.SquareMesh;firedrake.utility_meshes.RectangleMesh;CreateMesh 676
+Main Stage;firedrake;firedrake.utility_meshes.UnitSquareMesh;firedrake.utility_meshes.SquareMesh;firedrake.utility_meshes.RectangleMesh;DMPlexInterp 382
diff --git a/firedrake/preconditioners/__init__.py b/firedrake/preconditioners/__init__.py
index 491a73657b..1cec69ab49 100644
--- a/firedrake/preconditioners/__init__.py
+++ b/firedrake/preconditioners/__init__.py
@@ -13,3 +13,4 @@
 from firedrake.preconditioners.hiptmair import *  # noqa: F401
 from firedrake.preconditioners.facet_split import *  # noqa: F401
 from firedrake.preconditioners.bddc import *  # noqa: F401
+from firedrake.preconditioners.offload import *  # noqa: F401
diff --git a/firedrake/preconditioners/offload.py b/firedrake/preconditioners/offload.py
new file mode 100644
index 0000000000..451ce97c6a
--- /dev/null
+++ b/firedrake/preconditioners/offload.py
@@ -0,0 +1,70 @@
+from firedrake.preconditioners.assembled import AssembledPC
+from firedrake.petsc import PETSc
+import firedrake.dmhooks as dmhooks
+
+__all__ = ("OffloadPC",)
+
+
+class OffloadPC(AssembledPC):
+    """Offload PC from CPU to GPU and back.
+
+    Internally this makes a PETSc PC object that can be controlled by
+    options using the extra options prefix ``offload_``.
+ """ + + _prefix = "offload_" + + def initialize(self, pc): + super().initialize(pc) + + with PETSc.Log.Event("Event: initialize offload"): + A, P = pc.getOperators() + + # Convert matrix to ajicusparse + mat_type = PETSc.Options().getString(self._prefix + "mat_type", "cusparse") + with PETSc.Log.Event("Event: matrix offload"): + P_cu = P.convert(mat_type='aijcusparse') # todo + + # Transfer nullspace + P_cu.setNullSpace(P.getNullSpace()) + P_cu.setTransposeNullSpace(P.getTransposeNullSpace()) + P_cu.setNearNullSpace(P.getNearNullSpace()) + + # Update preconditioner with GPU matrix + self.pc.setOperators(A, P_cu) + + def form(self, pc, test, trial): + _, P = pc.getOperators() + if P.getType() == "python": + context = P.getPythonContext() + return (context.a, context.row_bcs) + else: + context = dmhooks.get_appctx(pc.getDM()) + return (context.Jp or context.J, context._problem.bcs) + + # Convert vectors to CUDA, solve and get solution on CPU back + def apply(self, pc, x, y): + with PETSc.Log.Event("Event: apply offload"): # + dm = pc.getDM() + with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref): + with PETSc.Log.Event("Event: vectors offload"): + y_cu = PETSc.Vec() # begin + y_cu.createCUDAWithArrays(y) + x_cu = PETSc.Vec() + # Passing a vec into another vec doesnt work because original is locked + x_cu.createCUDAWithArrays(x.array_r) + with PETSc.Log.Event("Event: solve"): + self.pc.apply(x_cu, y_cu) + # Calling data to synchronize vector + tmp = y_cu.array_r # noqa: F841 + with PETSc.Log.Event("Event: vectors copy back"): + y.copy(y_cu) # + + def applyTranspose(self, pc, X, Y): + raise NotImplementedError + + def view(self, pc, viewer=None): + super().view(pc, viewer) + print("viewing PC") + if hasattr(self, "pc"): + viewer.printfASCII("PC to solve on GPU\n") + self.pc.view(viewer) diff --git a/mfe.py b/mfe.py new file mode 100644 index 0000000000..2fac75246e --- /dev/null +++ b/mfe.py @@ -0,0 +1,17 @@ +from firedrake import * + + +# Setting up mesh parameters +nx, ny = 20, 20 +mesh = RectangleMesh(nx, ny, 1.0, 1.0) + + +# Setting up function space +degree = 4 +V = FunctionSpace(mesh, "CG", degree) + +# Using vertex only mesh +source_locations = [(0.5, 0.5)] +source_mesh = VertexOnlyMesh(mesh, source_locations) + +print("END", flush=True) diff --git a/tests/firedrake/conftest.py b/tests/firedrake/conftest.py index 10059578c5..ab6252642a 100644 --- a/tests/firedrake/conftest.py +++ b/tests/firedrake/conftest.py @@ -7,7 +7,7 @@ os.environ["FIREDRAKE_DISABLE_OPTIONS_LEFT"] = "1" import pytest -from firedrake.petsc import PETSc, get_external_packages +from firedrake.petsc import PETSc, get_external_packages, get_petsc_variables def _skip_test_dependency(dependency): @@ -145,11 +145,29 @@ def pytest_configure(config): "markers", "skipnetgen: mark as skipped if netgen and ngsPETSc is not installed" ) + config.addinivalue_line( + "markers", + "skipcuda: mark as skipped if CUDA is not available" + ) + config.addinivalue_line( + "markers", + "skipgpu: mark as skipped if a GPU enabled PETSC was installed" + ) def pytest_collection_modifyitems(session, config, items): from firedrake.utils import complex_mode, SLATE_SUPPORTS_COMPLEX + try: + get_petsc_variables()["CUDA_VERSION"] + # They look like the same thing (but opposite) for now, but they are not. + # This will skip some nongpurelated tests (hypre) if a gpu-aware petsc was installed. 
diff --git a/mfe.py b/mfe.py
new file mode 100644
index 0000000000..2fac75246e
--- /dev/null
+++ b/mfe.py
@@ -0,0 +1,17 @@
+from firedrake import *
+
+
+# Setting up mesh parameters
+nx, ny = 20, 20
+mesh = RectangleMesh(nx, ny, 1.0, 1.0)
+
+
+# Setting up function space
+degree = 4
+V = FunctionSpace(mesh, "CG", degree)
+
+# Using vertex only mesh
+source_locations = [(0.5, 0.5)]
+source_mesh = VertexOnlyMesh(mesh, source_locations)
+
+print("END", flush=True)
diff --git a/tests/firedrake/conftest.py b/tests/firedrake/conftest.py
index 10059578c5..ab6252642a 100644
--- a/tests/firedrake/conftest.py
+++ b/tests/firedrake/conftest.py
@@ -7,7 +7,7 @@ os.environ["FIREDRAKE_DISABLE_OPTIONS_LEFT"] = "1"
 
 import pytest
 
-from firedrake.petsc import PETSc, get_external_packages
+from firedrake.petsc import PETSc, get_external_packages, get_petsc_variables
 
 
 def _skip_test_dependency(dependency):
@@ -145,11 +145,29 @@ def pytest_configure(config):
         "markers",
         "skipnetgen: mark as skipped if netgen and ngsPETSc is not installed"
     )
+    config.addinivalue_line(
+        "markers",
+        "skipcuda: mark as skipped if CUDA is not available"
+    )
+    config.addinivalue_line(
+        "markers",
+        "skipgpu: mark as skipped if a GPU-enabled PETSc was installed"
+    )
 
 
 def pytest_collection_modifyitems(session, config, items):
     from firedrake.utils import complex_mode, SLATE_SUPPORTS_COMPLEX
 
+    try:
+        get_petsc_variables()["CUDA_VERSION"]
+        # skipcuda and skipgpu look like opposites but are not the same thing: this
+        # branch also skips some non-GPU-related tests (e.g. hypre) on a GPU-aware PETSc install.
+        cuda_unavailable = False
+        gpu_based_petsc = True
+    except KeyError:
+        cuda_unavailable = True
+        gpu_based_petsc = False
+
     for item in items:
         if complex_mode:
             if item.get_closest_marker("skipcomplex") is not None:
@@ -160,6 +178,14 @@ def pytest_collection_modifyitems(session, config, items):
             if item.get_closest_marker("skipreal") is not None:
                 item.add_marker(pytest.mark.skip(reason="Test makes no sense unless in complex mode"))
 
+        if cuda_unavailable:
+            if item.get_closest_marker("skipcuda") is not None:
+                item.add_marker(pytest.mark.skip(reason="CUDA not available"))
+
+        if gpu_based_petsc:
+            if item.get_closest_marker("skipgpu") is not None:
+                item.add_marker(pytest.mark.skip(reason="Test skipped on gpu-based install"))
+
         for dep, marker, reason in dependency_skip_markers_and_reasons:
             if _skip_test_dependency(dep) and item.get_closest_marker(marker) is not None:
                 item.add_marker(pytest.mark.skip(reason))
diff --git a/tests/firedrake/cuda/test_poisson_offloading_pc.py b/tests/firedrake/cuda/test_poisson_offloading_pc.py
new file mode 100644
index 0000000000..3ad749e686
--- /dev/null
+++ b/tests/firedrake/cuda/test_poisson_offloading_pc.py
@@ -0,0 +1,64 @@
+from firedrake import *
+import numpy as np
+import pytest
+
+# TODO: add a check that GPU memory was really used
+@pytest.mark.skipcuda
+@pytest.mark.parametrize("ksp_type, pc_type", [("cg", "sor"), ("cg", "gamg"), ("preonly", "lu")])
+def test_poisson_on_cuda(ksp_type, pc_type):
+
+    # Poisson solved with different inner solver configurations, e.g. cg+sor, cg+gamg, preonly+lu
+    print(f"Using ksp_type = {ksp_type} and pc_type = {pc_type}.", flush=True)
+
+    nested_parameters = {
+        "pc_type": "ksp",
+        "ksp": {
+            "ksp_type": ksp_type,
+            "ksp_view": None,
+            "ksp_rtol": "1e-10",
+            "ksp_monitor": None,
+            "pc_type": pc_type,
+        }
+    }
+    parameters = {
+        "ksp_type": "preonly",
+        "pc_type": "python",
+        "pc_python_type": "firedrake.OffloadPC",
+        "offload": nested_parameters,
+    }
+
+    mesh = UnitSquareMesh(10, 10)
+    V = FunctionSpace(mesh, "CG", 1)
+    u = TrialFunction(V)
+    v = TestFunction(V)
+
+    f = Function(V)
+    x, y = SpatialCoordinate(mesh)
+    f.interpolate(2*pi**2*sin(pi*x)*sin(pi*y))
+
+    # Equations
+    L = inner(grad(u), grad(v)) * dx
+    R = inner(v, f) * dx
+
+    # Dirichlet boundary on all sides to 0
+    bcs = DirichletBC(V, 0, "on_boundary")
+
+    # Exact solution
+    sol = Function(V)
+    sol.interpolate(sin(pi*x)*sin(pi*y))
+
+    # Solution function
+    u_f = Function(V)
+
+    problem = LinearVariationalProblem(L, R, u_f, bcs=bcs)
+    solver = LinearVariationalSolver(problem, solver_parameters=parameters)
+    solver.solve()
+    npsol = sol.dat.data[:]  # noqa: F841
+    npu_f = u_f.dat.data[:]  # noqa: F841
+    error = errornorm(problem.u, sol)
+    print(f"Error norm = {error}", flush=True)
+    assert error < 1.2e-2
+
+
+if __name__ == "__main__":
+    test_poisson_on_cuda("cg", "sor")
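On the TODO above about verifying that the GPU was really used: one possible check, sketched below, is to inspect the PETSc objects after the solve and assert that the operator handed to the inner preconditioner was converted to a CUDA matrix type. The attribute path relies on the OffloadPC/AssembledPC structure shown earlier in this diff and on Firedrake exposing the SNES on the solver; it is an untested suggestion, not an API guarantee.

# Sketch: assumes `solver` is the LinearVariationalSolver built with the
# OffloadPC parameters above and that solver.solve() has already been called.
outer_pc = solver.snes.getKSP().getPC()   # the outer "python" PC
offload = outer_pc.getPythonContext()     # the OffloadPC instance
gpu_mat = offload.pc.getOperators()[1]    # operator given to the inner PC
assert gpu_mat.getType() == "aijcusparse"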
diff --git a/tests/firedrake/cuda/test_wave_equation.py b/tests/firedrake/cuda/test_wave_equation.py
new file mode 100644
index 0000000000..d24c2741bf
--- /dev/null
+++ b/tests/firedrake/cuda/test_wave_equation.py
@@ -0,0 +1,124 @@
+from firedrake import *
+import os
+import numpy as np
+import finat
+from firedrake.__future__ import interpolate
+import pytest
+
+
+@pytest.mark.skipcuda
+def test_kmv_wave_propagation_cuda():
+    nested_parameters = {
+        "ksp_type": "preonly",
+        "pc_type": "jacobi"
+    }
+    parameters = {
+        "ksp_type": "preonly",
+        "pc_type": "python",
+        "pc_python_type": "firedrake.OffloadPC",
+        "offload": nested_parameters,
+    }
+
+
+    # Choosing degree
+    degree = 4
+
+    # Setting up time variables
+    dt = 0.001  # time step in seconds
+    final_time = 0.5  # final time in seconds
+    total_steps = int(final_time / dt) + 1
+
+    # Setting up mesh parameters
+    nx, ny = 10, 10
+    mesh = RectangleMesh(nx, ny, 1.0, 1.0)
+
+    # Acquisition geometry
+    frequency_peak = 5.0  # The dominant frequency of the Ricker wavelet in Hz.
+    offset = 0.2
+    source_locations = [(0.5, 0.5)]
+    receiver_locations = [(0.5, 0.5 + offset)]
+
+    # Setting up function space
+    V = FunctionSpace(mesh, "KMV", degree)
+
+    # Velocity model
+    c = Constant(1.5)
+
+    # Ricker wavelet definition
+    def ricker_wavelet(t, freq, amp=1.0, delay=0.2, delay_type="time"):
+        if delay_type == "multiples_of_minimum":
+            time_delay = delay * np.sqrt(6.0) / (np.pi * freq)
+        elif delay_type == "time":
+            time_delay = delay
+        t = t - time_delay
+        # t = t - delay / freq
+        tt = (np.pi * freq * t) ** 2
+        return amp * (1.0 - 2.0 * tt) * np.exp(-tt)
+
+    # Using vertex only mesh
+    source_mesh = VertexOnlyMesh(mesh, source_locations)
+    V_s = FunctionSpace(source_mesh, "DG", 0)
+    d_s = Function(V_s)
+    d_s.interpolate(1.0)
+    source_cofunction = assemble(d_s * TestFunction(V_s) * dx)
+    q_s = Cofunction(V.dual()).interpolate(source_cofunction)
+    receiver_mesh = VertexOnlyMesh(mesh, receiver_locations)
+    V_r = FunctionSpace(receiver_mesh, "DG", 0)
+    f = Cofunction(V.dual())
+
+    true_data_receivers = []
+
+    # Setting up forward problem
+    u = TrialFunction(V)
+    v = TestFunction(V)
+    u_np1 = Function(V)  # timestep n+1
+    u_n = Function(V)  # timestep n
+    u_nm1 = Function(V)  # timestep n-1
+    # Quadrature rule for lumped mass matrix.
+    quad_rule = finat.quadrature.make_quadrature(V.finat_element.cell, V.ufl_element().degree(), "KMV")
+    m = (1 / (c * c))
+    time_term = m * ((u - 2.0 * u_n + u_nm1) / Constant(dt**2)) * v * dx(scheme=quad_rule)
+    nf = (1 / c) * ((u_n - u_nm1) / dt) * v * ds
+    a = dot(grad(u_n), grad(v)) * dx(scheme=quad_rule)
+    F = time_term + a + nf
+    lin_var = LinearVariationalProblem(lhs(F), rhs(F) + f, u_np1)
+    # The KMV quadrature rule lumps the mass matrix, so the system matrix is diagonal and
+    # a single Jacobi-preconditioned application ("preonly" + "jacobi") solves it exactly.
+
+    solver = LinearVariationalSolver(lin_var, solver_parameters=parameters)
+
+    interpolate_receivers = interpolate(u_np1, V_r)
+
+    # Looping in time
+    for step in range(total_steps):
+        if step % 100 == 0:
+            print(f"For time = {step*dt}s")
+        f.assign(ricker_wavelet(step * dt, frequency_peak) * q_s)
+        solver.solve()
+        u_nm1.assign(u_n)
+        u_n.assign(u_np1)
+        rec_out = assemble(interpolate_receivers)
+        true_data_receivers.append(rec_out.dat.data[:])
+
+    rec_matrix = np.matrix(true_data_receivers)
+
+    # Hard coded values from an analytical solution
+    min_value = -0.05708
+    max_value = 0.09467
+    min_location = 0.2701
+    max_location = 0.3528
+
+    correct_min_loc = np.isclose(min_location, np.argmin(rec_matrix)*dt, rtol=1e-2)
+    correct_min = np.isclose(min_value, np.min(rec_matrix), rtol=1e-2)
+    correct_max = np.isclose(max_value, np.max(rec_matrix), rtol=1e-2)
+    correct_max_loc = np.isclose(max_location, np.argmax(rec_matrix)*dt, rtol=1e-2)
+
+    print(f"Correct minimum and its location: {correct_min} and {correct_min_loc}.")
+    print(f"Correct maximum and its location: {correct_max} and {correct_max_loc}.")
+
+    print("END", flush=True)
+    assert all([correct_min_loc, correct_min, correct_max, correct_max_loc])
+
+
+if __name__ == "__main__":
+    test_kmv_wave_propagation_cuda()
diff --git a/tests/firedrake/demos/test_demos_run.py b/tests/firedrake/demos/test_demos_run.py
index fe5665a0fe..8fc09fcd5f 100644
--- a/tests/firedrake/demos/test_demos_run.py
+++ b/tests/firedrake/demos/test_demos_run.py
@@ -117,6 +117,7 @@ def _exec_file(py_file):
 
 
 @pytest.mark.skipcomplex
+@pytest.mark.skipgpu
 @pytest.mark.parametrize("demo", SERIAL_DEMOS, ids=["/".join(d.loc) for d in SERIAL_DEMOS])
 def test_serial_demo(demo, env, monkeypatch, tmpdir, skip_dependency):
     _maybe_skip_demo(demo, skip_dependency)
@@ -130,6 +131,7 @@ def test_serial_demo(demo, env, monkeypatch, tmpdir, skip_dependency):
 
 @pytest.mark.parallel(2)
 @pytest.mark.skipcomplex
+@pytest.mark.skipgpu
 @pytest.mark.parametrize("demo", PARALLEL_DEMOS, ids=["/".join(d.loc) for d in PARALLEL_DEMOS])
 def test_parallel_demo(demo, env, monkeypatch, tmpdir, skip_dependency):
     _maybe_skip_demo(demo, skip_dependency)
diff --git a/tests/firedrake/multigrid/test_nested_split.py b/tests/firedrake/multigrid/test_nested_split.py
index 77083dc447..6b65780a20 100644
--- a/tests/firedrake/multigrid/test_nested_split.py
+++ b/tests/firedrake/multigrid/test_nested_split.py
@@ -56,6 +56,7 @@
                            "fieldsplit_1_sub_pc_type": "ilu"}])
 @pytest.mark.skipcomplex
 @pytest.mark.skipcomplexnoslate
+@pytest.mark.skipgpu
 def test_nested_split_multigrid(parameters):
     mesh = UnitSquareMesh(10, 10)
 
diff --git a/tests/firedrake/regression/test_hypre_ads.py b/tests/firedrake/regression/test_hypre_ads.py
index 7468eed722..74ecbcd8fb 100644
--- a/tests/firedrake/regression/test_hypre_ads.py
+++ b/tests/firedrake/regression/test_hypre_ads.py
@@ -4,6 +4,7 @@
 
 @pytest.mark.skiphypre
 @pytest.mark.skipcomplex
+@pytest.mark.skipgpu
 def test_homogeneous_field_linear():
     mesh = UnitCubeMesh(10, 10, 10)
     V = FunctionSpace(mesh, "RT", 1)
@@ -32,6 +33,7 @@
 
 @pytest.mark.skiphypre
 @pytest.mark.skipcomplex
+@pytest.mark.skipgpu
 def test_homogeneous_field_matfree():
     mesh = UnitCubeMesh(10, 10, 10)
     V = FunctionSpace(mesh, "RT", 1)
@@ -63,6 +65,7 @@
 
 @pytest.mark.skiphypre
 @pytest.mark.skipcomplex
+@pytest.mark.skipgpu
 def test_homogeneous_field_nonlinear():
     mesh = UnitCubeMesh(10, 10, 10)
     V = FunctionSpace(mesh, "RT", 1)
FunctionSpace(mesh, "RT", 1) @@ -89,6 +92,7 @@ def test_homogeneous_field_nonlinear(): @pytest.mark.skiphypre @pytest.mark.skipcomplex +@pytest.mark.skipgpu def test_homogeneous_field_linear_convergence(): mesh = UnitCubeMesh(10, 10, 10) V = FunctionSpace(mesh, "RT", 1) diff --git a/tests/firedrake/regression/test_hypre_ams.py b/tests/firedrake/regression/test_hypre_ams.py index 549a1d7e8b..03d97c8596 100644 --- a/tests/firedrake/regression/test_hypre_ams.py +++ b/tests/firedrake/regression/test_hypre_ams.py @@ -5,6 +5,7 @@ @pytest.mark.skiphypre @pytest.mark.skipcomplex +@pytest.mark.skipgpu def test_homogeneous_field_linear(): mesh = UnitCubeMesh(5, 5, 5) V = FunctionSpace(mesh, "N1curl", 1) @@ -39,6 +40,7 @@ def test_homogeneous_field_linear(): @pytest.mark.skiphypre @pytest.mark.skipcomplex +@pytest.mark.skipgpu def test_homogeneous_field_matfree(): mesh = UnitCubeMesh(5, 5, 5) V = FunctionSpace(mesh, "N1curl", 1) @@ -76,6 +78,7 @@ def test_homogeneous_field_matfree(): @pytest.mark.skiphypre @pytest.mark.skipcomplex +@pytest.mark.skipgpu def test_homogeneous_field_nonlinear(): mesh = UnitCubeMesh(5, 5, 5) V = FunctionSpace(mesh, "N1curl", 1) @@ -109,6 +112,7 @@ def test_homogeneous_field_nonlinear(): @pytest.mark.skiphypre @pytest.mark.skipcomplex +@pytest.mark.skipgpu def test_homogeneous_field_linear_convergence(): N = 4 mesh = UnitCubeMesh(2**N, 2**N, 2**N)