% Journal article: The Journal of Supercomputing 81(6), 2025.
% The author list is single-braced so that every " and "-separated name is
% parsed on its own. The publisher keeps an inner brace pair on purpose: it
% contains " and ", which list-splitting back ends would otherwise break apart.
% Only acronyms in the title are brace-protected; casing is left to the style.
@article{62064,
  author       = {Büttner, Markus and Alt, Christoph and Kenter, Tobias and Köstler, Harald and Plessl, Christian and Aizinger, Vadym},
  title        = {Analyzing performance portability for a {SYCL} implementation of the {2D} shallow water equations},
  journal      = {The Journal of Supercomputing},
  volume       = {81},
  number       = {6},
  year         = {2025},
  publisher    = {{Springer Science and Business Media LLC}},
  issn         = {1573-0484},
  doi          = {10.1007/s11227-025-07063-7},
  abstract     = {SYCL is an open standard for targeting heterogeneous hardware from C++. In this work, we evaluate a SYCL implementation for a discontinuous Galerkin discretization of the 2D shallow water equations targeting CPUs, GPUs, and also FPGAs. The discretization uses polynomial orders zero to two on unstructured triangular meshes. Separating memory accesses from the numerical code allow us to optimize data accesses for the target architecture. A performance analysis shows good portability across x86 and ARM CPUs, GPUs from different vendors, and even two variants of Intel Stratix 10 FPGAs. Measuring the energy to solution shows that GPUs yield an up to 10x higher energy efficiency in terms of degrees of freedom per joule compared to CPUs. With custom designed caches, FPGAs offer a meaningful complement to the other architectures with particularly good computational performance on smaller meshes. FPGAs with High Bandwidth Memory are less affected by bandwidth issues and have similar energy efficiency as latest generation CPUs.},
}

% Workshop paper: 13th International Workshop on OpenCL and SYCL, 2025.
% The author list is single-braced so that every " and "-separated name is
% parsed on its own; keywords are single-braced so the comma-separated list
% can be split. Only acronyms and proper nouns in the title are protected.
@inproceedings{62066,
  author       = {Alt, Christoph and Plessl, Christian and Kenter, Tobias},
  title        = {Evaluating {oneAPI} {I/O} Pipes in a Case Study of Scaling a {SYCL} {Jacobi} Solver to multiple {FPGAs}},
  booktitle    = {Proceedings of the 13th International Workshop on OpenCL and SYCL},
  year         = {2025},
  publisher    = {Association for Computing Machinery},
  isbn         = {9798400713606},
  doi          = {10.1145/3731125.3731131},
  keywords     = {Multi-FPGA, High-level Synthesis, oneAPI, FPGA},
  abstract     = {In the context of high-performance computing (HPC) for distributed workloads, individual field-programmable gate arrays (FPGAs) need efficient ways to exchange data, which requires network infrastructure and software abstractions. Dedicated multi-FPGA clusters provide inter-FPGA networks for direct device to device communication. The oneAPI high-level synthesis toolchain offers I/O pipes to allow user kernels to interact with the networking ports of the FPGA board. In this work, we evaluate using oneAPI I/O pipes for direct FPGA-to-FPGA communication by scaling a SYCL implementation of a Jacobi solver on up to 25 FPGAs in the Noctua 2 cluster. We see good results in weak and strong scaling experiments.},
}

% Conference paper: IEEE HPEC 2025.
% The author list is single-braced so that every " and "-separated name is
% parsed on its own; only the proper noun in the title is brace-protected.
@inproceedings{62065,
  author       = {Sundriyal, Shivam and Büttner, Markus and Alt, Christoph and Kenter, Tobias and Aizinger, Vadym},
  title        = {Adaptive Spectral Block Floating Point for Discontinuous {Galerkin} Methods},
  booktitle    = {2025 IEEE High Performance Extreme Computing Conference (HPEC)},
  year         = {2025},
  publisher    = {IEEE},
  doi          = {10.1109/hpec67600.2025.11196195},
}

% Conference paper: FPL 2024.
% The author list is single-braced so that every " and "-separated name is
% parsed on its own; only the camel-case name and acronyms in the title are
% brace-protected.
@inproceedings{56605,
  author       = {Opdenhövel, Jan-Oliver and Alt, Christoph and Plessl, Christian and Kenter, Tobias},
  title        = {{StencilStream}: A {SYCL}-based Stencil Simulation Framework Targeting {FPGAs}},
  booktitle    = {2024 34th International Conference on Field-Programmable Logic and Applications (FPL)},
  year         = {2024},
  publisher    = {IEEE},
  doi          = {10.1109/fpl64840.2024.00023},
}

% Conference paper: PASC 2024.
% The author list is single-braced so that every " and "-separated name is
% parsed on its own; only the acronyms in the title are brace-protected.
@inproceedings{54312,
  author       = {Büttner, Markus and Alt, Christoph and Kenter, Tobias and Köstler, Harald and Plessl, Christian and Aizinger, Vadym},
  title        = {Enabling Performance Portability for Shallow Water Equations on {CPUs}, {GPUs}, and {FPGAs} with {SYCL}},
  booktitle    = {Proceedings of the Platform for Advanced Scientific Computing Conference (PASC)},
  year         = {2024},
  publisher    = {ACM},
  doi          = {10.1145/3659914.3659925},
}

% PAMM 24(3), 2024.
% Typed as an article because the record carries journal-style metadata
% (ISSN plus volume and issue number); classic styles warn when a
% proceedings-paper entry sets both volume and number.
% NOTE(review): confirm the article typing against the publisher record.
% The abstract had raw JATS XML tags and Unicode hyphens from the export;
% these are replaced by plain text. The author list is single-braced so that
% every " and "-separated name is parsed on its own.
@article{60359,
  author       = {Plewinski, Jonas and Alt, Christoph and Köstler, Harald and Rüde, Ulrich},
  title        = {Performance analysis of the free surface lattice {Boltzmann} implementation in {waLBerla}},
  journal      = {PAMM},
  volume       = {24},
  number       = {3},
  year         = {2024},
  publisher    = {Wiley},
  issn         = {1617-7061},
  doi          = {10.1002/pamm.202400196},
  abstract     = {The free-surface lattice Boltzmann method uses a volume of fluid approach to simulate immiscible two-fluid flow problems. It divides the simulation domain into three distinct phases---gas, fluid, and interface---where computation within the gas phase is disregarded. The interface delineates a one-cell-thick layer between the first two phases, validated physically for implementation in the HPC C++ multiphysics framework waLBerla but lacking an exhaustive performance analysis. This paper aims to shed light on node-level performance on different architectures, employing continuous benchmarking, showing and analyzing weak scaling results on a modern HPC cluster, the Fritz supercomputer, and reporting energy consumption for the current implementation.},
}

% Journal article: International Journal of Parallel, Emergent and
% Distributed Systems 39(4), 2024.
% The author list is single-braced so that every " and "-separated name is
% parsed on its own; the title has no words that need case protection.
@article{60358,
  author       = {Alt, Christoph and Lanser, Martin and Plewinski, Jonas and Janki, Atin and Klawonn, Axel and Köstler, Harald and Selzer, Michael and Rüde, Ulrich},
  title        = {A continuous benchmarking infrastructure for high-performance computing applications},
  journal      = {International Journal of Parallel, Emergent and Distributed Systems},
  volume       = {39},
  number       = {4},
  pages        = {501--523},
  year         = {2024},
  publisher    = {Informa UK Limited},
  issn         = {1744-5760},
  doi          = {10.1080/17445760.2024.2360190},
}

% Book chapter, 2023.
% The author list is single-braced so that every " and "-separated name is
% parsed on its own; only the acronyms in the title are brace-protected.
% NOTE(review): booktitle currently holds the series name (Lecture Notes in
% Computer Science). The title of the containing volume, its series number
% and the page range are not present in this record; look them up via the DOI,
% then move the series name into a series field.
@inbook{46191,
  author       = {Alt, Christoph and Kenter, Tobias and Faghih-Naini, Sara and Faj, Jennifer and Opdenhövel, Jan-Oliver and Plessl, Christian and Aizinger, Vadym and Hönig, Jan and Köstler, Harald},
  title        = {Shallow Water {DG} Simulations on {FPGAs}: Design and Comparison of a Novel Code Generation Pipeline},
  booktitle    = {Lecture Notes in Computer Science},
  year         = {2023},
  publisher    = {Springer Nature Switzerland},
  isbn         = {9783031320408},
  issn         = {0302-9743},
  doi          = {10.1007/978-3-031-32041-5_5},
}

