% Formatting conventions for the entries below:
%   - author lists use single braces so that " and " separates the names;
%   - titles keep their original capitalisation, with braces only around
%     acronyms, proper nouns and math that must survive style recasing;
%   - arXiv identifiers are stored in eprint/archiveprefix.

@unpublished{43439,
  author        = {Van Hirtum, Lennart and De Causmaecker, Patrick and Goemaere, Jens and Kenter, Tobias and Riebler, Heinrich and Lass, Michael and Plessl, Christian},
  title         = {A computation of {D(9)} using {FPGA} Supercomputing},
  year          = {2023},
  eprint        = {2304.03039},
  archiveprefix = {arXiv},
  note          = {Preprint},
  abstract      = {This preprint makes the claim of having computed the $9^{th}$ Dedekind Number. This was done by building an efficient FPGA Accelerator for the core operation of the process, and parallelizing it on the Noctua 2 Supercluster at Paderborn University. The resulting value is 286386577668298411128469151667598498812366. This value can be verified in two steps. We have made the data file containing the 490M results available, each of which can be verified separately on CPU, and the whole file sums to our proposed value.},
}

@inproceedings{43228,
  author    = {Wu, Xin and Kenter, Tobias and Schade, Robert and Kühne, Thomas and Plessl, Christian},
  title     = {Computing and Compressing Electron Repulsion Integrals on {FPGAs}},
  booktitle = {2023 {IEEE} 31st Annual International Symposium on Field-Programmable Custom Computing Machines ({FCCM})},
  pages     = {162--173},
  year      = {2023},
  doi       = {10.1109/FCCM57271.2023.00026},
  abstract  = {The computation of electron repulsion integrals (ERIs) over Gaussian-type orbitals (GTOs) is a challenging problem in quantum-mechanics-based atomistic simulations. In practical simulations, several trillions of ERIs may have to be computed for every time step. In this work, we investigate FPGAs as accelerators for the ERI computation. We use template parameters, here within the Intel oneAPI tool flow, to create customized designs for 256 different ERI quartet classes, based on their orbitals. To maximize data reuse, all intermediates are buffered in FPGA on-chip memory with customized layout. The pre-calculation of intermediates also helps to overcome data dependencies caused by multi-dimensional recurrence relations. The involved loop structures are partially or even fully unrolled for high throughput of FPGA kernels. Furthermore, a lossy compression algorithm utilizing arbitrary bitwidth integers is integrated in the FPGA kernels. To our best knowledge, this is the first work on ERI computation on FPGAs that supports more than just the single most basic quartet class. Also, the integration of ERI computation and compression is a novelty that is not even covered by CPU or GPU libraries so far. Our evaluation shows that using 16-bit integer for the ERI compression, the fastest FPGA kernels exceed the performance of 10 GERIS ($10 \times 10^9$ ERIs per second) on one Intel Stratix 10 GX 2800 FPGA, with maximum absolute errors around $10^{-7}$ - $10^{-5}$ Hartree. The measured throughput can be accurately explained by a performance model. The FPGA kernels deployed on 2 FPGAs outperform similar computations using the widely used libint reference on a two-socket server with 40 Xeon Gold 6148 CPU cores of the same process technology by factors up to 6.0x and on a new two-socket server with 128 EPYC 7713 CPU cores by up to 1.9x.},
}

@inproceedings{46189,
  author    = {Prouveur, Charles and Haefele, Matthieu and Kenter, Tobias and Voss, Nils},
  title     = {{FPGA} Acceleration for {HPC} Supercapacitor Simulations},
  booktitle = {Proceedings of the Platform for Advanced Scientific Computing Conference},
  publisher = {ACM},
  year      = {2023},
  doi       = {10.1145/3592979.3593419},
}

@unpublished{50172,
  author        = {Ochsenfeld, Stephan and Schlichting, Sören},
  title         = {Hydrodynamic and Non-hydrodynamic Excitations in Kinetic Theory -- {A} Numerical Analysis in Scalar Field Theory},
  year          = {2023},
  eprint        = {2308.04491},
  archiveprefix = {arXiv},
  note          = {Preprint},
  abstract      = {Viscous hydrodynamics serves as a successful mesoscopic description of the Quark-Gluon Plasma produced in relativistic heavy-ion collisions. In order to investigate, how such an effective description emerges from the underlying microscopic dynamics we calculate the hydrodynamic and non-hydrodynamic modes of linear response in the sound channel from a first-principle calculation in kinetic theory. We do this with a new approach wherein we discretize the collision kernel to directly calculate eigenvalues and eigenmodes of the evolution operator. This allows us to study the Green's functions at any point in the complex frequency space. Our study focuses on scalar theory with quartic interaction and we find that the analytic structure of Green's functions in the complex plane is far more complicated than just poles or cuts which is a first step towards an equivalent study in QCD kinetic theory.},
}

@unpublished{50221,
  author        = {Pleines, Marco and Pallasch, Matthias and Zimmer, Frank and Preuss, Mike},
  title         = {{Memory Gym}: Towards Endless Tasks to Benchmark Memory Capabilities of Agents},
  year          = {2023},
  eprint        = {2309.17207},
  archiveprefix = {arXiv},
  note          = {Preprint},
  abstract      = {Memory Gym presents a suite of 2D partially observable environments, namely Mortar Mayhem, Mystery Path, and Searing Spotlights, designed to benchmark memory capabilities in decision-making agents. These environments, originally with finite tasks, are expanded into innovative, endless formats, mirroring the escalating challenges of cumulative memory games such as ``I packed my bag''. This progression in task design shifts the focus from merely assessing sample efficiency to also probing the levels of memory effectiveness in dynamic, prolonged scenarios. To address the gap in available memory-based Deep Reinforcement Learning baselines, we introduce an implementation that integrates Transformer-XL (TrXL) with Proximal Policy Optimization. This approach utilizes TrXL as a form of episodic memory, employing a sliding window technique. Our comparative study between the Gated Recurrent Unit (GRU) and TrXL reveals varied performances across different settings. TrXL, on the finite environments, demonstrates superior sample efficiency in Mystery Path and outperforms in Mortar Mayhem. However, GRU is more efficient on Searing Spotlights. Most notably, in all endless tasks, GRU makes a remarkable resurgence, consistently outperforming TrXL by significant margins. Website and Source Code: https://github.com/MarcoMeter/endless-memory-gym/},
}

@inproceedings{46190,
  author    = {Opdenhövel, Jan-Oliver and Plessl, Christian and Kenter, Tobias},
  title     = {Mutation Tree Reconstruction of Tumor Cells on {FPGAs} Using a Bit-Level Matrix Representation},
  booktitle = {Proceedings of the 13th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies},
  publisher = {ACM},
  year      = {2023},
  doi       = {10.1145/3597031.3597050},
}

@inproceedings{46188,
  author    = {Faj, Jennifer and Kenter, Tobias and Faghih-Naini, Sara and Plessl, Christian and Aizinger, Vadym},
  title     = {Scalable {Multi-FPGA} Design of a Discontinuous {Galerkin} Shallow-Water Model on Unstructured Meshes},
  booktitle = {Proceedings of the Platform for Advanced Scientific Computing Conference},
  publisher = {ACM},
  year      = {2023},
  doi       = {10.1145/3592979.3593407},
}

@inproceedings{46193,
  author    = {Karp, Martin and Podobas, Artur and Kenter, Tobias and Jansson, Niclas and Plessl, Christian and Schlatter, Philipp and Markidis, Stefano},
  title     = {A High-Fidelity Flow Solver for Unstructured Meshes on Field-Programmable Gate Arrays: Design, Evaluation, and Future Challenges},
  booktitle = {International Conference on High Performance Computing in Asia-Pacific Region},
  publisher = {ACM},
  year      = {2022},
  doi       = {10.1145/3492805.3492808},
}

% NOTE(review): the source record carries no arXiv identifier or venue for
% this entry; the "Preprint" note is an assumption -- confirm and add
% eprint/archiveprefix if one exists.
@unpublished{36879,
  author   = {Churavy, Valentin and Godoy, William F. and Bauer, Carsten and Ranocha, Hendrik and Schlottke-Lakemper, Michael and Räss, Ludovic and Blaschke, Johannes and Giordano, Mosè and Schnetter, Erik and Omlin, Samuel and Vetter, Jeffrey S. and Edelman, Alan},
  title    = {Bridging {HPC} Communities through the {Julia} Programming Language},
  year     = {2022},
  note     = {Preprint},
  abstract = {The Julia programming language has evolved into a modern alternative to fill existing gaps in scientific computing and data science applications. Julia leverages a unified and coordinated single-language and ecosystem paradigm and has a proven track record of achieving high performance without sacrificing user productivity. These aspects make Julia a viable alternative to high-performance computing's (HPC's) existing and increasingly costly many-body workflow composition strategy in which traditional HPC languages (e.g., Fortran, C, C++) are used for simulations, and higher-level languages (e.g., Python, R, MATLAB) are used for data analysis and interactive computing. Julia's rapid growth in language capabilities, package ecosystem, and community make it a promising universal language for HPC. This paper presents the views of a multidisciplinary group of researchers from academia, government, and industry that advocate for an HPC software development paradigm that emphasizes developer productivity, workflow portability, and low barriers for entry. We believe that the Julia programming language, its ecosystem, and its community provide modern and powerful capabilities that enable this group's objectives. Crucially, we believe that Julia can provide a feasible and less costly approach to programming scientific applications and workflows that target HPC facilities. In this work, we examine the current practice and role of Julia as a common, end-to-end programming model to address major challenges in scientific reproducibility, data-driven AI/machine learning, co-design and workflows, scalability and performance portability in heterogeneous computing, network communication, data management, and community education. As a result, the diversification of current investments to fulfill the needs of the upcoming decade is crucial as more supercomputing centers prepare for the exascale era.},
}

@unpublished{32404,
  author        = {Kühne, Thomas and Plessl, Christian and Schade, Robert and Schütt, Ole},
  title         = {{CP2K} on the road to exascale},
  year          = {2022},
  eprint        = {2205.14741},
  archiveprefix = {arXiv},
  note          = {Preprint},
  abstract      = {The CP2K program package, which can be considered as the swiss army knife of atomistic simulations, is presented with a special emphasis on ab-initio molecular dynamics using the second-generation Car-Parrinello method. After outlining current and near-term development efforts with regards to massively parallel low-scaling post-Hartree-Fock and eigenvalue solvers, novel approaches on how we plan to take full advantage of future low-precision hardware architectures are introduced. Our focus here is on combining our submatrix method with the approximate computing paradigm to address the immanent exascale era.},
}

@unpublished{32177,
  author        = {Borghini, Nicolas and Borrell, Marc and Roch, Hendrik},
  title         = {Early time behavior of spatial and momentum anisotropies in kinetic theory across different {Knudsen} numbers},
  year          = {2022},
  eprint        = {2201.13294},
  archiveprefix = {arXiv},
  note          = {Preprint},
  abstract      = {We investigate the early time development of the anisotropic transverse flow and spatial eccentricities of a fireball with various particle-based transport approaches using a fixed initial condition. In numerical simulations ranging from the quasi-collisionless case to the hydrodynamic regime, we find that the onset of $v_n$ and of related measures of anisotropic flow can be described with a simple power-law ansatz, with an exponent that depends on the amount of rescatterings in the system. In the few-rescatterings regime we perform semi-analytical calculations, based on a systematic expansion in powers of time and the cross section, which can reproduce the numerical findings.},
}

@unpublished{32178,
  author        = {Bachmann, Benedikt and Borghini, Nicolas and Feld, Nina and Roch, Hendrik},
  title         = {Even anisotropic-flow harmonics are from {Venus}, odd ones are from {Mars}},
  year          = {2022},
  eprint        = {2203.13306},
  archiveprefix = {arXiv},
  note          = {Preprint},
  abstract      = {We test the ability of the ``escape mechanism'' to create the anisotropic flow observed in high-energy nuclear collisions. We compare the flow harmonics $v_n$ in the few-rescatterings regime from two types of transport simulations, with $2\to 2$ and $2\to 0$ collision kernels respectively, and from analytical calculations neglecting the gain term of the Boltzmann equation. We find that the even flow harmonics are similar in the three approaches, while the odd harmonics differ significantly.},
}

@unpublished{33493,
  author        = {Gavini, Vikram and Baroni, Stefano and Blum, Volker and Bowler, David R. and Buccheri, Alexander and Chelikowsky, James R. and Das, Sambit and Dawson, William and Delugas, Pietro and Dogan, Mehmet and Draxl, Claudia and Galli, Giulia and Genovese, Luigi and Giannozzi, Paolo and Giantomassi, Matteo and Gonze, Xavier and Govoni, Marco and Gulans, Andris and Gygi, François and Herbert, John M. and Kokott, Sebastian and Kühne, Thomas and Liou, Kai-Hsin and Miyazaki, Tsuyoshi and Motamarri, Phani and Nakata, Ayako and Pask, John E. and Plessl, Christian and Ratcliff, Laura E. and Richard, Ryan M. and Rossi, Mariana and Schade, Robert and Scheffler, Matthias and Schütt, Ole and Suryanarayana, Phanish and Torrent, Marc and Truflandier, Lionel and Windus, Theresa L. and Xu, Qimen and Yu, Victor W.-Z. and Perez, Danny},
  title         = {Roadmap on Electronic Structure Codes in the Exascale Era},
  year          = {2022},
  eprint        = {2209.12747},
  archiveprefix = {arXiv},
  note          = {Preprint},
  abstract      = {Electronic structure calculations have been instrumental in providing many important insights into a range of physical and chemical properties of various molecular and solid-state systems. Their importance to various fields, including materials science, chemical sciences, computational chemistry and device physics, is underscored by the large fraction of available public supercomputing resources devoted to these calculations. As we enter the exascale era, exciting new opportunities to increase simulation numbers, sizes, and accuracies present themselves. In order to realize these promises, the community of electronic structure software developers will however first have to tackle a number of challenges pertaining to the efficient use of new architectures that will rely heavily on massive parallelism and hardware accelerators. This roadmap provides a broad overview of the state-of-the-art in electronic structure calculations and of the various new directions being pursued by the community. It covers 14 electronic structure codes, presenting their current status, their development priorities over the next five years, and their plans towards tackling the challenges and leveraging the opportunities presented by the advent of exascale computing.},
}

% NOTE(review): same work as entry 33493 (identical authors, title and
% arXiv identifier). Both keys are kept so that existing citations of either
% key keep resolving; merge once the citing documents are updated.
@unpublished{46275,
  author        = {Gavini, Vikram and Baroni, Stefano and Blum, Volker and Bowler, David R. and Buccheri, Alexander and Chelikowsky, James R. and Das, Sambit and Dawson, William and Delugas, Pietro and Dogan, Mehmet and Draxl, Claudia and Galli, Giulia and Genovese, Luigi and Giannozzi, Paolo and Giantomassi, Matteo and Gonze, Xavier and Govoni, Marco and Gulans, Andris and Gygi, François and Herbert, John M. and Kokott, Sebastian and Kühne, Thomas and Liou, Kai-Hsin and Miyazaki, Tsuyoshi and Motamarri, Phani and Nakata, Ayako and Pask, John E. and Plessl, Christian and Ratcliff, Laura E. and Richard, Ryan M. and Rossi, Mariana and Schade, Robert and Scheffler, Matthias and Schütt, Ole and Suryanarayana, Phanish and Torrent, Marc and Truflandier, Lionel and Windus, Theresa L. and Xu, Qimen and Yu, Victor W.-Z. and Perez, Danny},
  title         = {Roadmap on Electronic Structure Codes in the Exascale Era},
  year          = {2022},
  eprint        = {2209.12747},
  archiveprefix = {arXiv},
  note          = {Preprint},
  abstract      = {Electronic structure calculations have been instrumental in providing many important insights into a range of physical and chemical properties of various molecular and solid-state systems. Their importance to various fields, including materials science, chemical sciences, computational chemistry and device physics, is underscored by the large fraction of available public supercomputing resources devoted to these calculations. As we enter the exascale era, exciting new opportunities to increase simulation numbers, sizes, and accuracies present themselves. In order to realize these promises, the community of electronic structure software developers will however first have to tackle a number of challenges pertaining to the efficient use of new architectures that will rely heavily on massive parallelism and hardware accelerators. This roadmap provides a broad overview of the state-of-the-art in electronic structure calculations and of the various new directions being pursued by the community. It covers 14 electronic structure codes, presenting their current status, their development priorities over the next five years, and their plans towards tackling the challenges and leveraging the opportunities presented by the advent of exascale computing.},
}

@inproceedings{46194,
  author    = {Kenter, Tobias and Shambhu, Adesh and Faghih-Naini, Sara and Aizinger, Vadym},
  title     = {Algorithm-hardware co-design of a discontinuous {Galerkin} shallow-water model for a dataflow architecture on {FPGA}},
  booktitle = {Proceedings of the Platform for Advanced Scientific Computing Conference},
  publisher = {ACM},
  year      = {2021},
  doi       = {10.1145/3468267.3470617},
}

% NOTE(review): "Hawaii" was exported as `location`; it is stored as `venue`
% on the assumption that it names the conference site rather than a
% publisher city -- confirm.
@inproceedings{20886,
  author    = {Nickchen, Tobias and Heindorf, Stefan and Engels, Gregor},
  title     = {Generating Physically Sound Training Data for Image Recognition of Additively Manufactured Parts},
  booktitle = {Proceedings of the {IEEE/CVF} Winter Conference on Applications of Computer Vision},
  venue     = {Hawaii},
  pages     = {1994--2002},
  year      = {2021},
}

@inproceedings{46195,
  author    = {Karp, Martin and Podobas, Artur and Jansson, Niclas and Kenter, Tobias and Plessl, Christian and Schlatter, Philipp and Markidis, Stefano},
  title     = {High-Performance Spectral Element Methods on Field-Programmable Gate Arrays: Implementation, Evaluation, and Future Projection},
  booktitle = {2021 {IEEE} International Parallel and Distributed Processing Symposium ({IPDPS})},
  publisher = {IEEE},
  year      = {2021},
  doi       = {10.1109/ipdps49936.2021.00116},
}

% NOTE(review): same work as entry 46195 (identical authors, title and DOI).
% Both keys are kept so that existing citations of either key keep
% resolving; merge once the citing documents are updated.
@inproceedings{29937,
  author    = {Karp, Martin and Podobas, Artur and Jansson, Niclas and Kenter, Tobias and Plessl, Christian and Schlatter, Philipp and Markidis, Stefano},
  title     = {High-Performance Spectral Element Methods on Field-Programmable Gate Arrays: Implementation, Evaluation, and Future Projection},
  booktitle = {2021 {IEEE} International Parallel and Distributed Processing Symposium ({IPDPS})},
  publisher = {IEEE},
  year      = {2021},
  doi       = {10.1109/ipdps49936.2021.00116},
}

@unpublished{32245,
  author        = {Farheen, Henna and Leuteritz, Till and Linden, Stefan and Myroshnychenko, Viktor and Förstner, Jens},
  title         = {Optimization of optical waveguide antennas for directive emission of light},
  year          = {2021},
  eprint        = {2106.02468},
  archiveprefix = {arXiv},
  note          = {Preprint},
  abstract      = {Optical travelling wave antennas offer unique opportunities to control and selectively guide light into a specific direction which renders them as excellent candidates for optical communication and sensing. These applications require state of the art engineering to reach optimized functionalities such as high directivity and radiation efficiency, low side lobe level, broadband and tunable capabilities, and compact design. In this work we report on the numerical optimization of the directivity of optical travelling wave antennas made from low-loss dielectric materials using full-wave numerical simulations in conjunction with a particle swarm optimization algorithm. The antennas are composed of a reflector and a director deposited on a glass substrate and an emitter placed in the feed gap between them serves as an internal source of excitation. In particular, we analysed antennas with rectangular- and horn-shaped directors made of either Hafnium dioxide or Silicon. The optimized antennas produce highly directional emission due to the presence of two dominant guided TE modes in the director in addition to leaky modes. These guided modes dominate the far-field emission pattern and govern the direction of the main lobe emission which predominately originates from the end facet of the director. Our work also provides a comprehensive analysis of the modes, radiation patterns, parametric influences, and bandwidths of the antennas that highlights their robust nature.},
}

@unpublished{32236,
  author        = {Rose, H. and Tikhonova, O. V. and Meier, T. and Sharapova, P.},
  title         = {Steady states of {$\Lambda$}-type three-level systems excited by quantum light in lossy cavities},
  year          = {2021},
  eprint        = {2109.00842},
  archiveprefix = {arXiv},
  note          = {Preprint},
  abstract      = {The interaction between quantum light and matter is being intensively studied for systems that are enclosed in high-$Q$ cavities which strongly enhance the light-matter coupling. However, for many applications, cavities with lower $Q$-factors are preferred due to the increased spectral width of the cavity mode. Here, we investigate the interaction between quantum light and matter represented by a $\Lambda$-type three-level system in lossy cavities, assuming that cavity losses are the dominant loss mechanism. We demonstrate that cavity losses lead to non-trivial steady states of the electronic occupations that can be controlled by the loss rate and the initial statistics of the quantum fields. The mechanism of formation of such steady states can be understood on the basis of the equations of motion. Analytical expressions for steady states and their numerical simulations are presented and discussed.},
}