[{"publication_status":"published","year":"2024","citation":{"chicago":"Heuchler, Sebastian, and Christian Plessl. “Reproduction and Extension of Playing Strength Models in Computer Go.” In <i>2024 IEEE Conference on Games (CoG)</i>. IEEE, 2024. <a href=\"https://doi.org/10.1109/cog60054.2024.10645535\">https://doi.org/10.1109/cog60054.2024.10645535</a>.","ieee":"S. Heuchler and C. Plessl, “Reproduction and Extension of Playing Strength Models in Computer Go,” 2024, doi: <a href=\"https://doi.org/10.1109/cog60054.2024.10645535\">10.1109/cog60054.2024.10645535</a>.","ama":"Heuchler S, Plessl C. Reproduction and Extension of Playing Strength Models in Computer Go. In: <i>2024 IEEE Conference on Games (CoG)</i>. IEEE; 2024. doi:<a href=\"https://doi.org/10.1109/cog60054.2024.10645535\">10.1109/cog60054.2024.10645535</a>","apa":"Heuchler, S., &#38; Plessl, C. (2024). Reproduction and Extension of Playing Strength Models in Computer Go. <i>2024 IEEE Conference on Games (CoG)</i>. <a href=\"https://doi.org/10.1109/cog60054.2024.10645535\">https://doi.org/10.1109/cog60054.2024.10645535</a>","short":"S. Heuchler, C. Plessl, in: 2024 IEEE Conference on Games (CoG), IEEE, 2024.","mla":"Heuchler, Sebastian, and Christian Plessl. 
“Reproduction and Extension of Playing Strength Models in Computer Go.” <i>2024 IEEE Conference on Games (CoG)</i>, IEEE, 2024, doi:<a href=\"https://doi.org/10.1109/cog60054.2024.10645535\">10.1109/cog60054.2024.10645535</a>.","bibtex":"@inproceedings{Heuchler_Plessl_2024, title={Reproduction and Extension of Playing Strength Models in Computer Go}, DOI={<a href=\"https://doi.org/10.1109/cog60054.2024.10645535\">10.1109/cog60054.2024.10645535</a>}, booktitle={2024 IEEE Conference on Games (CoG)}, publisher={IEEE}, author={Heuchler, Sebastian and Plessl, Christian}, year={2024} }"},"publisher":"IEEE","date_updated":"2025-06-23T12:35:55Z","author":[{"last_name":"Heuchler","id":"47054","full_name":"Heuchler, Sebastian","first_name":"Sebastian"},{"full_name":"Plessl, Christian","id":"16153","last_name":"Plessl","orcid":"0000-0001-5728-9982","first_name":"Christian"}],"date_created":"2025-06-23T12:29:07Z","title":"Reproduction and Extension of Playing Strength Models in Computer Go","doi":"10.1109/cog60054.2024.10645535","publication":"2024 IEEE Conference on Games (CoG)","type":"conference","status":"public","_id":"60338","user_id":"47054","language":[{"iso":"eng"}]},{"year":"2024","quality_controlled":"1","title":"Optimizing Communication for Latency Sensitive HPC Applications on up to 48 FPGAs Using ACCL","publisher":"Springer Nature Switzerland","date_created":"2025-11-04T09:50:24Z","abstract":[{"lang":"eng","text":"Most FPGA boards in the HPC domain are well-suited for parallel scaling because of the direct integration of versatile and high-throughput network ports. However, the utilization of their network capabilities is often challenging and error-prone because the whole network stack and communication patterns have to be implemented and managed on the FPGAs. 
Also, this approach conceptually involves a trade-off between the performance potential of improved communication and the impact of resource consumption for communication infrastructure, since the utilized resources on the FPGAs could otherwise be used for computations. In this work, we investigate this trade-off, firstly, by using synthetic benchmarks to evaluate the different configuration options of the communication framework ACCL and their impact on communication latency and throughput. Finally, we use our findings to implement a shallow water simulation whose scalability heavily depends on low-latency communication. With a suitable configuration of ACCL, good scaling behavior can be shown to all 48 FPGAs installed in the system. Overall, the results show that the availability of inter-FPGA communication frameworks as well as the configurability of framework and network stack are crucial to achieve the best application performance with low latency communication."}],"publication":"Lecture Notes in Computer Science","language":[{"iso":"eng"}],"place":"Cham","citation":{"apa":"Meyer, M., Kenter, T., Petrica, L., O’Brien, K., Blott, M., &#38; Plessl, C. (2024). Optimizing Communication for Latency Sensitive HPC Applications on up to 48 FPGAs Using ACCL. In <i>Lecture Notes in Computer Science</i>. Springer Nature Switzerland. <a href=\"https://doi.org/10.1007/978-3-031-69766-1_9\">https://doi.org/10.1007/978-3-031-69766-1_9</a>","bibtex":"@inbook{Meyer_Kenter_Petrica_O’Brien_Blott_Plessl_2024, place={Cham}, title={Optimizing Communication for Latency Sensitive HPC Applications on up to 48 FPGAs Using ACCL}, DOI={<a href=\"https://doi.org/10.1007/978-3-031-69766-1_9\">10.1007/978-3-031-69766-1_9</a>}, booktitle={Lecture Notes in Computer Science}, publisher={Springer Nature Switzerland}, author={Meyer, Marius and Kenter, Tobias and Petrica, Lucian and O’Brien, Kenneth and Blott, Michaela and Plessl, Christian}, year={2024} }","short":"M. Meyer, T. Kenter, L. 
Petrica, K. O’Brien, M. Blott, C. Plessl, in: Lecture Notes in Computer Science, Springer Nature Switzerland, Cham, 2024.","mla":"Meyer, Marius, et al. “Optimizing Communication for Latency Sensitive HPC Applications on up to 48 FPGAs Using ACCL.” <i>Lecture Notes in Computer Science</i>, Springer Nature Switzerland, 2024, doi:<a href=\"https://doi.org/10.1007/978-3-031-69766-1_9\">10.1007/978-3-031-69766-1_9</a>.","ieee":"M. Meyer, T. Kenter, L. Petrica, K. O’Brien, M. Blott, and C. Plessl, “Optimizing Communication for Latency Sensitive HPC Applications on up to 48 FPGAs Using ACCL,” in <i>Lecture Notes in Computer Science</i>, Cham: Springer Nature Switzerland, 2024.","chicago":"Meyer, Marius, Tobias Kenter, Lucian Petrica, Kenneth O’Brien, Michaela Blott, and Christian Plessl. “Optimizing Communication for Latency Sensitive HPC Applications on up to 48 FPGAs Using ACCL.” In <i>Lecture Notes in Computer Science</i>. Cham: Springer Nature Switzerland, 2024. <a href=\"https://doi.org/10.1007/978-3-031-69766-1_9\">https://doi.org/10.1007/978-3-031-69766-1_9</a>.","ama":"Meyer M, Kenter T, Petrica L, O’Brien K, Blott M, Plessl C. Optimizing Communication for Latency Sensitive HPC Applications on up to 48 FPGAs Using ACCL. In: <i>Lecture Notes in Computer Science</i>. Springer Nature Switzerland; 2024. 
doi:<a href=\"https://doi.org/10.1007/978-3-031-69766-1_9\">10.1007/978-3-031-69766-1_9</a>"},"publication_identifier":{"issn":["0302-9743","1611-3349"],"isbn":["9783031697654","9783031697661"]},"publication_status":"published","doi":"10.1007/978-3-031-69766-1_9","main_file_link":[{"open_access":"1"}],"date_updated":"2025-11-04T09:51:22Z","oa":"1","author":[{"last_name":"Meyer","full_name":"Meyer, Marius","id":"40778","first_name":"Marius"},{"full_name":"Kenter, Tobias","id":"3145","last_name":"Kenter","first_name":"Tobias"},{"first_name":"Lucian","full_name":"Petrica, Lucian","last_name":"Petrica"},{"last_name":"O’Brien","full_name":"O’Brien, Kenneth","first_name":"Kenneth"},{"first_name":"Michaela","last_name":"Blott","full_name":"Blott, Michaela"},{"last_name":"Plessl","orcid":"0000-0001-5728-9982","id":"16153","full_name":"Plessl, Christian","first_name":"Christian"}],"status":"public","type":"book_chapter","_id":"62067","project":[{"_id":"52","name":"Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"department":[{"_id":"27"},{"_id":"518"}],"user_id":"3145"},{"project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"_id":"56604","user_id":"3145","department":[{"_id":"27"},{"_id":"518"}],"status":"public","type":"journal_article","main_file_link":[{"open_access":"1"}],"doi":"10.1145/3674147","oa":"1","date_updated":"2025-11-04T09:53:26Z","author":[{"first_name":"Lennart","last_name":"Van Hirtum","full_name":"Van Hirtum, Lennart","id":"100210"},{"first_name":"Patrick","last_name":"De Causmaecker","full_name":"De Causmaecker, Patrick"},{"first_name":"Jens","last_name":"Goemaere","full_name":"Goemaere, Jens"},{"last_name":"Kenter","id":"3145","full_name":"Kenter, Tobias","first_name":"Tobias"},{"last_name":"Riebler","id":"8961","full_name":"Riebler, Heinrich","first_name":"Heinrich"},{"first_name":"Michael","last_name":"Lass","orcid":"0000-0002-5708-7632","full_name":"Lass, 
Michael","id":"24135"},{"last_name":"Plessl","orcid":"0000-0001-5728-9982","full_name":"Plessl, Christian","id":"16153","first_name":"Christian"}],"volume":17,"citation":{"apa":"Van Hirtum, L., De Causmaecker, P., Goemaere, J., Kenter, T., Riebler, H., Lass, M., &#38; Plessl, C. (2024). A Computation of the Ninth Dedekind Number Using FPGA Supercomputing. <i>ACM Transactions on Reconfigurable Technology and Systems</i>, <i>17</i>(3), 1–28. <a href=\"https://doi.org/10.1145/3674147\">https://doi.org/10.1145/3674147</a>","bibtex":"@article{Van Hirtum_De Causmaecker_Goemaere_Kenter_Riebler_Lass_Plessl_2024, title={A Computation of the Ninth Dedekind Number Using FPGA Supercomputing}, volume={17}, DOI={<a href=\"https://doi.org/10.1145/3674147\">10.1145/3674147</a>}, number={3}, journal={ACM Transactions on Reconfigurable Technology and Systems}, publisher={Association for Computing Machinery (ACM)}, author={Van Hirtum, Lennart and De Causmaecker, Patrick and Goemaere, Jens and Kenter, Tobias and Riebler, Heinrich and Lass, Michael and Plessl, Christian}, year={2024}, pages={1–28} }","short":"L. Van Hirtum, P. De Causmaecker, J. Goemaere, T. Kenter, H. Riebler, M. Lass, C. Plessl, ACM Transactions on Reconfigurable Technology and Systems 17 (2024) 1–28.","mla":"Van Hirtum, Lennart, et al. “A Computation of the Ninth Dedekind Number Using FPGA Supercomputing.” <i>ACM Transactions on Reconfigurable Technology and Systems</i>, vol. 17, no. 3, Association for Computing Machinery (ACM), 2024, pp. 1–28, doi:<a href=\"https://doi.org/10.1145/3674147\">10.1145/3674147</a>.","chicago":"Van Hirtum, Lennart, Patrick De Causmaecker, Jens Goemaere, Tobias Kenter, Heinrich Riebler, Michael Lass, and Christian Plessl. “A Computation of the Ninth Dedekind Number Using FPGA Supercomputing.” <i>ACM Transactions on Reconfigurable Technology and Systems</i> 17, no. 3 (2024): 1–28. <a href=\"https://doi.org/10.1145/3674147\">https://doi.org/10.1145/3674147</a>.","ieee":"L. 
Van Hirtum <i>et al.</i>, “A Computation of the Ninth Dedekind Number Using FPGA Supercomputing,” <i>ACM Transactions on Reconfigurable Technology and Systems</i>, vol. 17, no. 3, pp. 1–28, 2024, doi: <a href=\"https://doi.org/10.1145/3674147\">10.1145/3674147</a>.","ama":"Van Hirtum L, De Causmaecker P, Goemaere J, et al. A Computation of the Ninth Dedekind Number Using FPGA Supercomputing. <i>ACM Transactions on Reconfigurable Technology and Systems</i>. 2024;17(3):1-28. doi:<a href=\"https://doi.org/10.1145/3674147\">10.1145/3674147</a>"},"page":"1-28","intvolume":"        17","publication_status":"published","publication_identifier":{"issn":["1936-7406","1936-7414"]},"language":[{"iso":"eng"}],"abstract":[{"text":"This manuscript makes the claim of having computed the 9th Dedekind number, D(9). This was done by accelerating the core operation of the process with an efficient FPGA design that outperforms an optimized 64-core CPU reference by 95x. The FPGA execution was parallelized on the Noctua 2 supercomputer at Paderborn University. The resulting value for D(9) is 286386577668298411128469151667598498812366. This value can be verified in two steps. We have made the data file containing the 490 M results available, each of which can be verified separately on CPU, and the whole file sums to our proposed value. The paper explains the mathematical approach in the first part, before putting the focus on a deep dive into the FPGA accelerator implementation followed by a performance analysis. The FPGA implementation was done in Register-Transfer Level using a dual-clock architecture and shows how we achieved an impressive FMax of 450 MHz on the targeted Stratix 10 GX 2,800 FPGAs. 
The total compute time used was 47,000 FPGA hours.","lang":"eng"}],"publication":"ACM Transactions on Reconfigurable Technology and Systems","title":"A Computation of the Ninth Dedekind Number Using FPGA Supercomputing","publisher":"Association for Computing Machinery (ACM)","date_created":"2024-10-14T07:38:29Z","year":"2024","quality_controlled":"1","issue":"3"},{"year":"2024","issue":"1","title":"Scalable quantum detector tomography by high-performance computing","publisher":"IOP Publishing","date_created":"2024-04-04T08:43:18Z","abstract":[{"text":"At large scales, quantum systems may become advantageous over their classical counterparts at performing certain tasks. Developing tools to analyze these systems at the relevant scales, in a manner consistent with quantum mechanics, is therefore critical to benchmarking performance and characterizing their operation. While classical computational approaches cannot perform like-for-like computations of quantum systems beyond a certain scale, classical high-performance computing (HPC) may nevertheless be useful for precisely these characterization and certification tasks. By developing open-source customized algorithms using high-performance computing, we perform quantum tomography on a megascale quantum photonic detector covering a Hilbert space of $10^{6}$. This requires finding $10^{8}$ elements of the matrix corresponding to the positive operator valued measure (POVM), the quantum description of the detector, and is achieved in minutes of computation time. Moreover, by exploiting the structure of the problem, we achieve highly efficient parallel scaling, paving the way for quantum objects up to a system size of $10^{12}$ elements to be reconstructed using this method. In general, this shows that a consistent quantum mechanical description of quantum phenomena is applicable at everyday scales. 
More concretely, this enables the reconstruction of large-scale quantum sources, processes and detectors used in computation and sampling tasks, which may be necessary to prove their nonclassical character or quantum computational advantage.","lang":"eng"}],"publication":"Quantum Science and Technology","language":[{"iso":"eng"}],"external_id":{"arxiv":["2404.02844"]},"intvolume":"        10","citation":{"ieee":"T. Schapeler, R. Schade, M. Lass, C. Plessl, and T. Bartley, “Scalable quantum detector tomography by high-performance computing,” <i>Quantum Science and Technology</i>, vol. 10, no. 1, 2024, doi: <a href=\"https://doi.org/10.1088/2058-9565/ad8511\">10.1088/2058-9565/ad8511</a>.","chicago":"Schapeler, Timon, Robert Schade, Michael Lass, Christian Plessl, and Tim Bartley. “Scalable Quantum Detector Tomography by High-Performance Computing.” <i>Quantum Science and Technology</i> 10, no. 1 (2024). <a href=\"https://doi.org/10.1088/2058-9565/ad8511\">https://doi.org/10.1088/2058-9565/ad8511</a>.","ama":"Schapeler T, Schade R, Lass M, Plessl C, Bartley T. Scalable quantum detector tomography by high-performance computing. <i>Quantum Science and Technology</i>. 2024;10(1). doi:<a href=\"https://doi.org/10.1088/2058-9565/ad8511\">10.1088/2058-9565/ad8511</a>","mla":"Schapeler, Timon, et al. “Scalable Quantum Detector Tomography by High-Performance Computing.” <i>Quantum Science and Technology</i>, vol. 10, no. 1, IOP Publishing, 2024, doi:<a href=\"https://doi.org/10.1088/2058-9565/ad8511\">10.1088/2058-9565/ad8511</a>.","short":"T. Schapeler, R. Schade, M. Lass, C. Plessl, T. 
Bartley, Quantum Science and Technology 10 (2024).","bibtex":"@article{Schapeler_Schade_Lass_Plessl_Bartley_2024, title={Scalable quantum detector tomography by high-performance computing}, volume={10}, DOI={<a href=\"https://doi.org/10.1088/2058-9565/ad8511\">10.1088/2058-9565/ad8511</a>}, number={1}, journal={Quantum Science and Technology}, publisher={IOP Publishing}, author={Schapeler, Timon and Schade, Robert and Lass, Michael and Plessl, Christian and Bartley, Tim}, year={2024} }","apa":"Schapeler, T., Schade, R., Lass, M., Plessl, C., &#38; Bartley, T. (2024). Scalable quantum detector tomography by high-performance computing. <i>Quantum Science and Technology</i>, <i>10</i>(1). <a href=\"https://doi.org/10.1088/2058-9565/ad8511\">https://doi.org/10.1088/2058-9565/ad8511</a>"},"doi":"10.1088/2058-9565/ad8511","main_file_link":[{"open_access":"1"}],"date_updated":"2025-12-16T11:32:12Z","oa":"1","volume":10,"author":[{"orcid":"0000-0001-7652-1716","last_name":"Schapeler","full_name":"Schapeler, Timon","id":"55629","first_name":"Timon"},{"first_name":"Robert","last_name":"Schade","orcid":"0000-0002-6268-5397","id":"75963","full_name":"Schade, Robert"},{"last_name":"Lass","orcid":"0000-0002-5708-7632","full_name":"Lass, Michael","id":"24135","first_name":"Michael"},{"first_name":"Christian","last_name":"Plessl","orcid":"0000-0001-5728-9982","full_name":"Plessl, Christian","id":"16153"},{"first_name":"Tim","id":"49683","full_name":"Bartley, Tim","last_name":"Bartley"}],"status":"public","type":"journal_article","_id":"53202","project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"},{"_id":"239","name":"ERC-Grant: QuESADILLA: Quantum Engineering Superconducting Array Detectors in Low-Light Applications"},{"_id":"191","name":"PhoQuant: Photonische Quantencomputer -  Quantencomputing 
Testplattform"}],"department":[{"_id":"27"},{"_id":"623"},{"_id":"15"}],"user_id":"55629"},{"type":"preprint","publication":"arXiv:2304.03039","abstract":[{"text":"This preprint makes the claim of having computed the $9^{th}$ Dedekind\r\nNumber. This was done by building an efficient FPGA Accelerator for the core\r\noperation of the process, and parallelizing it on the Noctua 2 Supercluster at\r\nPaderborn University. The resulting value is\r\n286386577668298411128469151667598498812366. This value can be verified in two\r\nsteps. We have made the data file containing the 490M results available, each\r\nof which can be verified separately on CPU, and the whole file sums to our\r\nproposed value.","lang":"eng"}],"status":"public","project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"external_id":{"arxiv":["2304.03039"]},"_id":"43439","user_id":"3145","department":[{"_id":"27"},{"_id":"518"}],"language":[{"iso":"eng"}],"year":"2023","citation":{"ieee":"L. Van Hirtum <i>et al.</i>, “A computation of D(9) using FPGA Supercomputing,” <i>arXiv:2304.03039</i>. 2023.","chicago":"Van Hirtum, Lennart, Patrick De Causmaecker, Jens Goemaere, Tobias Kenter, Heinrich Riebler, Michael Lass, and Christian Plessl. “A Computation of D(9) Using FPGA Supercomputing.” <i>ArXiv:2304.03039</i>, 2023.","ama":"Van Hirtum L, De Causmaecker P, Goemaere J, et al. A computation of D(9) using FPGA Supercomputing. <i>arXiv:2304.03039</i>. Published online 2023.","apa":"Van Hirtum, L., De Causmaecker, P., Goemaere, J., Kenter, T., Riebler, H., Lass, M., &#38; Plessl, C. (2023). A computation of D(9) using FPGA Supercomputing. In <i>arXiv:2304.03039</i>.","short":"L. Van Hirtum, P. De Causmaecker, J. Goemaere, T. Kenter, H. Riebler, M. Lass, C. 
Plessl, ArXiv:2304.03039 (2023).","bibtex":"@article{Van Hirtum_De Causmaecker_Goemaere_Kenter_Riebler_Lass_Plessl_2023, title={A computation of D(9) using FPGA Supercomputing}, journal={arXiv:2304.03039}, author={Van Hirtum, Lennart and De Causmaecker, Patrick and Goemaere, Jens and Kenter, Tobias and Riebler, Heinrich and Lass, Michael and Plessl, Christian}, year={2023} }","mla":"Van Hirtum, Lennart, et al. “A Computation of D(9) Using FPGA Supercomputing.” <i>ArXiv:2304.03039</i>, 2023."},"date_updated":"2024-01-22T09:56:42Z","date_created":"2023-04-08T11:05:29Z","author":[{"full_name":"Van Hirtum, Lennart","last_name":"Van Hirtum","first_name":"Lennart"},{"first_name":"Patrick","last_name":"De Causmaecker","full_name":"De Causmaecker, Patrick"},{"last_name":"Goemaere","full_name":"Goemaere, Jens","first_name":"Jens"},{"last_name":"Kenter","id":"3145","full_name":"Kenter, Tobias","first_name":"Tobias"},{"first_name":"Heinrich","full_name":"Riebler, Heinrich","id":"8961","last_name":"Riebler"},{"last_name":"Lass","orcid":"0000-0002-5708-7632","full_name":"Lass, Michael","id":"24135","first_name":"Michael"},{"id":"16153","full_name":"Plessl, Christian","orcid":"0000-0001-5728-9982","last_name":"Plessl","first_name":"Christian"}],"title":"A computation of D(9) using FPGA Supercomputing"},{"status":"public","type":"conference","publication":"Proceedings of the Platform for Advanced Scientific Computing Conference (PASC)","language":[{"iso":"eng"}],"user_id":"3145","department":[{"_id":"27"},{"_id":"518"}],"project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"_id":"46188","citation":{"chicago":"Faj, Jennifer, Tobias Kenter, Sara Faghih-Naini, Christian Plessl, and Vadym Aizinger. “Scalable Multi-FPGA Design of a Discontinuous Galerkin Shallow-Water Model on Unstructured Meshes.” In <i>Proceedings of the Platform for Advanced Scientific Computing Conference (PASC)</i>. ACM, 2023. 
<a href=\"https://doi.org/10.1145/3592979.3593407\">https://doi.org/10.1145/3592979.3593407</a>.","ieee":"J. Faj, T. Kenter, S. Faghih-Naini, C. Plessl, and V. Aizinger, “Scalable Multi-FPGA Design of a Discontinuous Galerkin Shallow-Water Model on Unstructured Meshes,” 2023, doi: <a href=\"https://doi.org/10.1145/3592979.3593407\">10.1145/3592979.3593407</a>.","ama":"Faj J, Kenter T, Faghih-Naini S, Plessl C, Aizinger V. Scalable Multi-FPGA Design of a Discontinuous Galerkin Shallow-Water Model on Unstructured Meshes. In: <i>Proceedings of the Platform for Advanced Scientific Computing Conference (PASC)</i>. ACM; 2023. doi:<a href=\"https://doi.org/10.1145/3592979.3593407\">10.1145/3592979.3593407</a>","apa":"Faj, J., Kenter, T., Faghih-Naini, S., Plessl, C., &#38; Aizinger, V. (2023). Scalable Multi-FPGA Design of a Discontinuous Galerkin Shallow-Water Model on Unstructured Meshes. <i>Proceedings of the Platform for Advanced Scientific Computing Conference (PASC)</i>. <a href=\"https://doi.org/10.1145/3592979.3593407\">https://doi.org/10.1145/3592979.3593407</a>","mla":"Faj, Jennifer, et al. “Scalable Multi-FPGA Design of a Discontinuous Galerkin Shallow-Water Model on Unstructured Meshes.” <i>Proceedings of the Platform for Advanced Scientific Computing Conference (PASC)</i>, ACM, 2023, doi:<a href=\"https://doi.org/10.1145/3592979.3593407\">10.1145/3592979.3593407</a>.","bibtex":"@inproceedings{Faj_Kenter_Faghih-Naini_Plessl_Aizinger_2023, title={Scalable Multi-FPGA Design of a Discontinuous Galerkin Shallow-Water Model on Unstructured Meshes}, DOI={<a href=\"https://doi.org/10.1145/3592979.3593407\">10.1145/3592979.3593407</a>}, booktitle={Proceedings of the Platform for Advanced Scientific Computing Conference (PASC)}, publisher={ACM}, author={Faj, Jennifer and Kenter, Tobias and Faghih-Naini, Sara and Plessl, Christian and Aizinger, Vadym}, year={2023} }","short":"J. Faj, T. Kenter, S. Faghih-Naini, C. Plessl, V. 
Aizinger, in: Proceedings of the Platform for Advanced Scientific Computing Conference (PASC), ACM, 2023."},"year":"2023","related_material":{"link":[{"description":"Open Access available via this link.","relation":"other","url":"https://www.sighpc.org/for-our-community/acm-open-tocs/pasc23-open-toc"}]},"publication_status":"published","quality_controlled":"1","main_file_link":[{"url":"https://dl.acm.org/doi/pdf/10.1145/3592979.3593407"}],"doi":"10.1145/3592979.3593407","title":"Scalable Multi-FPGA Design of a Discontinuous Galerkin Shallow-Water Model on Unstructured Meshes","author":[{"last_name":"Faj","full_name":"Faj, Jennifer","id":"78722","first_name":"Jennifer"},{"id":"3145","full_name":"Kenter, Tobias","last_name":"Kenter","first_name":"Tobias"},{"first_name":"Sara","full_name":"Faghih-Naini, Sara","last_name":"Faghih-Naini"},{"first_name":"Christian","id":"16153","full_name":"Plessl, Christian","orcid":"0000-0001-5728-9982","last_name":"Plessl"},{"last_name":"Aizinger","full_name":"Aizinger, Vadym","first_name":"Vadym"}],"date_created":"2023-07-28T09:42:14Z","publisher":"ACM","date_updated":"2024-04-17T08:09:39Z"},{"date_created":"2023-07-07T08:15:45Z","publisher":"Heinz Nixdorf Institut, Universität Paderborn","title":"Compute Centers I: Heterogeneous Execution Environments","year":"2023","language":[{"iso":"eng"}],"ddc":["004"],"publication":"On-The-Fly Computing -- Individualized IT-services in dynamic markets","file":[{"content_type":"application/pdf","relation":"main_file","creator":"florida","date_created":"2023-07-07T08:15:35Z","date_updated":"2023-07-07T11:17:33Z","access_level":"open_access","file_id":"45894","file_name":"C2-Chapter-SFB-Buch-Final.pdf","file_size":2288788}],"volume":412,"author":[{"first_name":"Tim","last_name":"Hansmeier","orcid":"0000-0003-1377-3339","full_name":"Hansmeier, Tim","id":"49992"},{"full_name":"Kenter, Tobias","id":"3145","last_name":"Kenter","first_name":"Tobias"},{"last_name":"Meyer","full_name":"Meyer, 
Marius","id":"40778","first_name":"Marius"},{"first_name":"Heinrich","full_name":"Riebler, Heinrich","id":"8961","last_name":"Riebler"},{"first_name":"Marco","id":"398","full_name":"Platzner, Marco","last_name":"Platzner"},{"first_name":"Christian","orcid":"0000-0001-5728-9982","last_name":"Plessl","id":"16153","full_name":"Plessl, Christian"}],"oa":"1","date_updated":"2024-05-02T10:33:00Z","doi":"10.5281/zenodo.8068642","has_accepted_license":"1","intvolume":"       412","page":"165-182","citation":{"apa":"Hansmeier, T., Kenter, T., Meyer, M., Riebler, H., Platzner, M., &#38; Plessl, C. (2023). Compute Centers I: Heterogeneous Execution Environments. In C.-J. Haake, F. Meyer auf der Heide, M. Platzner, H. Wachsmuth, &#38; H. Wehrheim (Eds.), <i>On-The-Fly Computing -- Individualized IT-services in dynamic markets</i> (Vol. 412, pp. 165–182). Heinz Nixdorf Institut, Universität Paderborn. <a href=\"https://doi.org/10.5281/zenodo.8068642\">https://doi.org/10.5281/zenodo.8068642</a>","mla":"Hansmeier, Tim, et al. “Compute Centers I: Heterogeneous Execution Environments.” <i>On-The-Fly Computing -- Individualized IT-Services in Dynamic Markets</i>, edited by Claus-Jochen Haake et al., vol. 412, Heinz Nixdorf Institut, Universität Paderborn, 2023, pp. 165–82, doi:<a href=\"https://doi.org/10.5281/zenodo.8068642\">10.5281/zenodo.8068642</a>.","short":"T. Hansmeier, T. Kenter, M. Meyer, H. Riebler, M. Platzner, C. Plessl, in: C.-J. Haake, F. Meyer auf der Heide, M. Platzner, H. Wachsmuth, H. Wehrheim (Eds.), On-The-Fly Computing -- Individualized IT-Services in Dynamic Markets, Heinz Nixdorf Institut, Universität Paderborn, Paderborn, 2023, pp. 
165–182.","bibtex":"@inbook{Hansmeier_Kenter_Meyer_Riebler_Platzner_Plessl_2023, place={Paderborn}, series={Verlagsschriftenreihe des Heinz Nixdorf Instituts}, title={Compute Centers I: Heterogeneous Execution Environments}, volume={412}, DOI={<a href=\"https://doi.org/10.5281/zenodo.8068642\">10.5281/zenodo.8068642</a>}, booktitle={On-The-Fly Computing -- Individualized IT-services in dynamic markets}, publisher={Heinz Nixdorf Institut, Universität Paderborn}, author={Hansmeier, Tim and Kenter, Tobias and Meyer, Marius and Riebler, Heinrich and Platzner, Marco and Plessl, Christian}, editor={Haake, Claus-Jochen and Meyer auf der Heide, Friedhelm and Platzner, Marco and Wachsmuth, Henning and Wehrheim, Heike}, year={2023}, pages={165–182}, collection={Verlagsschriftenreihe des Heinz Nixdorf Instituts} }","ama":"Hansmeier T, Kenter T, Meyer M, Riebler H, Platzner M, Plessl C. Compute Centers I: Heterogeneous Execution Environments. In: Haake C-J, Meyer auf der Heide F, Platzner M, Wachsmuth H, Wehrheim H, eds. <i>On-The-Fly Computing -- Individualized IT-Services in Dynamic Markets</i>. Vol 412. Verlagsschriftenreihe des Heinz Nixdorf Instituts. Heinz Nixdorf Institut, Universität Paderborn; 2023:165-182. doi:<a href=\"https://doi.org/10.5281/zenodo.8068642\">10.5281/zenodo.8068642</a>","chicago":"Hansmeier, Tim, Tobias Kenter, Marius Meyer, Heinrich Riebler, Marco Platzner, and Christian Plessl. “Compute Centers I: Heterogeneous Execution Environments.” In <i>On-The-Fly Computing -- Individualized IT-Services in Dynamic Markets</i>, edited by Claus-Jochen Haake, Friedhelm Meyer auf der Heide, Marco Platzner, Henning Wachsmuth, and Heike Wehrheim, 412:165–82. Verlagsschriftenreihe Des Heinz Nixdorf Instituts. Paderborn: Heinz Nixdorf Institut, Universität Paderborn, 2023. <a href=\"https://doi.org/10.5281/zenodo.8068642\">https://doi.org/10.5281/zenodo.8068642</a>.","ieee":"T. Hansmeier, T. Kenter, M. Meyer, H. Riebler, M. Platzner, and C. 
Plessl, “Compute Centers I: Heterogeneous Execution Environments,” in <i>On-The-Fly Computing -- Individualized IT-services in dynamic markets</i>, vol. 412, C.-J. Haake, F. Meyer auf der Heide, M. Platzner, H. Wachsmuth, and H. Wehrheim, Eds. Paderborn: Heinz Nixdorf Institut, Universität Paderborn, 2023, pp. 165–182."},"place":"Paderborn","department":[{"_id":"7"},{"_id":"27"},{"_id":"518"},{"_id":"78"}],"user_id":"398","series_title":"Verlagsschriftenreihe des Heinz Nixdorf Instituts","_id":"45893","project":[{"grant_number":"160364472","name":"SFB 901: SFB 901: On-The-Fly Computing - Individualisierte IT-Dienstleistungen in dynamischen Märkten ","_id":"1"},{"name":"SFB 901 - C: SFB 901 - Project Area C","_id":"4"},{"name":"SFB 901 - C2: SFB 901 - On-The-Fly Compute Centers I: Heterogene Ausführungsumgebungen (Subproject C2)","_id":"14","grant_number":"160364472"}],"file_date_updated":"2023-07-07T11:17:33Z","type":"book_chapter","status":"public","editor":[{"first_name":"Claus-Jochen","full_name":"Haake, Claus-Jochen","last_name":"Haake"},{"last_name":"Meyer auf der Heide","full_name":"Meyer auf der Heide, Friedhelm","first_name":"Friedhelm"},{"last_name":"Platzner","full_name":"Platzner, Marco","first_name":"Marco"},{"full_name":"Wachsmuth, Henning","last_name":"Wachsmuth","first_name":"Henning"},{"first_name":"Heike","last_name":"Wehrheim","full_name":"Wehrheim, Heike"}]},{"quality_controlled":"1","publication_identifier":{"issn":["1936-7406","1936-7414"]},"publication_status":"published","citation":{"ama":"Meyer M, Kenter T, Plessl C. Multi-FPGA Designs and Scaling of HPC Challenge Benchmarks via MPI and Circuit-Switched Inter-FPGA Networks. <i>ACM Transactions on Reconfigurable Technology and Systems</i>. Published online 2023. doi:<a href=\"https://doi.org/10.1145/3576200\">10.1145/3576200</a>","chicago":"Meyer, Marius, Tobias Kenter, and Christian Plessl. 
“Multi-FPGA Designs and Scaling of HPC Challenge Benchmarks via MPI and Circuit-Switched Inter-FPGA Networks.” <i>ACM Transactions on Reconfigurable Technology and Systems</i>, 2023. <a href=\"https://doi.org/10.1145/3576200\">https://doi.org/10.1145/3576200</a>.","ieee":"M. Meyer, T. Kenter, and C. Plessl, “Multi-FPGA Designs and Scaling of HPC Challenge Benchmarks via MPI and Circuit-Switched Inter-FPGA Networks,” <i>ACM Transactions on Reconfigurable Technology and Systems</i>, 2023, doi: <a href=\"https://doi.org/10.1145/3576200\">10.1145/3576200</a>.","mla":"Meyer, Marius, et al. “Multi-FPGA Designs and Scaling of HPC Challenge Benchmarks via MPI and Circuit-Switched Inter-FPGA Networks.” <i>ACM Transactions on Reconfigurable Technology and Systems</i>, Association for Computing Machinery (ACM), 2023, doi:<a href=\"https://doi.org/10.1145/3576200\">10.1145/3576200</a>.","short":"M. Meyer, T. Kenter, C. Plessl, ACM Transactions on Reconfigurable Technology and Systems (2023).","bibtex":"@article{Meyer_Kenter_Plessl_2023, title={Multi-FPGA Designs and Scaling of HPC Challenge Benchmarks via MPI and Circuit-Switched Inter-FPGA Networks}, DOI={<a href=\"https://doi.org/10.1145/3576200\">10.1145/3576200</a>}, journal={ACM Transactions on Reconfigurable Technology and Systems}, publisher={Association for Computing Machinery (ACM)}, author={Meyer, Marius and Kenter, Tobias and Plessl, Christian}, year={2023} }","apa":"Meyer, M., Kenter, T., &#38; Plessl, C. (2023). Multi-FPGA Designs and Scaling of HPC Challenge Benchmarks via MPI and Circuit-Switched Inter-FPGA Networks. <i>ACM Transactions on Reconfigurable Technology and Systems</i>. 
<a href=\"https://doi.org/10.1145/3576200\">https://doi.org/10.1145/3576200</a>"},"year":"2023","date_created":"2023-01-23T08:40:42Z","author":[{"first_name":"Marius","full_name":"Meyer, Marius","id":"40778","last_name":"Meyer"},{"last_name":"Kenter","id":"3145","full_name":"Kenter, Tobias","first_name":"Tobias"},{"last_name":"Plessl","orcid":"0000-0001-5728-9982","id":"16153","full_name":"Plessl, Christian","first_name":"Christian"}],"date_updated":"2023-07-28T08:02:05Z","publisher":"Association for Computing Machinery (ACM)","oa":"1","doi":"10.1145/3576200","main_file_link":[{"open_access":"1","url":"https://dl.acm.org/doi/10.1145/3576200"}],"title":"Multi-FPGA Designs and Scaling of HPC Challenge Benchmarks via MPI and Circuit-Switched Inter-FPGA Networks","publication":"ACM Transactions on Reconfigurable Technology and Systems","type":"journal_article","status":"public","abstract":[{"lang":"eng","text":"<jats:p>While FPGA accelerator boards and their respective high-level design tools are maturing, there is still a lack of multi-FPGA applications, libraries, and not least, benchmarks and reference implementations towards sustained HPC usage of these devices. As in the early days of GPUs in HPC, for workloads that can reasonably be decoupled into loosely coupled working sets, multi-accelerator support can be achieved by using standard communication interfaces like MPI on the host side. However, for performance and productivity, some applications can profit from a tighter coupling of the accelerators. FPGAs offer unique opportunities here when extending the dataflow characteristics to their communication interfaces.</jats:p>\r\n          <jats:p>In this work, we extend the HPCC FPGA benchmark suite by multi-FPGA support and three missing benchmarks that particularly characterize or stress inter-device communication: b_eff, PTRANS, and LINPACK. 
With all benchmarks implemented for current boards with Intel and Xilinx FPGAs, we established a baseline for multi-FPGA performance. Additionally, for the communication-centric benchmarks, we explored the potential of direct FPGA-to-FPGA communication with a circuit-switched inter-FPGA network that is currently only available for one of the boards. The evaluation with parallel execution on up to 26 FPGA boards makes use of one of the largest academic FPGA installations.</jats:p>"}],"department":[{"_id":"27"},{"_id":"518"}],"user_id":"24135","_id":"38041","project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"},{"name":"SFB 901 - C: SFB 901 - Project Area C","_id":"4"},{"name":"SFB 901: SFB 901","_id":"1","grant_number":"160364472"},{"name":"SFB 901 - C2: SFB 901 - Subproject C2","_id":"14","grant_number":"160364472"}],"language":[{"iso":"eng"}],"keyword":["General Computer Science"]},{"language":[{"iso":"eng"}],"_id":"43228","external_id":{"arxiv":["2303.13632"]},"project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"department":[{"_id":"27"},{"_id":"518"}],"user_id":"75963","abstract":[{"text":"The computation of electron repulsion integrals (ERIs) over Gaussian-type orbitals (GTOs) is a challenging problem in quantum-mechanics-based atomistic simulations. In practical simulations, several trillions of ERIs may have to be\r\ncomputed for every time step.\r\nIn this work, we investigate FPGAs as accelerators for the ERI computation. We use template parameters, here within the Intel oneAPI tool flow, to create customized designs for 256 different ERI quartet classes, based on their orbitals. To maximize data reuse, all intermediates are buffered in FPGA on-chip memory with customized layout. The pre-calculation of intermediates also helps to overcome data dependencies caused by multi-dimensional recurrence\r\nrelations. 
The involved loop structures are partially or even fully unrolled for high throughput of FPGA kernels. Furthermore, a lossy compression algorithm utilizing arbitrary bitwidth integers is integrated in the FPGA kernels. To our\r\nbest knowledge, this is the first work on ERI computation on FPGAs that supports more than just the single most basic quartet class. Also, the integration of ERI computation and compression is a novelty that is not even covered by CPU or GPU libraries so far.\r\nOur evaluation shows that using 16-bit integer for the ERI compression, the fastest FPGA kernels exceed the performance of 10 GERIS ($10 \\times 10^9$ ERIs per second) on one Intel Stratix 10 GX 2800 FPGA, with maximum absolute errors around $10^{-7}$ - $10^{-5}$ Hartree. The measured throughput can be accurately explained by a performance model. The FPGA kernels deployed on 2 FPGAs outperform similar computations using the widely used libint reference on a two-socket server with 40 Xeon Gold 6148 CPU cores of the same process technology by factors up to 6.0x and on a new two-socket server with 128 EPYC 7713 CPU cores by up to 1.9x.","lang":"eng"}],"status":"public","publication":"2023 IEEE 31st Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)","type":"conference","title":"Computing and Compressing Electron Repulsion Integrals on FPGAs","doi":"10.1109/FCCM57271.2023.00026","main_file_link":[{"url":"https://ieeexplore.ieee.org/document/10171537"}],"date_updated":"2023-08-02T15:05:42Z","date_created":"2023-03-30T11:15:40Z","author":[{"first_name":"Xin","id":"77439","full_name":"Wu, Xin","last_name":"Wu"},{"id":"3145","full_name":"Kenter, Tobias","last_name":"Kenter","first_name":"Tobias"},{"first_name":"Robert","orcid":"0000-0002-6268-539","last_name":"Schade","id":"75963","full_name":"Schade, Robert"},{"first_name":"Thomas","last_name":"Kühne","id":"49079","full_name":"Kühne, 
Thomas"},{"last_name":"Plessl","orcid":"0000-0001-5728-9982","full_name":"Plessl, Christian","id":"16153","first_name":"Christian"}],"year":"2023","page":"162-173","citation":{"apa":"Wu, X., Kenter, T., Schade, R., Kühne, T., &#38; Plessl, C. (2023). Computing and Compressing Electron Repulsion Integrals on FPGAs. <i>2023 IEEE 31st Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)</i>, 162–173. <a href=\"https://doi.org/10.1109/FCCM57271.2023.00026\">https://doi.org/10.1109/FCCM57271.2023.00026</a>","ama":"Wu X, Kenter T, Schade R, Kühne T, Plessl C. Computing and Compressing Electron Repulsion Integrals on FPGAs. In: <i>2023 IEEE 31st Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)</i>. ; 2023:162-173. doi:<a href=\"https://doi.org/10.1109/FCCM57271.2023.00026\">10.1109/FCCM57271.2023.00026</a>","short":"X. Wu, T. Kenter, R. Schade, T. Kühne, C. Plessl, in: 2023 IEEE 31st Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM), 2023, pp. 162–173.","mla":"Wu, Xin, et al. “Computing and Compressing Electron Repulsion Integrals on FPGAs.” <i>2023 IEEE 31st Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)</i>, 2023, pp. 162–73, doi:<a href=\"https://doi.org/10.1109/FCCM57271.2023.00026\">10.1109/FCCM57271.2023.00026</a>.","bibtex":"@inproceedings{Wu_Kenter_Schade_Kühne_Plessl_2023, title={Computing and Compressing Electron Repulsion Integrals on FPGAs}, DOI={<a href=\"https://doi.org/10.1109/FCCM57271.2023.00026\">10.1109/FCCM57271.2023.00026</a>}, booktitle={2023 IEEE 31st Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)}, author={Wu, Xin and Kenter, Tobias and Schade, Robert and Kühne, Thomas and Plessl, Christian}, year={2023}, pages={162–173} }","chicago":"Wu, Xin, Tobias Kenter, Robert Schade, Thomas Kühne, and Christian Plessl. 
“Computing and Compressing Electron Repulsion Integrals on FPGAs.” In <i>2023 IEEE 31st Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)</i>, 162–73, 2023. <a href=\"https://doi.org/10.1109/FCCM57271.2023.00026\">https://doi.org/10.1109/FCCM57271.2023.00026</a>.","ieee":"X. Wu, T. Kenter, R. Schade, T. Kühne, and C. Plessl, “Computing and Compressing Electron Repulsion Integrals on FPGAs,” in <i>2023 IEEE 31st Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)</i>, 2023, pp. 162–173, doi: <a href=\"https://doi.org/10.1109/FCCM57271.2023.00026\">10.1109/FCCM57271.2023.00026</a>."},"quality_controlled":"1"},{"date_created":"2023-05-30T09:19:09Z","publisher":"SAGE Publications","title":"Breaking the exascale barrier for the electronic structure problem in ab-initio molecular dynamics","quality_controlled":"1","year":"2023","language":[{"iso":"eng"}],"keyword":["Hardware and Architecture","Theoretical Computer Science","Software"],"publication":"The International Journal of High Performance Computing Applications","abstract":[{"text":"<jats:p> The non-orthogonal local submatrix method applied to electronic structure–based molecular dynamics simulations is shown to exceed 1.1 EFLOP/s in FP16/FP32-mixed floating-point arithmetic when using 4400 NVIDIA A100 GPUs of the Perlmutter system. This is enabled by a modification of the original method that pushes the sustained fraction of the peak performance to about 80%. Example calculations are performed for SARS-CoV-2 spike proteins with up to 83 million atoms. 
</jats:p>","lang":"eng"}],"author":[{"full_name":"Schade, Robert","id":"75963","orcid":"0000-0002-6268-539","last_name":"Schade","first_name":"Robert"},{"last_name":"Kenter","id":"3145","full_name":"Kenter, Tobias","first_name":"Tobias"},{"first_name":"Hossam","full_name":"Elgabarty, Hossam","id":"60250","orcid":"0000-0002-4945-1481","last_name":"Elgabarty"},{"full_name":"Lass, Michael","id":"24135","orcid":"0000-0002-5708-7632","last_name":"Lass","first_name":"Michael"},{"first_name":"Thomas","last_name":"Kühne","full_name":"Kühne, Thomas","id":"49079"},{"id":"16153","full_name":"Plessl, Christian","last_name":"Plessl","orcid":"0000-0001-5728-9982","first_name":"Christian"}],"date_updated":"2023-08-02T15:04:53Z","oa":"1","doi":"10.1177/10943420231177631","main_file_link":[{"url":"https://journals.sagepub.com/doi/10.1177/10943420231177631","open_access":"1"}],"publication_identifier":{"issn":["1094-3420","1741-2846"]},"publication_status":"published","citation":{"ama":"Schade R, Kenter T, Elgabarty H, Lass M, Kühne T, Plessl C. Breaking the exascale barrier for the electronic structure problem in ab-initio molecular dynamics. <i>The International Journal of High Performance Computing Applications</i>. Published online 2023. doi:<a href=\"https://doi.org/10.1177/10943420231177631\">10.1177/10943420231177631</a>","apa":"Schade, R., Kenter, T., Elgabarty, H., Lass, M., Kühne, T., &#38; Plessl, C. (2023). Breaking the exascale barrier for the electronic structure problem in ab-initio molecular dynamics. <i>The International Journal of High Performance Computing Applications</i>, Article 109434202311776. 
<a href=\"https://doi.org/10.1177/10943420231177631\">https://doi.org/10.1177/10943420231177631</a>","bibtex":"@article{Schade_Kenter_Elgabarty_Lass_Kühne_Plessl_2023, title={Breaking the exascale barrier for the electronic structure problem in ab-initio molecular dynamics}, DOI={<a href=\"https://doi.org/10.1177/10943420231177631\">10.1177/10943420231177631</a>}, number={109434202311776}, journal={The International Journal of High Performance Computing Applications}, publisher={SAGE Publications}, author={Schade, Robert and Kenter, Tobias and Elgabarty, Hossam and Lass, Michael and Kühne, Thomas and Plessl, Christian}, year={2023} }","short":"R. Schade, T. Kenter, H. Elgabarty, M. Lass, T. Kühne, C. Plessl, The International Journal of High Performance Computing Applications (2023).","mla":"Schade, Robert, et al. “Breaking the Exascale Barrier for the Electronic Structure Problem in Ab-Initio Molecular Dynamics.” <i>The International Journal of High Performance Computing Applications</i>, 109434202311776, SAGE Publications, 2023, doi:<a href=\"https://doi.org/10.1177/10943420231177631\">10.1177/10943420231177631</a>.","ieee":"R. Schade, T. Kenter, H. Elgabarty, M. Lass, T. Kühne, and C. Plessl, “Breaking the exascale barrier for the electronic structure problem in ab-initio molecular dynamics,” <i>The International Journal of High Performance Computing Applications</i>, Art. no. 109434202311776, 2023, doi: <a href=\"https://doi.org/10.1177/10943420231177631\">10.1177/10943420231177631</a>.","chicago":"Schade, Robert, Tobias Kenter, Hossam Elgabarty, Michael Lass, Thomas Kühne, and Christian Plessl. “Breaking the Exascale Barrier for the Electronic Structure Problem in Ab-Initio Molecular Dynamics.” <i>The International Journal of High Performance Computing Applications</i>, 2023. 
<a href=\"https://doi.org/10.1177/10943420231177631\">https://doi.org/10.1177/10943420231177631</a>."},"department":[{"_id":"27"},{"_id":"518"}],"user_id":"75963","_id":"45361","project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"article_type":"original","article_number":"109434202311776","type":"journal_article","status":"public"},{"language":[{"iso":"eng"}],"project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"_id":"46191","user_id":"3145","department":[{"_id":"27"},{"_id":"518"}],"status":"public","type":"book_chapter","publication":"Lecture Notes in Computer Science","title":"Shallow Water DG Simulations on FPGAs: Design and Comparison of a Novel Code Generation Pipeline","doi":"10.1007/978-3-031-32041-5_5","publisher":"Springer Nature Switzerland","date_updated":"2025-11-04T09:32:49Z","date_created":"2023-07-28T09:53:21Z","author":[{"first_name":"Christoph","last_name":"Alt","id":"100625","full_name":"Alt, Christoph"},{"last_name":"Kenter","id":"3145","full_name":"Kenter, Tobias","first_name":"Tobias"},{"full_name":"Faghih-Naini, Sara","last_name":"Faghih-Naini","first_name":"Sara"},{"full_name":"Faj, Jennifer","id":"78722","last_name":"Faj","first_name":"Jennifer"},{"full_name":"Opdenhövel, Jan-Oliver","id":"73960","last_name":"Opdenhövel","orcid":"0000-0003-2314-2784","first_name":"Jan-Oliver"},{"first_name":"Christian","orcid":"0000-0001-5728-9982","last_name":"Plessl","full_name":"Plessl, Christian","id":"16153"},{"first_name":"Vadym","last_name":"Aizinger","full_name":"Aizinger, Vadym"},{"first_name":"Jan","last_name":"Hönig","full_name":"Hönig, Jan"},{"first_name":"Harald","full_name":"Köstler, Harald","last_name":"Köstler"}],"place":"Cham","year":"2023","citation":{"bibtex":"@inbook{Alt_Kenter_Faghih-Naini_Faj_Opdenhövel_Plessl_Aizinger_Hönig_Köstler_2023, place={Cham}, title={Shallow Water DG Simulations on FPGAs: Design and 
Comparison of a Novel Code Generation Pipeline}, DOI={<a href=\"https://doi.org/10.1007/978-3-031-32041-5_5\">10.1007/978-3-031-32041-5_5</a>}, booktitle={Lecture Notes in Computer Science}, publisher={Springer Nature Switzerland}, author={Alt, Christoph and Kenter, Tobias and Faghih-Naini, Sara and Faj, Jennifer and Opdenhövel, Jan-Oliver and Plessl, Christian and Aizinger, Vadym and Hönig, Jan and Köstler, Harald}, year={2023} }","mla":"Alt, Christoph, et al. “Shallow Water DG Simulations on FPGAs: Design and Comparison of a Novel Code Generation Pipeline.” <i>Lecture Notes in Computer Science</i>, Springer Nature Switzerland, 2023, doi:<a href=\"https://doi.org/10.1007/978-3-031-32041-5_5\">10.1007/978-3-031-32041-5_5</a>.","short":"C. Alt, T. Kenter, S. Faghih-Naini, J. Faj, J.-O. Opdenhövel, C. Plessl, V. Aizinger, J. Hönig, H. Köstler, in: Lecture Notes in Computer Science, Springer Nature Switzerland, Cham, 2023.","apa":"Alt, C., Kenter, T., Faghih-Naini, S., Faj, J., Opdenhövel, J.-O., Plessl, C., Aizinger, V., Hönig, J., &#38; Köstler, H. (2023). Shallow Water DG Simulations on FPGAs: Design and Comparison of a Novel Code Generation Pipeline. In <i>Lecture Notes in Computer Science</i>. Springer Nature Switzerland. <a href=\"https://doi.org/10.1007/978-3-031-32041-5_5\">https://doi.org/10.1007/978-3-031-32041-5_5</a>","ama":"Alt C, Kenter T, Faghih-Naini S, et al. Shallow Water DG Simulations on FPGAs: Design and Comparison of a Novel Code Generation Pipeline. In: <i>Lecture Notes in Computer Science</i>. Springer Nature Switzerland; 2023. doi:<a href=\"https://doi.org/10.1007/978-3-031-32041-5_5\">10.1007/978-3-031-32041-5_5</a>","ieee":"C. 
Alt <i>et al.</i>, “Shallow Water DG Simulations on FPGAs: Design and Comparison of a Novel Code Generation Pipeline,” in <i>Lecture Notes in Computer Science</i>, Cham: Springer Nature Switzerland, 2023.","chicago":"Alt, Christoph, Tobias Kenter, Sara Faghih-Naini, Jennifer Faj, Jan-Oliver Opdenhövel, Christian Plessl, Vadym Aizinger, Jan Hönig, and Harald Köstler. “Shallow Water DG Simulations on FPGAs: Design and Comparison of a Novel Code Generation Pipeline.” In <i>Lecture Notes in Computer Science</i>. Cham: Springer Nature Switzerland, 2023. <a href=\"https://doi.org/10.1007/978-3-031-32041-5_5\">https://doi.org/10.1007/978-3-031-32041-5_5</a>."},"publication_status":"published","publication_identifier":{"isbn":["9783031320408","9783031320415"],"issn":["0302-9743","1611-3349"]},"quality_controlled":"1"},{"publication_status":"published","quality_controlled":"1","year":"2023","citation":{"ama":"Opdenhövel J-O, Plessl C, Kenter T. Mutation Tree Reconstruction of Tumor Cells on FPGAs Using a Bit-Level Matrix Representation. In: <i>Proceedings of the 13th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies (HEART)</i>. ACM; 2023. doi:<a href=\"https://doi.org/10.1145/3597031.3597050\">10.1145/3597031.3597050</a>","ieee":"J.-O. Opdenhövel, C. Plessl, and T. Kenter, “Mutation Tree Reconstruction of Tumor Cells on FPGAs Using a Bit-Level Matrix Representation,” 2023, doi: <a href=\"https://doi.org/10.1145/3597031.3597050\">10.1145/3597031.3597050</a>.","chicago":"Opdenhövel, Jan-Oliver, Christian Plessl, and Tobias Kenter. “Mutation Tree Reconstruction of Tumor Cells on FPGAs Using a Bit-Level Matrix Representation.” In <i>Proceedings of the 13th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies (HEART)</i>. ACM, 2023. <a href=\"https://doi.org/10.1145/3597031.3597050\">https://doi.org/10.1145/3597031.3597050</a>.","apa":"Opdenhövel, J.-O., Plessl, C., &#38; Kenter, T. (2023). 
Mutation Tree Reconstruction of Tumor Cells on FPGAs Using a Bit-Level Matrix Representation. <i>Proceedings of the 13th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies (HEART)</i>. <a href=\"https://doi.org/10.1145/3597031.3597050\">https://doi.org/10.1145/3597031.3597050</a>","short":"J.-O. Opdenhövel, C. Plessl, T. Kenter, in: Proceedings of the 13th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies (HEART), ACM, 2023.","mla":"Opdenhövel, Jan-Oliver, et al. “Mutation Tree Reconstruction of Tumor Cells on FPGAs Using a Bit-Level Matrix Representation.” <i>Proceedings of the 13th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies (HEART)</i>, ACM, 2023, doi:<a href=\"https://doi.org/10.1145/3597031.3597050\">10.1145/3597031.3597050</a>.","bibtex":"@inproceedings{Opdenhövel_Plessl_Kenter_2023, title={Mutation Tree Reconstruction of Tumor Cells on FPGAs Using a Bit-Level Matrix Representation}, DOI={<a href=\"https://doi.org/10.1145/3597031.3597050\">10.1145/3597031.3597050</a>}, booktitle={Proceedings of the 13th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies (HEART)}, publisher={ACM}, author={Opdenhövel, Jan-Oliver and Plessl, Christian and Kenter, Tobias}, year={2023} }"},"date_updated":"2025-11-04T09:32:30Z","publisher":"ACM","oa":"1","author":[{"first_name":"Jan-Oliver","last_name":"Opdenhövel","orcid":"0000-0003-2314-2784","id":"73960","full_name":"Opdenhövel, Jan-Oliver"},{"last_name":"Plessl","orcid":"0000-0001-5728-9982","id":"16153","full_name":"Plessl, Christian","first_name":"Christian"},{"full_name":"Kenter, Tobias","id":"3145","last_name":"Kenter","first_name":"Tobias"}],"date_created":"2023-07-28T09:49:23Z","title":"Mutation Tree Reconstruction of Tumor Cells on FPGAs Using a Bit-Level Matrix 
Representation","main_file_link":[{"url":"https://dl.acm.org/doi/pdf/10.1145/3597031.3597050","open_access":"1"}],"doi":"10.1145/3597031.3597050","type":"conference","publication":"Proceedings of the 13th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies (HEART)","status":"public","project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"_id":"46190","user_id":"3145","department":[{"_id":"27"},{"_id":"518"}],"language":[{"iso":"eng"}]},{"user_id":"24135","department":[{"_id":"27"},{"_id":"518"}],"project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"_id":"33493","external_id":{"arxiv":["2209.12747"]},"language":[{"iso":"eng"}],"type":"preprint","publication":"arXiv:2209.12747","status":"public","abstract":[{"lang":"eng","text":"Electronic structure calculations have been instrumental in providing many\r\nimportant insights into a range of physical and chemical properties of various\r\nmolecular and solid-state systems. Their importance to various fields,\r\nincluding materials science, chemical sciences, computational chemistry and\r\ndevice physics, is underscored by the large fraction of available public\r\nsupercomputing resources devoted to these calculations. As we enter the\r\nexascale era, exciting new opportunities to increase simulation numbers, sizes,\r\nand accuracies present themselves. In order to realize these promises, the\r\ncommunity of electronic structure software developers will however first have\r\nto tackle a number of challenges pertaining to the efficient use of new\r\narchitectures that will rely heavily on massive parallelism and hardware\r\naccelerators. This roadmap provides a broad overview of the state-of-the-art in\r\nelectronic structure calculations and of the various new directions being\r\npursued by the community. 
It covers 14 electronic structure codes, presenting\r\ntheir current status, their development priorities over the next five years,\r\nand their plans towards tackling the challenges and leveraging the\r\nopportunities presented by the advent of exascale computing."}],"author":[{"first_name":"Vikram","full_name":"Gavini, Vikram","last_name":"Gavini"},{"last_name":"Baroni","full_name":"Baroni, Stefano","first_name":"Stefano"},{"first_name":"Volker","last_name":"Blum","full_name":"Blum, Volker"},{"first_name":"David R.","last_name":"Bowler","full_name":"Bowler, David R."},{"first_name":"Alexander","last_name":"Buccheri","full_name":"Buccheri, Alexander"},{"first_name":"James R.","full_name":"Chelikowsky, James R.","last_name":"Chelikowsky"},{"first_name":"Sambit","last_name":"Das","full_name":"Das, Sambit"},{"full_name":"Dawson, William","last_name":"Dawson","first_name":"William"},{"first_name":"Pietro","full_name":"Delugas, Pietro","last_name":"Delugas"},{"last_name":"Dogan","full_name":"Dogan, Mehmet","first_name":"Mehmet"},{"full_name":"Draxl, Claudia","last_name":"Draxl","first_name":"Claudia"},{"full_name":"Galli, Giulia","last_name":"Galli","first_name":"Giulia"},{"last_name":"Genovese","full_name":"Genovese, Luigi","first_name":"Luigi"},{"full_name":"Giannozzi, Paolo","last_name":"Giannozzi","first_name":"Paolo"},{"first_name":"Matteo","last_name":"Giantomassi","full_name":"Giantomassi, Matteo"},{"full_name":"Gonze, Xavier","last_name":"Gonze","first_name":"Xavier"},{"full_name":"Govoni, Marco","last_name":"Govoni","first_name":"Marco"},{"last_name":"Gulans","full_name":"Gulans, Andris","first_name":"Andris"},{"first_name":"François","last_name":"Gygi","full_name":"Gygi, François"},{"first_name":"John M.","last_name":"Herbert","full_name":"Herbert, John M."},{"first_name":"Sebastian","full_name":"Kokott, Sebastian","last_name":"Kokott"},{"id":"49079","full_name":"Kühne, 
Thomas","last_name":"Kühne","first_name":"Thomas"},{"first_name":"Kai-Hsin","full_name":"Liou, Kai-Hsin","last_name":"Liou"},{"full_name":"Miyazaki, Tsuyoshi","last_name":"Miyazaki","first_name":"Tsuyoshi"},{"last_name":"Motamarri","full_name":"Motamarri, Phani","first_name":"Phani"},{"first_name":"Ayako","last_name":"Nakata","full_name":"Nakata, Ayako"},{"last_name":"Pask","full_name":"Pask, John E.","first_name":"John E."},{"first_name":"Christian","full_name":"Plessl, Christian","id":"16153","last_name":"Plessl","orcid":"0000-0001-5728-9982"},{"full_name":"Ratcliff, Laura E.","last_name":"Ratcliff","first_name":"Laura E."},{"first_name":"Ryan M.","full_name":"Richard, Ryan M.","last_name":"Richard"},{"last_name":"Rossi","full_name":"Rossi, Mariana","first_name":"Mariana"},{"last_name":"Schade","orcid":"0000-0002-6268-539","full_name":"Schade, Robert","id":"75963","first_name":"Robert"},{"last_name":"Scheffler","full_name":"Scheffler, Matthias","first_name":"Matthias"},{"first_name":"Ole","full_name":"Schütt, Ole","last_name":"Schütt"},{"first_name":"Phanish","last_name":"Suryanarayana","full_name":"Suryanarayana, Phanish"},{"full_name":"Torrent, Marc","last_name":"Torrent","first_name":"Marc"},{"first_name":"Lionel","full_name":"Truflandier, Lionel","last_name":"Truflandier"},{"first_name":"Theresa L.","last_name":"Windus","full_name":"Windus, Theresa L."},{"first_name":"Qimen","full_name":"Xu, Qimen","last_name":"Xu"},{"first_name":"Victor W. -Z.","full_name":"Yu, Victor W. -Z.","last_name":"Yu"},{"first_name":"Danny","full_name":"Perez, Danny","last_name":"Perez"}],"date_created":"2022-09-28T05:25:10Z","date_updated":"2023-07-28T08:03:41Z","title":"Roadmap on Electronic Structure Codes in the Exascale Era","citation":{"short":"V. Gavini, S. Baroni, V. Blum, D.R. Bowler, A. Buccheri, J.R. Chelikowsky, S. Das, W. Dawson, P. Delugas, M. Dogan, C. Draxl, G. Galli, L. Genovese, P. Giannozzi, M. Giantomassi, X. Gonze, M. Govoni, A. Gulans, F. Gygi, J.M. Herbert, S. 
Kokott, T. Kühne, K.-H. Liou, T. Miyazaki, P. Motamarri, A. Nakata, J.E. Pask, C. Plessl, L.E. Ratcliff, R.M. Richard, M. Rossi, R. Schade, M. Scheffler, O. Schütt, P. Suryanarayana, M. Torrent, L. Truflandier, T.L. Windus, Q. Xu, V.W.-Z. Yu, D. Perez, ArXiv:2209.12747 (2022).","mla":"Gavini, Vikram, et al. “Roadmap on Electronic Structure Codes in the Exascale Era.” <i>ArXiv:2209.12747</i>, 2022.","bibtex":"@article{Gavini_Baroni_Blum_Bowler_Buccheri_Chelikowsky_Das_Dawson_Delugas_Dogan_et al._2022, title={Roadmap on Electronic Structure Codes in the Exascale Era}, journal={arXiv:2209.12747}, author={Gavini, Vikram and Baroni, Stefano and Blum, Volker and Bowler, David R. and Buccheri, Alexander and Chelikowsky, James R. and Das, Sambit and Dawson, William and Delugas, Pietro and Dogan, Mehmet and et al.}, year={2022} }","apa":"Gavini, V., Baroni, S., Blum, V., Bowler, D. R., Buccheri, A., Chelikowsky, J. R., Das, S., Dawson, W., Delugas, P., Dogan, M., Draxl, C., Galli, G., Genovese, L., Giannozzi, P., Giantomassi, M., Gonze, X., Govoni, M., Gulans, A., Gygi, F., … Perez, D. (2022). Roadmap on Electronic Structure Codes in the Exascale Era. In <i>arXiv:2209.12747</i>.","ama":"Gavini V, Baroni S, Blum V, et al. Roadmap on Electronic Structure Codes in the Exascale Era. <i>arXiv:220912747</i>. Published online 2022.","chicago":"Gavini, Vikram, Stefano Baroni, Volker Blum, David R. Bowler, Alexander Buccheri, James R. Chelikowsky, Sambit Das, et al. “Roadmap on Electronic Structure Codes in the Exascale Era.” <i>ArXiv:2209.12747</i>, 2022.","ieee":"V. Gavini <i>et al.</i>, “Roadmap on Electronic Structure Codes in the Exascale Era,” <i>arXiv:2209.12747</i>. 2022."},"year":"2022"},{"year":"2022","citation":{"ama":"Karp M, Podobas A, Kenter T, et al. A High-Fidelity Flow Solver for Unstructured Meshes on Field-Programmable Gate Arrays: Design, Evaluation, and Future Challenges. In: <i>International Conference on High Performance Computing in Asia-Pacific Region</i>. 
ACM; 2022. doi:<a href=\"https://doi.org/10.1145/3492805.3492808\">10.1145/3492805.3492808</a>","chicago":"Karp, Martin, Artur Podobas, Tobias Kenter, Niclas Jansson, Christian Plessl, Philipp Schlatter, and Stefano Markidis. “A High-Fidelity Flow Solver for Unstructured Meshes on Field-Programmable Gate Arrays: Design, Evaluation, and Future Challenges.” In <i>International Conference on High Performance Computing in Asia-Pacific Region</i>. ACM, 2022. <a href=\"https://doi.org/10.1145/3492805.3492808\">https://doi.org/10.1145/3492805.3492808</a>.","ieee":"M. Karp <i>et al.</i>, “A High-Fidelity Flow Solver for Unstructured Meshes on Field-Programmable Gate Arrays: Design, Evaluation, and Future Challenges,” 2022, doi: <a href=\"https://doi.org/10.1145/3492805.3492808\">10.1145/3492805.3492808</a>.","apa":"Karp, M., Podobas, A., Kenter, T., Jansson, N., Plessl, C., Schlatter, P., &#38; Markidis, S. (2022). A High-Fidelity Flow Solver for Unstructured Meshes on Field-Programmable Gate Arrays: Design, Evaluation, and Future Challenges. <i>International Conference on High Performance Computing in Asia-Pacific Region</i>. <a href=\"https://doi.org/10.1145/3492805.3492808\">https://doi.org/10.1145/3492805.3492808</a>","bibtex":"@inproceedings{Karp_Podobas_Kenter_Jansson_Plessl_Schlatter_Markidis_2022, title={A High-Fidelity Flow Solver for Unstructured Meshes on Field-Programmable Gate Arrays: Design, Evaluation, and Future Challenges}, DOI={<a href=\"https://doi.org/10.1145/3492805.3492808\">10.1145/3492805.3492808</a>}, booktitle={International Conference on High Performance Computing in Asia-Pacific Region}, publisher={ACM}, author={Karp, Martin and Podobas, Artur and Kenter, Tobias and Jansson, Niclas and Plessl, Christian and Schlatter, Philipp and Markidis, Stefano}, year={2022} }","short":"M. Karp, A. Podobas, T. Kenter, N. Jansson, C. Plessl, P. Schlatter, S. 
Markidis, in: International Conference on High Performance Computing in Asia-Pacific Region, ACM, 2022.","mla":"Karp, Martin, et al. “A High-Fidelity Flow Solver for Unstructured Meshes on Field-Programmable Gate Arrays: Design, Evaluation, and Future Challenges.” <i>International Conference on High Performance Computing in Asia-Pacific Region</i>, ACM, 2022, doi:<a href=\"https://doi.org/10.1145/3492805.3492808\">10.1145/3492805.3492808</a>."},"quality_controlled":"1","publication_status":"published","title":"A High-Fidelity Flow Solver for Unstructured Meshes on Field-Programmable Gate Arrays: Design, Evaluation, and Future Challenges","doi":"10.1145/3492805.3492808","main_file_link":[{"open_access":"1","url":"https://dl.acm.org/doi/pdf/10.1145/3492805.3492808"}],"publisher":"ACM","date_updated":"2023-07-28T11:53:15Z","oa":"1","author":[{"last_name":"Karp","full_name":"Karp, Martin","first_name":"Martin"},{"first_name":"Artur","full_name":"Podobas, Artur","last_name":"Podobas"},{"last_name":"Kenter","id":"3145","full_name":"Kenter, Tobias","first_name":"Tobias"},{"first_name":"Niclas","last_name":"Jansson","full_name":"Jansson, Niclas"},{"orcid":"0000-0001-5728-9982","last_name":"Plessl","id":"16153","full_name":"Plessl, Christian","first_name":"Christian"},{"first_name":"Philipp","full_name":"Schlatter, Philipp","last_name":"Schlatter"},{"first_name":"Stefano","last_name":"Markidis","full_name":"Markidis, Stefano"}],"date_created":"2023-07-28T11:51:55Z","status":"public","publication":"International Conference on High Performance Computing in Asia-Pacific Region","type":"conference","language":[{"iso":"eng"}],"_id":"46193","project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"department":[{"_id":"27"},{"_id":"518"}],"user_id":"3145"},{"citation":{"ama":"Kühne T, Plessl C, Schade R, Schütt O. CP2K on the road to exascale. <i>arXiv:220514741</i>. 
Published online 2022.","chicago":"Kühne, Thomas, Christian Plessl, Robert Schade, and Ole Schütt. “CP2K on the Road to Exascale.” <i>ArXiv:2205.14741</i>, 2022.","ieee":"T. Kühne, C. Plessl, R. Schade, and O. Schütt, “CP2K on the road to exascale,” <i>arXiv:2205.14741</i>. 2022.","bibtex":"@article{Kühne_Plessl_Schade_Schütt_2022, title={CP2K on the road to exascale}, journal={arXiv:2205.14741}, author={Kühne, Thomas and Plessl, Christian and Schade, Robert and Schütt, Ole}, year={2022} }","short":"T. Kühne, C. Plessl, R. Schade, O. Schütt, ArXiv:2205.14741 (2022).","mla":"Kühne, Thomas, et al. “CP2K on the Road to Exascale.” <i>ArXiv:2205.14741</i>, 2022.","apa":"Kühne, T., Plessl, C., Schade, R., &#38; Schütt, O. (2022). CP2K on the road to exascale. In <i>arXiv:2205.14741</i>."},"year":"2022","main_file_link":[{"url":"https://arxiv.org/abs/2205.14741"}],"title":"CP2K on the road to exascale","author":[{"first_name":"Thomas","last_name":"Kühne","id":"49079","full_name":"Kühne, Thomas"},{"id":"16153","full_name":"Plessl, Christian","last_name":"Plessl","orcid":"0000-0001-5728-9982","first_name":"Christian"},{"first_name":"Robert","id":"75963","full_name":"Schade, Robert","orcid":"0000-0002-6268-539","last_name":"Schade"},{"first_name":"Ole","last_name":"Schütt","full_name":"Schütt, Ole"}],"date_created":"2022-07-22T08:14:08Z","date_updated":"2023-08-02T14:55:35Z","status":"public","abstract":[{"lang":"eng","text":"The CP2K program package, which can be considered as the swiss army knife of\r\natomistic simulations, is presented with a special emphasis on ab-initio\r\nmolecular dynamics using the second-generation Car-Parrinello method. After\r\noutlining current and near-term development efforts with regards to massively\r\nparallel low-scaling post-Hartree-Fock and eigenvalue solvers, novel approaches\r\non how we plan to take full advantage of future low-precision hardware\r\narchitectures are introduced. 
Our focus here is on combining our submatrix\r\nmethod with the approximate computing paradigm to address the immanent exascale\r\nera."}],"publication":"arXiv:2205.14741","type":"preprint","language":[{"iso":"eng"}],"department":[{"_id":"27"},{"_id":"518"},{"_id":"304"}],"user_id":"75963","external_id":{"arxiv":["2205.14741"]},"_id":"32404","project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}]},{"abstract":[{"text":"A parallel hybrid quantum-classical algorithm for the solution of the quantum-chemical ground-state energy problem on gate-based quantum computers is presented. This approach is based on the reduced density-matrix functional theory (RDMFT) formulation of the electronic structure problem. For that purpose, the density-matrix functional of the full system is decomposed into an indirectly coupled sum of density-matrix functionals for all its subsystems using the adaptive cluster approximation to RDMFT. The approximations involved in the decomposition and the adaptive cluster approximation itself can be systematically converged to the exact result. The solutions for the density-matrix functionals of the effective subsystems involves a constrained minimization over many-particle states that are approximated by parametrized trial states on the quantum computer similarly to the variational quantum eigensolver. The independence of the density-matrix functionals of the effective subsystems introduces a new level of parallelization and allows for the computational treatment of much larger molecules on a quantum computer with a given qubit count. In addition, for the proposed algorithm techniques are presented to reduce the qubit count, the number of quantum programs, as well as its depth. 
The evaluation of a density-matrix functional as the essential part of our approach is demonstrated for Hubbard-like systems on IBM quantum computers based on superconducting transmon qubits.","lang":"eng"}],"publication":"Phys. Rev. Research","language":[{"iso":"eng"}],"year":"2022","quality_controlled":"1","title":"Parallel quantum chemistry on noisy intermediate-scale quantum computers","publisher":"American Physical Society","date_created":"2022-08-29T14:07:01Z","status":"public","type":"journal_article","article_type":"original","project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"_id":"33226","user_id":"75963","department":[{"_id":"27"},{"_id":"518"}],"citation":{"short":"R. Schade, C. Bauer, K. Tamoev, L. Mazur, C. Plessl, T. Kühne, Phys. Rev. Research 4 (2022) 033160.","bibtex":"@article{Schade_Bauer_Tamoev_Mazur_Plessl_Kühne_2022, title={Parallel quantum chemistry on noisy intermediate-scale quantum computers}, volume={4}, DOI={<a href=\"https://doi.org/10.1103/PhysRevResearch.4.033160\">10.1103/PhysRevResearch.4.033160</a>}, journal={Phys. Rev. Research}, publisher={American Physical Society}, author={Schade, Robert and Bauer, Carsten and Tamoev, Konstantin and Mazur, Lukas and Plessl, Christian and Kühne, Thomas}, year={2022}, pages={033160} }","mla":"Schade, Robert, et al. “Parallel Quantum Chemistry on Noisy Intermediate-Scale Quantum Computers.” <i>Phys. Rev. Research</i>, vol. 4, American Physical Society, 2022, p. 033160, doi:<a href=\"https://doi.org/10.1103/PhysRevResearch.4.033160\">10.1103/PhysRevResearch.4.033160</a>.","apa":"Schade, R., Bauer, C., Tamoev, K., Mazur, L., Plessl, C., &#38; Kühne, T. (2022). Parallel quantum chemistry on noisy intermediate-scale quantum computers. <i>Phys. Rev. Research</i>, <i>4</i>, 033160. 
<a href=\"https://doi.org/10.1103/PhysRevResearch.4.033160\">https://doi.org/10.1103/PhysRevResearch.4.033160</a>","ama":"Schade R, Bauer C, Tamoev K, Mazur L, Plessl C, Kühne T. Parallel quantum chemistry on noisy intermediate-scale quantum computers. <i>Phys Rev Research</i>. 2022;4:033160. doi:<a href=\"https://doi.org/10.1103/PhysRevResearch.4.033160\">10.1103/PhysRevResearch.4.033160</a>","chicago":"Schade, Robert, Carsten Bauer, Konstantin Tamoev, Lukas Mazur, Christian Plessl, and Thomas Kühne. “Parallel Quantum Chemistry on Noisy Intermediate-Scale Quantum Computers.” <i>Phys. Rev. Research</i> 4 (2022): 033160. <a href=\"https://doi.org/10.1103/PhysRevResearch.4.033160\">https://doi.org/10.1103/PhysRevResearch.4.033160</a>.","ieee":"R. Schade, C. Bauer, K. Tamoev, L. Mazur, C. Plessl, and T. Kühne, “Parallel quantum chemistry on noisy intermediate-scale quantum computers,” <i>Phys. Rev. Research</i>, vol. 4, p. 033160, 2022, doi: <a href=\"https://doi.org/10.1103/PhysRevResearch.4.033160\">10.1103/PhysRevResearch.4.033160</a>."},"page":"033160","intvolume":"         4","publication_status":"published","main_file_link":[{"url":"https://journals.aps.org/prresearch/abstract/10.1103/PhysRevResearch.4.033160","open_access":"1"}],"doi":"10.1103/PhysRevResearch.4.033160","date_updated":"2023-08-02T15:04:22Z","oa":"1","author":[{"last_name":"Schade","orcid":"0000-0002-6268-539","full_name":"Schade, Robert","id":"75963","first_name":"Robert"},{"full_name":"Bauer, Carsten","id":"90082","last_name":"Bauer","first_name":"Carsten"},{"first_name":"Konstantin","id":"50177","full_name":"Tamoev, Konstantin","last_name":"Tamoev"},{"first_name":"Lukas","full_name":"Mazur, Lukas","id":"90492","last_name":"Mazur","orcid":"0000-0001-6304-7082"},{"first_name":"Christian","orcid":"0000-0001-5728-9982","last_name":"Plessl","id":"16153","full_name":"Plessl, Christian"},{"id":"49079","full_name":"Kühne, 
Thomas","last_name":"Kühne","first_name":"Thomas"}],"volume":4},{"date_updated":"2023-08-02T15:00:47Z","date_created":"2023-08-02T14:59:18Z","author":[{"first_name":"Vikram","last_name":"Gavini","full_name":"Gavini, Vikram"},{"last_name":"Baroni","full_name":"Baroni, Stefano","first_name":"Stefano"},{"last_name":"Blum","full_name":"Blum, Volker","first_name":"Volker"},{"full_name":"Bowler, David R.","last_name":"Bowler","first_name":"David R."},{"last_name":"Buccheri","full_name":"Buccheri, Alexander","first_name":"Alexander"},{"last_name":"Chelikowsky","full_name":"Chelikowsky, James R.","first_name":"James R."},{"full_name":"Das, Sambit","last_name":"Das","first_name":"Sambit"},{"first_name":"William","full_name":"Dawson, William","last_name":"Dawson"},{"first_name":"Pietro","last_name":"Delugas","full_name":"Delugas, Pietro"},{"first_name":"Mehmet","last_name":"Dogan","full_name":"Dogan, Mehmet"},{"full_name":"Draxl, Claudia","last_name":"Draxl","first_name":"Claudia"},{"first_name":"Giulia","last_name":"Galli","full_name":"Galli, Giulia"},{"last_name":"Genovese","full_name":"Genovese, Luigi","first_name":"Luigi"},{"last_name":"Giannozzi","full_name":"Giannozzi, Paolo","first_name":"Paolo"},{"first_name":"Matteo","last_name":"Giantomassi","full_name":"Giantomassi, Matteo"},{"full_name":"Gonze, Xavier","last_name":"Gonze","first_name":"Xavier"},{"last_name":"Govoni","full_name":"Govoni, Marco","first_name":"Marco"},{"full_name":"Gulans, Andris","last_name":"Gulans","first_name":"Andris"},{"last_name":"Gygi","full_name":"Gygi, François","first_name":"François"},{"first_name":"John M.","last_name":"Herbert","full_name":"Herbert, John M."},{"last_name":"Kokott","full_name":"Kokott, Sebastian","first_name":"Sebastian"},{"first_name":"Thomas","last_name":"Kühne","full_name":"Kühne, Thomas","id":"49079"},{"first_name":"Kai-Hsin","last_name":"Liou","full_name":"Liou, Kai-Hsin"},{"first_name":"Tsuyoshi","last_name":"Miyazaki","full_name":"Miyazaki, 
Tsuyoshi"},{"last_name":"Motamarri","full_name":"Motamarri, Phani","first_name":"Phani"},{"last_name":"Nakata","full_name":"Nakata, Ayako","first_name":"Ayako"},{"last_name":"Pask","full_name":"Pask, John E.","first_name":"John E."},{"full_name":"Plessl, Christian","id":"16153","last_name":"Plessl","orcid":"0000-0001-5728-9982","first_name":"Christian"},{"first_name":"Laura E.","full_name":"Ratcliff, Laura E.","last_name":"Ratcliff"},{"full_name":"Richard, Ryan M.","last_name":"Richard","first_name":"Ryan M."},{"first_name":"Mariana","last_name":"Rossi","full_name":"Rossi, Mariana"},{"first_name":"Robert","last_name":"Schade","orcid":"0000-0002-6268-539","full_name":"Schade, Robert","id":"75963"},{"first_name":"Matthias","full_name":"Scheffler, Matthias","last_name":"Scheffler"},{"last_name":"Schütt","full_name":"Schütt, Ole","first_name":"Ole"},{"last_name":"Suryanarayana","full_name":"Suryanarayana, Phanish","first_name":"Phanish"},{"full_name":"Torrent, Marc","last_name":"Torrent","first_name":"Marc"},{"first_name":"Lionel","last_name":"Truflandier","full_name":"Truflandier, Lionel"},{"full_name":"Windus, Theresa L.","last_name":"Windus","first_name":"Theresa L."},{"first_name":"Qimen","full_name":"Xu, Qimen","last_name":"Xu"},{"first_name":"Victor W. -Z.","last_name":"Yu","full_name":"Yu, Victor W. -Z."},{"first_name":"Danny","full_name":"Perez, Danny","last_name":"Perez"}],"title":"Roadmap on Electronic Structure Codes in the Exascale Era","year":"2022","citation":{"mla":"Gavini, Vikram, et al. “Roadmap on Electronic Structure Codes in the Exascale Era.” <i>ArXiv:2209.12747</i>, 2022.","short":"V. Gavini, S. Baroni, V. Blum, D.R. Bowler, A. Buccheri, J.R. Chelikowsky, S. Das, W. Dawson, P. Delugas, M. Dogan, C. Draxl, G. Galli, L. Genovese, P. Giannozzi, M. Giantomassi, X. Gonze, M. Govoni, A. Gulans, F. Gygi, J.M. Herbert, S. Kokott, T. Kühne, K.-H. Liou, T. Miyazaki, P. Motamarri, A. Nakata, J.E. Pask, C. Plessl, L.E. Ratcliff, R.M. Richard, M. Rossi, R. 
Schade, M. Scheffler, O. Schütt, P. Suryanarayana, M. Torrent, L. Truflandier, T.L. Windus, Q. Xu, V.W.-Z. Yu, D. Perez, ArXiv:2209.12747 (2022).","bibtex":"@article{Gavini_Baroni_Blum_Bowler_Buccheri_Chelikowsky_Das_Dawson_Delugas_Dogan_et al._2022, title={Roadmap on Electronic Structure Codes in the Exascale Era}, journal={arXiv:2209.12747}, author={Gavini, Vikram and Baroni, Stefano and Blum, Volker and Bowler, David R. and Buccheri, Alexander and Chelikowsky, James R. and Das, Sambit and Dawson, William and Delugas, Pietro and Dogan, Mehmet and et al.}, year={2022} }","apa":"Gavini, V., Baroni, S., Blum, V., Bowler, D. R., Buccheri, A., Chelikowsky, J. R., Das, S., Dawson, W., Delugas, P., Dogan, M., Draxl, C., Galli, G., Genovese, L., Giannozzi, P., Giantomassi, M., Gonze, X., Govoni, M., Gulans, A., Gygi, F., … Perez, D. (2022). Roadmap on Electronic Structure Codes in the Exascale Era. In <i>arXiv:2209.12747</i>.","chicago":"Gavini, Vikram, Stefano Baroni, Volker Blum, David R. Bowler, Alexander Buccheri, James R. Chelikowsky, Sambit Das, et al. “Roadmap on Electronic Structure Codes in the Exascale Era.” <i>ArXiv:2209.12747</i>, 2022.","ieee":"V. Gavini <i>et al.</i>, “Roadmap on Electronic Structure Codes in the Exascale Era,” <i>arXiv:2209.12747</i>. 2022.","ama":"Gavini V, Baroni S, Blum V, et al. Roadmap on Electronic Structure Codes in the Exascale Era. <i>arXiv:2209.12747</i>. Published online 2022."},"project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"external_id":{"arxiv":["2209.12747"]},"_id":"46275","user_id":"75963","department":[{"_id":"27"}],"language":[{"iso":"eng"}],"type":"preprint","publication":"arXiv:2209.12747","abstract":[{"lang":"eng","text":"Electronic structure calculations have been instrumental in providing many\r\nimportant insights into a range of physical and chemical properties of various\r\nmolecular and solid-state systems. 
Their importance to various fields,\r\nincluding materials science, chemical sciences, computational chemistry and\r\ndevice physics, is underscored by the large fraction of available public\r\nsupercomputing resources devoted to these calculations. As we enter the\r\nexascale era, exciting new opportunities to increase simulation numbers, sizes,\r\nand accuracies present themselves. In order to realize these promises, the\r\ncommunity of electronic structure software developers will however first have\r\nto tackle a number of challenges pertaining to the efficient use of new\r\narchitectures that will rely heavily on massive parallelism and hardware\r\naccelerators. This roadmap provides a broad overview of the state-of-the-art in\r\nelectronic structure calculations and of the various new directions being\r\npursued by the community. It covers 14 electronic structure codes, presenting\r\ntheir current status, their development priorities over the next five years,\r\nand their plans towards tackling the challenges and leveraging the\r\nopportunities presented by the advent of exascale computing."}],"status":"public"},{"publication_status":"published","publication_identifier":{"issn":["0167-8191"]},"citation":{"apa":"Schade, R., Kenter, T., Elgabarty, H., Lass, M., Schütt, O., Lazzaro, A., Pabst, H., Mohr, S., Hutter, J., Kühne, T., &#38; Plessl, C. (2022). Towards electronic structure-based ab-initio molecular dynamics simulations with hundreds of millions of atoms. <i>Parallel Computing</i>, <i>111</i>, Article 102920. <a href=\"https://doi.org/10.1016/j.parco.2022.102920\">https://doi.org/10.1016/j.parco.2022.102920</a>","mla":"Schade, Robert, et al. “Towards Electronic Structure-Based Ab-Initio Molecular Dynamics Simulations with Hundreds of Millions of Atoms.” <i>Parallel Computing</i>, vol. 
111, 102920, Elsevier BV, 2022, doi:<a href=\"https://doi.org/10.1016/j.parco.2022.102920\">10.1016/j.parco.2022.102920</a>.","bibtex":"@article{Schade_Kenter_Elgabarty_Lass_Schütt_Lazzaro_Pabst_Mohr_Hutter_Kühne_et al._2022, title={Towards electronic structure-based ab-initio molecular dynamics simulations with hundreds of millions of atoms}, volume={111}, DOI={<a href=\"https://doi.org/10.1016/j.parco.2022.102920\">10.1016/j.parco.2022.102920</a>}, number={102920}, journal={Parallel Computing}, publisher={Elsevier BV}, author={Schade, Robert and Kenter, Tobias and Elgabarty, Hossam and Lass, Michael and Schütt, Ole and Lazzaro, Alfio and Pabst, Hans and Mohr, Stephan and Hutter, Jürg and Kühne, Thomas and et al.}, year={2022} }","short":"R. Schade, T. Kenter, H. Elgabarty, M. Lass, O. Schütt, A. Lazzaro, H. Pabst, S. Mohr, J. Hutter, T. Kühne, C. Plessl, Parallel Computing 111 (2022).","ama":"Schade R, Kenter T, Elgabarty H, et al. Towards electronic structure-based ab-initio molecular dynamics simulations with hundreds of millions of atoms. <i>Parallel Computing</i>. 2022;111. doi:<a href=\"https://doi.org/10.1016/j.parco.2022.102920\">10.1016/j.parco.2022.102920</a>","ieee":"R. Schade <i>et al.</i>, “Towards electronic structure-based ab-initio molecular dynamics simulations with hundreds of millions of atoms,” <i>Parallel Computing</i>, vol. 111, Art. no. 102920, 2022, doi: <a href=\"https://doi.org/10.1016/j.parco.2022.102920\">10.1016/j.parco.2022.102920</a>.","chicago":"Schade, Robert, Tobias Kenter, Hossam Elgabarty, Michael Lass, Ole Schütt, Alfio Lazzaro, Hans Pabst, et al. “Towards Electronic Structure-Based Ab-Initio Molecular Dynamics Simulations with Hundreds of Millions of Atoms.” <i>Parallel Computing</i> 111 (2022). 
<a href=\"https://doi.org/10.1016/j.parco.2022.102920\">https://doi.org/10.1016/j.parco.2022.102920</a>."},"intvolume":"       111","author":[{"last_name":"Schade","orcid":"0000-0002-6268-539","full_name":"Schade, Robert","id":"75963","first_name":"Robert"},{"first_name":"Tobias","last_name":"Kenter","full_name":"Kenter, Tobias","id":"3145"},{"orcid":"0000-0002-4945-1481","last_name":"Elgabarty","id":"60250","full_name":"Elgabarty, Hossam","first_name":"Hossam"},{"first_name":"Michael","orcid":"0000-0002-5708-7632","last_name":"Lass","id":"24135","full_name":"Lass, Michael"},{"last_name":"Schütt","full_name":"Schütt, Ole","first_name":"Ole"},{"first_name":"Alfio","last_name":"Lazzaro","full_name":"Lazzaro, Alfio"},{"full_name":"Pabst, Hans","last_name":"Pabst","first_name":"Hans"},{"first_name":"Stephan","full_name":"Mohr, Stephan","last_name":"Mohr"},{"first_name":"Jürg","last_name":"Hutter","full_name":"Hutter, Jürg"},{"id":"49079","full_name":"Kühne, Thomas","last_name":"Kühne","first_name":"Thomas"},{"full_name":"Plessl, Christian","id":"16153","orcid":"0000-0001-5728-9982","last_name":"Plessl","first_name":"Christian"}],"volume":111,"oa":"1","date_updated":"2023-08-02T15:03:55Z","main_file_link":[{"url":"https://www.sciencedirect.com/science/article/pii/S0167819122000242","open_access":"1"}],"doi":"10.1016/j.parco.2022.102920","type":"journal_article","status":"public","user_id":"75963","department":[{"_id":"613"},{"_id":"27"},{"_id":"518"}],"project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"_id":"33684","article_number":"102920","quality_controlled":"1","year":"2022","date_created":"2022-10-11T08:17:02Z","publisher":"Elsevier BV","title":"Towards electronic structure-based ab-initio molecular dynamics simulations with hundreds of millions of atoms","publication":"Parallel Computing","language":[{"iso":"eng"}],"keyword":["Artificial Intelligence","Computer Graphics and Computer-Aided 
Design","Computer Networks and Communications","Hardware and Architecture","Theoretical Computer Science","Software"]},{"citation":{"ama":"Meyer M, Kenter T, Plessl C. In-depth FPGA Accelerator Performance Evaluation with Single Node Benchmarks from the HPC Challenge Benchmark Suite for Intel and Xilinx FPGAs using OpenCL. <i>Journal of Parallel and Distributed Computing</i>. Published online 2022. doi:<a href=\"https://doi.org/10.1016/j.jpdc.2021.10.007\">10.1016/j.jpdc.2021.10.007</a>","chicago":"Meyer, Marius, Tobias Kenter, and Christian Plessl. “In-Depth FPGA Accelerator Performance Evaluation with Single Node Benchmarks from the HPC Challenge Benchmark Suite for Intel and Xilinx FPGAs Using OpenCL.” <i>Journal of Parallel and Distributed Computing</i>, 2022. <a href=\"https://doi.org/10.1016/j.jpdc.2021.10.007\">https://doi.org/10.1016/j.jpdc.2021.10.007</a>.","ieee":"M. Meyer, T. Kenter, and C. Plessl, “In-depth FPGA Accelerator Performance Evaluation with Single Node Benchmarks from the HPC Challenge Benchmark Suite for Intel and Xilinx FPGAs using OpenCL,” <i>Journal of Parallel and Distributed Computing</i>, 2022, doi: <a href=\"https://doi.org/10.1016/j.jpdc.2021.10.007\">10.1016/j.jpdc.2021.10.007</a>.","short":"M. Meyer, T. Kenter, C. Plessl, Journal of Parallel and Distributed Computing (2022).","mla":"Meyer, Marius, et al. 
“In-Depth FPGA Accelerator Performance Evaluation with Single Node Benchmarks from the HPC Challenge Benchmark Suite for Intel and Xilinx FPGAs Using OpenCL.” <i>Journal of Parallel and Distributed Computing</i>, 2022, doi:<a href=\"https://doi.org/10.1016/j.jpdc.2021.10.007\">10.1016/j.jpdc.2021.10.007</a>.","bibtex":"@article{Meyer_Kenter_Plessl_2022, title={In-depth FPGA Accelerator Performance Evaluation with Single Node Benchmarks from the HPC Challenge Benchmark Suite for Intel and Xilinx FPGAs using OpenCL}, DOI={<a href=\"https://doi.org/10.1016/j.jpdc.2021.10.007\">10.1016/j.jpdc.2021.10.007</a>}, journal={Journal of Parallel and Distributed Computing}, author={Meyer, Marius and Kenter, Tobias and Plessl, Christian}, year={2022} }","apa":"Meyer, M., Kenter, T., &#38; Plessl, C. (2022). In-depth FPGA Accelerator Performance Evaluation with Single Node Benchmarks from the HPC Challenge Benchmark Suite for Intel and Xilinx FPGAs using OpenCL. <i>Journal of Parallel and Distributed Computing</i>. 
<a href=\"https://doi.org/10.1016/j.jpdc.2021.10.007\">https://doi.org/10.1016/j.jpdc.2021.10.007</a>"},"year":"2022","publication_identifier":{"issn":["0743-7315"]},"quality_controlled":"1","publication_status":"published","doi":"10.1016/j.jpdc.2021.10.007","title":"In-depth FPGA Accelerator Performance Evaluation with Single Node Benchmarks from the HPC Challenge Benchmark Suite for Intel and Xilinx FPGAs using OpenCL","date_created":"2021-11-10T14:36:27Z","author":[{"full_name":"Meyer, Marius","id":"40778","last_name":"Meyer","first_name":"Marius"},{"first_name":"Tobias","last_name":"Kenter","id":"3145","full_name":"Kenter, Tobias"},{"first_name":"Christian","last_name":"Plessl","orcid":"0000-0001-5728-9982","id":"16153","full_name":"Plessl, Christian"}],"date_updated":"2023-09-26T10:26:56Z","status":"public","publication":"Journal of Parallel and Distributed Computing","type":"journal_article","language":[{"iso":"eng"}],"department":[{"_id":"27"},{"_id":"518"}],"user_id":"15278","_id":"27364","project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"}]},{"oa":"1","date_updated":"2022-01-06T06:57:51Z","author":[{"full_name":"Menzel, Johannes","last_name":"Menzel","first_name":"Johannes"},{"last_name":"Plessl","orcid":"0000-0001-5728-9982","full_name":"Plessl, Christian","id":"16153","first_name":"Christian"},{"id":"3145","full_name":"Kenter, Tobias","last_name":"Kenter","first_name":"Tobias"}],"volume":15,"main_file_link":[{"open_access":"1","url":"https://dl.acm.org/doi/10.1145/3491235"}],"doi":"10.1145/3491235","publication_status":"published","publication_identifier":{"issn":["1936-7406","1936-7414"]},"citation":{"ieee":"J. Menzel, C. Plessl, and T. Kenter, “The Strong Scaling Advantage of FPGAs in HPC for N-body Simulations,” <i>ACM Transactions on Reconfigurable Technology and Systems</i>, vol. 15, no. 1, pp. 
1–30, 2021, doi: <a href=\"https://doi.org/10.1145/3491235\">10.1145/3491235</a>.","chicago":"Menzel, Johannes, Christian Plessl, and Tobias Kenter. “The Strong Scaling Advantage of FPGAs in HPC for N-Body Simulations.” <i>ACM Transactions on Reconfigurable Technology and Systems</i> 15, no. 1 (2021): 1–30. <a href=\"https://doi.org/10.1145/3491235\">https://doi.org/10.1145/3491235</a>.","ama":"Menzel J, Plessl C, Kenter T. The Strong Scaling Advantage of FPGAs in HPC for N-body Simulations. <i>ACM Transactions on Reconfigurable Technology and Systems</i>. 2021;15(1):1-30. doi:<a href=\"https://doi.org/10.1145/3491235\">10.1145/3491235</a>","bibtex":"@article{Menzel_Plessl_Kenter_2021, title={The Strong Scaling Advantage of FPGAs in HPC for N-body Simulations}, volume={15}, DOI={<a href=\"https://doi.org/10.1145/3491235\">10.1145/3491235</a>}, number={1}, journal={ACM Transactions on Reconfigurable Technology and Systems}, author={Menzel, Johannes and Plessl, Christian and Kenter, Tobias}, year={2021}, pages={1–30} }","mla":"Menzel, Johannes, et al. “The Strong Scaling Advantage of FPGAs in HPC for N-Body Simulations.” <i>ACM Transactions on Reconfigurable Technology and Systems</i>, vol. 15, no. 1, 2021, pp. 1–30, doi:<a href=\"https://doi.org/10.1145/3491235\">10.1145/3491235</a>.","short":"J. Menzel, C. Plessl, T. Kenter, ACM Transactions on Reconfigurable Technology and Systems 15 (2021) 1–30.","apa":"Menzel, J., Plessl, C., &#38; Kenter, T. (2021). The Strong Scaling Advantage of FPGAs in HPC for N-body Simulations. <i>ACM Transactions on Reconfigurable Technology and Systems</i>, <i>15</i>(1), 1–30. 
<a href=\"https://doi.org/10.1145/3491235\">https://doi.org/10.1145/3491235</a>"},"page":"1-30","intvolume":"        15","_id":"28099","user_id":"3145","department":[{"_id":"27"},{"_id":"518"}],"article_type":"original","type":"journal_article","status":"public","date_created":"2021-11-30T10:00:31Z","title":"The Strong Scaling Advantage of FPGAs in HPC for N-body Simulations","quality_controlled":"1","issue":"1","year":"2021","language":[{"iso":"eng"}],"publication":"ACM Transactions on Reconfigurable Technology and Systems","abstract":[{"text":"N-body methods are one of the essential algorithmic building blocks of high-performance and parallel computing. Previous research has shown promising performance for implementing n-body simulations with pairwise force calculations on FPGAs. However, to avoid challenges with accumulation and memory access patterns, the presented designs calculate each pair of forces twice, along with both force sums of the involved particles. Also, they require large problem instances with hundreds of thousands of particles to reach their respective peak performance, limiting the applicability for strong scaling scenarios. This work addresses both issues by presenting a novel FPGA design that uses each calculated force twice and overlaps data transfers and computations in a way that allows to reach peak performance even for small problem instances, outperforming previous single precision results even in double precision, and scaling linearly over multiple interconnected FPGAs. For a comparison across architectures, we provide an equally optimized CPU reference, which for large problems actually achieves higher peak performance per device, however, given the strong scaling advantages of the FPGA design, in parallel setups with few thousand particles per device, the FPGA platform achieves highest performance and power efficiency.","lang":"eng"}]}]
