[{"place":"New York, NY, USA","page":"224-234","citation":{"ieee":"P. Stachura, X. Wu, C. Plessl, and Z. Fang, “SORCERI: Streaming Overlay Acceleration for Highly Contracted Electron Repulsion Integral Computations in Quantum Chemistry,” in <i>Proceedings of the 2026 ACM/SIGDA International Symposium on Field Programmable Gate Arrays (FPGA ’26)</i>, 2026, pp. 224–234, doi: <a href=\"https://doi.org/10.1145/3748173.3779198\">10.1145/3748173.3779198</a>.","chicago":"Stachura, Philip, Xin Wu, Christian Plessl, and Zhenman Fang. “SORCERI: Streaming Overlay Acceleration for Highly Contracted Electron Repulsion Integral Computations in Quantum Chemistry.” In <i>Proceedings of the 2026 ACM/SIGDA International Symposium on Field Programmable Gate Arrays (FPGA ’26)</i>, 224–34. New York, NY, USA: Association for Computing Machinery, 2026. <a href=\"https://doi.org/10.1145/3748173.3779198\">https://doi.org/10.1145/3748173.3779198</a>.","ama":"Stachura P, Wu X, Plessl C, Fang Z. SORCERI: Streaming Overlay Acceleration for Highly Contracted Electron Repulsion Integral Computations in Quantum Chemistry. In: <i>Proceedings of the 2026 ACM/SIGDA International Symposium on Field Programmable Gate Arrays (FPGA ’26)</i>. Association for Computing Machinery; 2026:224-234. doi:<a href=\"https://doi.org/10.1145/3748173.3779198\">10.1145/3748173.3779198</a>","apa":"Stachura, P., Wu, X., Plessl, C., &#38; Fang, Z. (2026). SORCERI: Streaming Overlay Acceleration for Highly Contracted Electron Repulsion Integral Computations in Quantum Chemistry. <i>Proceedings of the 2026 ACM/SIGDA International Symposium on Field Programmable Gate Arrays (FPGA ’26)</i>, 224–234. 
<a href=\"https://doi.org/10.1145/3748173.3779198\">https://doi.org/10.1145/3748173.3779198</a>","bibtex":"@inproceedings{Stachura_Wu_Plessl_Fang_2026, place={New York, NY, USA}, title={SORCERI: Streaming Overlay Acceleration for Highly Contracted Electron Repulsion Integral Computations in Quantum Chemistry}, DOI={<a href=\"https://doi.org/10.1145/3748173.3779198\">10.1145/3748173.3779198</a>}, booktitle={Proceedings of the 2026 ACM/SIGDA International Symposium on Field Programmable Gate Arrays (FPGA ’26)}, publisher={Association for Computing Machinery}, author={Stachura, Philip and Wu, Xin and Plessl, Christian and Fang, Zhenman}, year={2026}, pages={224–234} }","mla":"Stachura, Philip, et al. “SORCERI: Streaming Overlay Acceleration for Highly Contracted Electron Repulsion Integral Computations in Quantum Chemistry.” <i>Proceedings of the 2026 ACM/SIGDA International Symposium on Field Programmable Gate Arrays (FPGA ’26)</i>, Association for Computing Machinery, 2026, pp. 224–34, doi:<a href=\"https://doi.org/10.1145/3748173.3779198\">10.1145/3748173.3779198</a>.","short":"P. Stachura, X. Wu, C. Plessl, Z. Fang, in: Proceedings of the 2026 ACM/SIGDA International Symposium on Field Programmable Gate Arrays (FPGA ’26), Association for Computing Machinery, New York, NY, USA, 2026, pp. 
224–234."},"publication_identifier":{"isbn":["9798400720796"]},"publication_status":"published","doi":"10.1145/3748173.3779198","main_file_link":[{"url":"https://dl.acm.org/doi/10.1145/3748173.3779198"}],"date_updated":"2026-02-09T09:16:32Z","author":[{"first_name":"Philip","full_name":"Stachura, Philip","last_name":"Stachura"},{"last_name":"Wu","full_name":"Wu, Xin","id":"77439","first_name":"Xin"},{"first_name":"Christian","id":"16153","full_name":"Plessl, Christian","last_name":"Plessl","orcid":"0000-0001-5728-9982"},{"last_name":"Fang","full_name":"Fang, Zhenman","first_name":"Zhenman"}],"status":"public","type":"conference","_id":"63890","project":[{"name":"Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"department":[{"_id":"27"},{"_id":"518"}],"user_id":"77439","year":"2026","title":"SORCERI: Streaming Overlay Acceleration for Highly Contracted Electron Repulsion Integral Computations in Quantum Chemistry","publisher":"Association for Computing Machinery","date_created":"2026-02-06T06:43:22Z","abstract":[{"text":"The computation of highly contracted electron repulsion integrals (ERIs) is essential to achieve quantum accuracy in atomistic simulations based on quantum mechanics. Its growing computational demands make energy efficiency a critical concern. Recent studies demonstrate FPGAs’ superior performance and energy efficiency for computing primitive ERIs, but the computation of highly contracted ERIs introduces significant algorithmic complexity and new design challenges for FPGA acceleration. In this work, we present SORCERI, the first streaming overlay acceleration for highly contracted ERI computations on FPGAs. SORCERI introduces a novel streaming Rys computing unit to calculate roots and weights of Rys polynomials on-chip, and a streaming contraction unit for the contraction of primitive ERIs. 
This shifts the design bottleneck from limited CPU-FPGA communication bandwidth to available FPGA computation resources. To address practical deployment challenges for a large number of quartet classes, we design three streaming overlays, together with an efficient memory transpose optimization, to cover the 21 most commonly used quartet classes in realistic atomistic simulations. To address the new computation constraints, we use flexible calculation stages with a free-running streaming architecture to achieve high DSP utilization and good timing closure. Experiments demonstrate that SORCERI achieves an average 5.96x, 1.99x, and 1.16x better performance per watt than libint on a 64-core AMD EPYC 7713 CPU, libintx on an Nvidia A40 GPU, and SERI, the prior best-performing FPGA design for primitive ERIs. Furthermore, SORCERI reaches a peak throughput of 44.11 GERIS (10^9 ERIs per second) that is 1.52x, 1.13x, and 1.93x greater than libint, libintx and SERI, respectively. SORCERI will be released soon at https://github.com/SFU-HiAccel/SORCERI.","lang":"eng"}],"publication":"Proceedings of the 2026 ACM/SIGDA International Symposium on Field Programmable Gate Arrays (FPGA '26)","keyword":["electron repulsion integrals","quantum chemistry","atomistic simulation","overlay architecture","fpga acceleration"],"language":[{"iso":"eng"}]},{"publisher":"IEEE","date_updated":"2026-03-24T09:04:31Z","author":[{"first_name":"Abdul Rehman","full_name":"Tareen, Abdul Rehman","id":"76938","last_name":"Tareen"},{"last_name":"Plessl","orcid":"0000-0001-5728-9982","full_name":"Plessl, Christian","id":"16153","first_name":"Christian"},{"first_name":"Tobias","last_name":"Kenter","full_name":"Kenter, Tobias","id":"3145"}],"date_created":"2026-03-24T09:02:22Z","title":"Fast Multi-Tau Correlators on FPGA with Context Switching From and to High- Bandwidth Memory","doi":"10.1109/icfpt67023.2025.00027","publication_status":"published","year":"2026","citation":{"short":"A.R. Tareen, C. Plessl, T. 
Kenter, in: 2025 International Conference on Field Programmable Technology (ICFPT), IEEE, 2026.","bibtex":"@inproceedings{Tareen_Plessl_Kenter_2026, title={Fast Multi-Tau Correlators on FPGA with Context Switching From and to High- Bandwidth Memory}, DOI={<a href=\"https://doi.org/10.1109/icfpt67023.2025.00027\">10.1109/icfpt67023.2025.00027</a>}, booktitle={2025 International Conference on Field Programmable Technology (ICFPT)}, publisher={IEEE}, author={Tareen, Abdul Rehman and Plessl, Christian and Kenter, Tobias}, year={2026} }","mla":"Tareen, Abdul Rehman, et al. “Fast Multi-Tau Correlators on FPGA with Context Switching From and to High- Bandwidth Memory.” <i>2025 International Conference on Field Programmable Technology (ICFPT)</i>, IEEE, 2026, doi:<a href=\"https://doi.org/10.1109/icfpt67023.2025.00027\">10.1109/icfpt67023.2025.00027</a>.","apa":"Tareen, A. R., Plessl, C., &#38; Kenter, T. (2026). Fast Multi-Tau Correlators on FPGA with Context Switching From and to High- Bandwidth Memory. <i>2025 International Conference on Field Programmable Technology (ICFPT)</i>. <a href=\"https://doi.org/10.1109/icfpt67023.2025.00027\">https://doi.org/10.1109/icfpt67023.2025.00027</a>","ama":"Tareen AR, Plessl C, Kenter T. Fast Multi-Tau Correlators on FPGA with Context Switching From and to High- Bandwidth Memory. In: <i>2025 International Conference on Field Programmable Technology (ICFPT)</i>. IEEE; 2026. doi:<a href=\"https://doi.org/10.1109/icfpt67023.2025.00027\">10.1109/icfpt67023.2025.00027</a>","ieee":"A. R. Tareen, C. Plessl, and T. Kenter, “Fast Multi-Tau Correlators on FPGA with Context Switching From and to High- Bandwidth Memory,” 2026, doi: <a href=\"https://doi.org/10.1109/icfpt67023.2025.00027\">10.1109/icfpt67023.2025.00027</a>.","chicago":"Tareen, Abdul Rehman, Christian Plessl, and Tobias Kenter. 
“Fast Multi-Tau Correlators on FPGA with Context Switching From and to High- Bandwidth Memory.” In <i>2025 International Conference on Field Programmable Technology (ICFPT)</i>. IEEE, 2026. <a href=\"https://doi.org/10.1109/icfpt67023.2025.00027\">https://doi.org/10.1109/icfpt67023.2025.00027</a>."},"project":[{"_id":"52","name":"Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"_id":"65101","user_id":"3145","department":[{"_id":"27"},{"_id":"518"}],"language":[{"iso":"eng"}],"type":"conference","publication":"2025 International Conference on Field Programmable Technology (ICFPT)","abstract":[{"text":"Various methods to measure the dynamic behavior of particles require the calculation of autocorrelation functions. For this purpose, fast multi-tau correlators have been developed in dedicated hardware, in software, and on FPGAs. However, for methods such as X-ray Photon Correlation Spectroscopy (XPCS), which requires to calculate the autocorrelation function independently for hundreds of thousands to millions of pixels from high-resolution detectors, current approaches rely on offline processing after data acquisition. Moreover, the internal pipeline state of so many independent correlators is far too large to keep it on-chip. In this work, we propose a design approach on FPGAs, where pipeline contexts are stored in off-chip HBM memory. Each compute unit iteratively loads the state for a single pixel, processes a short time series for this pixel, and afterwards writes back the context in a dataflow pipeline. We have implemented the required compute kernels with Vitis HLS and analyze resulting designs on an Alveo U280 card. 
The design achieves the expected performance and for the first time provides sufficient throughput for current high-end detectors used in XPCS.","lang":"eng"}],"status":"public"},{"author":[{"first_name":"Xin","last_name":"Wu","full_name":"Wu, Xin","id":"77439"},{"first_name":"Hossam","last_name":"Elgabarty","orcid":"0000-0002-4945-1481","full_name":"Elgabarty, Hossam","id":"60250"},{"first_name":"Vahideh","full_name":"Alizadeh, Vahideh","last_name":"Alizadeh"},{"last_name":"Henao Aristizabal","full_name":"Henao Aristizabal, Andres","id":"67235","first_name":"Andres"},{"first_name":"Frederik","last_name":"Zysk","id":"14757","full_name":"Zysk, Frederik"},{"orcid":"0000-0001-5728-9982","last_name":"Plessl","id":"16153","full_name":"Plessl, Christian","first_name":"Christian"},{"full_name":"Ehlert, Sebastian","last_name":"Ehlert","first_name":"Sebastian"},{"full_name":"Hutter, Jürg","last_name":"Hutter","first_name":"Jürg"},{"last_name":"Kühne","id":"49079","full_name":"Kühne, Thomas D.","first_name":"Thomas D."}],"date_created":"2026-02-09T09:03:41Z","date_updated":"2026-02-09T09:17:07Z","main_file_link":[{"url":"https://arxiv.org/abs/2503.11867"}],"title":"Benchmarking semi-empirical quantum chemical methods on liquid water","citation":{"bibtex":"@article{Wu_Elgabarty_Alizadeh_Henao Aristizabal_Zysk_Plessl_Ehlert_Hutter_Kühne_2025, title={Benchmarking semi-empirical quantum chemical methods on liquid water}, author={Wu, Xin and Elgabarty, Hossam and Alizadeh, Vahideh and Henao Aristizabal, Andres and Zysk, Frederik and Plessl, Christian and Ehlert, Sebastian and Hutter, Jürg and Kühne, Thomas D.}, year={2025} }","short":"X. Wu, H. Elgabarty, V. Alizadeh, A. Henao Aristizabal, F. Zysk, C. Plessl, S. Ehlert, J. Hutter, T.D. Kühne, (2025).","mla":"Wu, Xin, et al. <i>Benchmarking Semi-Empirical Quantum Chemical Methods on Liquid Water</i>. 
2025.","apa":"Wu, X., Elgabarty, H., Alizadeh, V., Henao Aristizabal, A., Zysk, F., Plessl, C., Ehlert, S., Hutter, J., &#38; Kühne, T. D. (2025). <i>Benchmarking semi-empirical quantum chemical methods on liquid water</i>.","ieee":"X. Wu <i>et al.</i>, “Benchmarking semi-empirical quantum chemical methods on liquid water.” 2025.","chicago":"Wu, Xin, Hossam Elgabarty, Vahideh Alizadeh, Andres Henao Aristizabal, Frederik Zysk, Christian Plessl, Sebastian Ehlert, Jürg Hutter, and Thomas D. Kühne. “Benchmarking Semi-Empirical Quantum Chemical Methods on Liquid Water,” 2025.","ama":"Wu X, Elgabarty H, Alizadeh V, et al. Benchmarking semi-empirical quantum chemical methods on liquid water. Published online 2025."},"year":"2025","user_id":"77439","department":[{"_id":"27"},{"_id":"2"}],"project":[{"_id":"52","name":"Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"_id":"64071","language":[{"iso":"eng"}],"type":"preprint","status":"public","abstract":[{"text":"Stimulated by the renewed interest and recent developments in semi-empirical quantum chemical (SQC) methods for noncovalent interactions, we examine the properties of liquid water at ambient conditions by means of molecular dynamics (MD) simulations, both with the conventional NDDO-type (neglect of diatomic differential overlap) methods, e.g. AM1 and PM6, and with DFTB-type (density-functional tight-binding) methods, e.g. DFTB2 and GFN-xTB. Besides the original parameter sets, some specifically reparametrized SQC methods (denoted as AM1-W, PM6-fm, and DFTB2-iBi) targeting various smaller water systems ranging from molecular clusters to bulk are considered as well. The quality of these different SQC methods for describing liquid water properties at ambient conditions are assessed by comparison to well-established experimental data and also to BLYP-D3 density functional theory-based ab initio MD simulations. 
Our analyses reveal that static and dynamics properties of bulk water are poorly described by all considered SQC methods with the original parameters, regardless of the underlying theoretical models, with most of the methods suffering from too weak hydrogen bonds and hence predicting a far too fluid water with highly distorted hydrogen bond kinetics. On the other hand, the reparametrized force-matched PM6-fm method is shown to be able to quantitatively reproduce the static and dynamic features of liquid water, and thus can be used as a computationally efficient alternative to electronic structure-based MD simulations for liquid water that requires extended length and time scales. DFTB2-iBi predicts a slightly overstructured water with reduced fluidity, whereas AM1-W gives an amorphous ice-like structure for water at ambient conditions.","lang":"eng"}]},{"title":"FINN-HPC: Closing the Gap for Energy-Efficient Neural Network Inference on FPGAs in HPC","conference":{"name":"The International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies 2025 (HEART 2025)","start_date":"2025-05-26","end_date":"2025-05-28","location":"Kumamoto, Japan"},"doi":"10.1145/3728179.3728189","publisher":"ACM","date_updated":"2025-06-23T08:39:26Z","author":[{"id":"67601","full_name":"Jungemann, Linus","last_name":"Jungemann","orcid":"0009-0003-9757-988X","first_name":"Linus"},{"first_name":"Bjarne","full_name":"Wintermann, Bjarne","last_name":"Wintermann"},{"last_name":"Riebler","id":"8961","full_name":"Riebler, Heinrich","first_name":"Heinrich"},{"first_name":"Christian","id":"16153","full_name":"Plessl, Christian","orcid":"0000-0001-5728-9982","last_name":"Plessl"}],"date_created":"2025-05-05T11:12:47Z","place":"New York City","year":"2025","citation":{"apa":"Jungemann, L., Wintermann, B., Riebler, H., &#38; Plessl, C. (2025). FINN-HPC: Closing the Gap for Energy-Efficient Neural Network Inference on FPGAs in HPC. 
<i>Proceedings of the 15th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies</i>. The International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies 2025 (HEART 2025), Kumamoto, Japan. <a href=\"https://doi.org/10.1145/3728179.3728189\">https://doi.org/10.1145/3728179.3728189</a>","short":"L. Jungemann, B. Wintermann, H. Riebler, C. Plessl, in: Proceedings of the 15th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies, ACM, New York City, 2025.","bibtex":"@inproceedings{Jungemann_Wintermann_Riebler_Plessl_2025, place={New York City}, title={FINN-HPC: Closing the Gap for Energy-Efficient Neural Network Inference on FPGAs in HPC}, DOI={<a href=\"https://doi.org/10.1145/3728179.3728189\">10.1145/3728179.3728189</a>}, booktitle={Proceedings of the 15th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies}, publisher={ACM}, author={Jungemann, Linus and Wintermann, Bjarne and Riebler, Heinrich and Plessl, Christian}, year={2025} }","mla":"Jungemann, Linus, et al. “FINN-HPC: Closing the Gap for Energy-Efficient Neural Network Inference on FPGAs in HPC.” <i>Proceedings of the 15th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies</i>, ACM, 2025, doi:<a href=\"https://doi.org/10.1145/3728179.3728189\">10.1145/3728179.3728189</a>.","ieee":"L. Jungemann, B. Wintermann, H. Riebler, and C. Plessl, “FINN-HPC: Closing the Gap for Energy-Efficient Neural Network Inference on FPGAs in HPC,” presented at the The International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies 2025 (HEART 2025), Kumamoto, Japan, 2025, doi: <a href=\"https://doi.org/10.1145/3728179.3728189\">10.1145/3728179.3728189</a>.","chicago":"Jungemann, Linus, Bjarne Wintermann, Heinrich Riebler, and Christian Plessl. 
“FINN-HPC: Closing the Gap for Energy-Efficient Neural Network Inference on FPGAs in HPC.” In <i>Proceedings of the 15th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies</i>. New York City: ACM, 2025. <a href=\"https://doi.org/10.1145/3728179.3728189\">https://doi.org/10.1145/3728179.3728189</a>.","ama":"Jungemann L, Wintermann B, Riebler H, Plessl C. FINN-HPC: Closing the Gap for Energy-Efficient Neural Network Inference on FPGAs in HPC. In: <i>Proceedings of the 15th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies</i>. ACM; 2025. doi:<a href=\"https://doi.org/10.1145/3728179.3728189\">10.1145/3728179.3728189</a>"},"publication_status":"published","publication_identifier":{"unknown":["979-8-4007-1432-0/25/05"]},"language":[{"iso":"eng"}],"project":[{"_id":"296","name":"EKI-App: EKI-App: Energieeffiziente Künstliche Intelligenz im Rechenzentrum durch Approximation von tiefen neuronalen Netzen für Field-Programmable Gate Arrays"}],"_id":"59804","user_id":"67601","department":[{"_id":"518"}],"status":"public","type":"conference","publication":"Proceedings of the 15th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies"},{"doi":"10.1145/3728179.3728190","conference":{"location":"Kumamoto, Japan","end_date":"2025-05-28","start_date":"2025-05-26","name":"The International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies 2025 (HEART 2025) "},"title":"AuroraFlow, an Easy-to-Use, Low-Latency FPGA Communication Solution Demonstrated on Multi-FPGA Neural Network Inference","author":[{"first_name":"Gerrit","full_name":"Pape, Gerrit","last_name":"Pape"},{"orcid":"0009-0000-0856-6250","last_name":"Wintermann","full_name":"Wintermann, Bjarne","id":"62900","first_name":"Bjarne"},{"id":"67601","full_name":"Jungemann, 
Linus","orcid":"0009-0003-9757-988X","last_name":"Jungemann","first_name":"Linus"},{"first_name":"Michael","last_name":"Lass","full_name":"Lass, Michael"},{"full_name":"Meyer, Marius","last_name":"Meyer","first_name":"Marius"},{"first_name":"Heinrich","id":"8961","full_name":"Riebler, Heinrich","last_name":"Riebler"},{"first_name":"Christian","orcid":"0000-0001-5728-9982","last_name":"Plessl","id":"16153","full_name":"Plessl, Christian"}],"date_created":"2025-05-06T09:53:41Z","date_updated":"2025-06-23T08:40:28Z","citation":{"apa":"Pape, G., Wintermann, B., Jungemann, L., Lass, M., Meyer, M., Riebler, H., &#38; Plessl, C. (2025). AuroraFlow, an Easy-to-Use, Low-Latency FPGA Communication Solution Demonstrated on Multi-FPGA Neural Network Inference. <i>Proceedings of the 15th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies</i>. The International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies 2025 (HEART 2025) , Kumamoto, Japan. <a href=\"https://doi.org/10.1145/3728179.3728190\">https://doi.org/10.1145/3728179.3728190</a>","bibtex":"@inproceedings{Pape_Wintermann_Jungemann_Lass_Meyer_Riebler_Plessl_2025, title={AuroraFlow, an Easy-to-Use, Low-Latency FPGA Communication Solution Demonstrated on Multi-FPGA Neural Network Inference}, DOI={<a href=\"https://doi.org/10.1145/3728179.3728190\">10.1145/3728179.3728190</a>}, booktitle={Proceedings of the 15th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies}, author={Pape, Gerrit and Wintermann, Bjarne and Jungemann, Linus and Lass, Michael and Meyer, Marius and Riebler, Heinrich and Plessl, Christian}, year={2025} }","mla":"Pape, Gerrit, et al. 
“AuroraFlow, an Easy-to-Use, Low-Latency FPGA Communication Solution Demonstrated on Multi-FPGA Neural Network Inference.” <i>Proceedings of the 15th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies</i>, 2025, doi:<a href=\"https://doi.org/10.1145/3728179.3728190\">10.1145/3728179.3728190</a>.","short":"G. Pape, B. Wintermann, L. Jungemann, M. Lass, M. Meyer, H. Riebler, C. Plessl, in: Proceedings of the 15th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies, 2025.","ama":"Pape G, Wintermann B, Jungemann L, et al. AuroraFlow, an Easy-to-Use, Low-Latency FPGA Communication Solution Demonstrated on Multi-FPGA Neural Network Inference. In: <i>Proceedings of the 15th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies</i>. ; 2025. doi:<a href=\"https://doi.org/10.1145/3728179.3728190\">10.1145/3728179.3728190</a>","chicago":"Pape, Gerrit, Bjarne Wintermann, Linus Jungemann, Michael Lass, Marius Meyer, Heinrich Riebler, and Christian Plessl. “AuroraFlow, an Easy-to-Use, Low-Latency FPGA Communication Solution Demonstrated on Multi-FPGA Neural Network Inference.” In <i>Proceedings of the 15th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies</i>, 2025. <a href=\"https://doi.org/10.1145/3728179.3728190\">https://doi.org/10.1145/3728179.3728190</a>.","ieee":"G. 
Pape <i>et al.</i>, “AuroraFlow, an Easy-to-Use, Low-Latency FPGA Communication Solution Demonstrated on Multi-FPGA Neural Network Inference,” presented at the The International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies 2025 (HEART 2025) , Kumamoto, Japan, 2025, doi: <a href=\"https://doi.org/10.1145/3728179.3728190\">10.1145/3728179.3728190</a>."},"year":"2025","publication_status":"published","language":[{"iso":"eng"}],"user_id":"67601","department":[{"_id":"27"}],"project":[{"_id":"296","name":"EKI-App: EKI-App: Energieeffiziente Künstliche Intelligenz im Rechenzentrum durch Approximation von tiefen neuronalen Netzen für Field-Programmable Gate Arrays"}],"_id":"59816","status":"public","type":"conference","publication":"Proceedings of the 15th International Symposium on Highly Efficient Accelerators and Reconfigurable Technologies"},{"type":"journal_article","status":"public","_id":"60298","project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"department":[{"_id":"27"}],"user_id":"75963","article_type":"original","article_number":"109689","publication_identifier":{"issn":["0010-4655"]},"publication_status":"published","intvolume":"       315","citation":{"ieee":"J. Wingenbach, D. Bauch, X. Ma, R. Schade, C. Plessl, and S. Schumacher, “PHOENIX – Paderborn highly optimized and energy efficient solver for two-dimensional nonlinear Schrödinger equations with integrated extensions,” <i>Computer Physics Communications</i>, vol. 315, Art. no. 109689, 2025, doi: <a href=\"https://doi.org/10.1016/j.cpc.2025.109689\">10.1016/j.cpc.2025.109689</a>.","chicago":"Wingenbach, Jan, David Bauch, Xuekai Ma, Robert Schade, Christian Plessl, and Stefan Schumacher. “PHOENIX – Paderborn Highly Optimized and Energy Efficient Solver for Two-Dimensional Nonlinear Schrödinger Equations with Integrated Extensions.” <i>Computer Physics Communications</i> 315 (2025). 
<a href=\"https://doi.org/10.1016/j.cpc.2025.109689\">https://doi.org/10.1016/j.cpc.2025.109689</a>.","ama":"Wingenbach J, Bauch D, Ma X, Schade R, Plessl C, Schumacher S. PHOENIX – Paderborn highly optimized and energy efficient solver for two-dimensional nonlinear Schrödinger equations with integrated extensions. <i>Computer Physics Communications</i>. 2025;315. doi:<a href=\"https://doi.org/10.1016/j.cpc.2025.109689\">10.1016/j.cpc.2025.109689</a>","short":"J. Wingenbach, D. Bauch, X. Ma, R. Schade, C. Plessl, S. Schumacher, Computer Physics Communications 315 (2025).","mla":"Wingenbach, Jan, et al. “PHOENIX – Paderborn Highly Optimized and Energy Efficient Solver for Two-Dimensional Nonlinear Schrödinger Equations with Integrated Extensions.” <i>Computer Physics Communications</i>, vol. 315, 109689, Elsevier BV, 2025, doi:<a href=\"https://doi.org/10.1016/j.cpc.2025.109689\">10.1016/j.cpc.2025.109689</a>.","bibtex":"@article{Wingenbach_Bauch_Ma_Schade_Plessl_Schumacher_2025, title={PHOENIX – Paderborn highly optimized and energy efficient solver for two-dimensional nonlinear Schrödinger equations with integrated extensions}, volume={315}, DOI={<a href=\"https://doi.org/10.1016/j.cpc.2025.109689\">10.1016/j.cpc.2025.109689</a>}, number={109689}, journal={Computer Physics Communications}, publisher={Elsevier BV}, author={Wingenbach, Jan and Bauch, David and Ma, Xuekai and Schade, Robert and Plessl, Christian and Schumacher, Stefan}, year={2025} }","apa":"Wingenbach, J., Bauch, D., Ma, X., Schade, R., Plessl, C., &#38; Schumacher, S. (2025). PHOENIX – Paderborn highly optimized and energy efficient solver for two-dimensional nonlinear Schrödinger equations with integrated extensions. <i>Computer Physics Communications</i>, <i>315</i>, Article 109689. 
<a href=\"https://doi.org/10.1016/j.cpc.2025.109689\">https://doi.org/10.1016/j.cpc.2025.109689</a>"},"date_updated":"2025-06-29T12:00:36Z","volume":315,"author":[{"full_name":"Wingenbach, Jan","id":"69187","last_name":"Wingenbach","first_name":"Jan"},{"id":"44172","full_name":"Bauch, David","last_name":"Bauch","first_name":"David"},{"last_name":"Ma","full_name":"Ma, Xuekai","id":"59416","first_name":"Xuekai"},{"last_name":"Schade","orcid":"0000-0002-6268-5397","full_name":"Schade, Robert","id":"75963","first_name":"Robert"},{"orcid":"0000-0001-5728-9982","last_name":"Plessl","full_name":"Plessl, Christian","id":"16153","first_name":"Christian"},{"full_name":"Schumacher, Stefan","id":"27271","last_name":"Schumacher","orcid":"0000-0003-4042-4951","first_name":"Stefan"}],"doi":"10.1016/j.cpc.2025.109689","publication":"Computer Physics Communications","abstract":[{"lang":"eng","text":"In this work, we introduce PHOENIX, a highly optimized explicit open-source solver for two-dimensional nonlinear Schrödinger equations with extensions. The nonlinear Schrödinger equation and its extensions (Gross-Pitaevskii equation) are widely studied to model and analyze complex phenomena in fields such as optics, condensed matter physics, fluid dynamics, and plasma physics. It serves as a powerful tool for understanding nonlinear wave dynamics, soliton formation, and the interplay between nonlinearity, dispersion, and diffraction. By extending the nonlinear Schrödinger equation, various physical effects such as non-Hermiticity, spin-orbit interaction, and quantum optical aspects can be incorporated. PHOENIX is designed to accommodate a wide range of applications by a straightforward extendability without the need for user knowledge of computing architectures or performance optimization. The high performance and power efficiency of PHOENIX are demonstrated on a wide range of entry-class to high-end consumer and high-performance computing GPUs and CPUs. 
Compared to a more conventional MATLAB implementation, a speedup of up to three orders of magnitude and energy savings of up to 99.8% are achieved. The performance is compared to a performance model showing that PHOENIX performs close to the relevant performance bounds in many situations. The possibilities of PHOENIX are demonstrated with a range of practical examples from the realm of nonlinear (quantum) photonics in planar microresonators with active media including exciton-polariton condensates. Examples range from solutions on very large grids, the use of local optimization algorithms, to Monte Carlo ensemble evolutions with quantum noise enabling the tomography of the system's quantum state."}],"language":[{"iso":"eng"}],"year":"2025","publisher":"Elsevier BV","date_created":"2025-06-23T07:38:52Z","title":"PHOENIX – Paderborn highly optimized and energy efficient solver for two-dimensional nonlinear Schrödinger equations with integrated extensions"},{"intvolume":"        81","citation":{"short":"M. Büttner, C. Alt, T. Kenter, H. Köstler, C. Plessl, V. Aizinger, The Journal of Supercomputing 81 (2025).","mla":"Büttner, Markus, et al. “Analyzing Performance Portability for a SYCL Implementation of the 2D Shallow Water Equations.” <i>The Journal of Supercomputing</i>, vol. 81, no. 
6, 772, Springer Science and Business Media LLC, 2025, doi:<a href=\"https://doi.org/10.1007/s11227-025-07063-7\">10.1007/s11227-025-07063-7</a>.","bibtex":"@article{Büttner_Alt_Kenter_Köstler_Plessl_Aizinger_2025, title={Analyzing performance portability for a SYCL implementation of the 2D shallow water equations}, volume={81}, DOI={<a href=\"https://doi.org/10.1007/s11227-025-07063-7\">10.1007/s11227-025-07063-7</a>}, number={6772}, journal={The Journal of Supercomputing}, publisher={Springer Science and Business Media LLC}, author={Büttner, Markus and Alt, Christoph and Kenter, Tobias and Köstler, Harald and Plessl, Christian and Aizinger, Vadym}, year={2025} }","apa":"Büttner, M., Alt, C., Kenter, T., Köstler, H., Plessl, C., &#38; Aizinger, V. (2025). Analyzing performance portability for a SYCL implementation of the 2D shallow water equations. <i>The Journal of Supercomputing</i>, <i>81</i>(6), Article 772. <a href=\"https://doi.org/10.1007/s11227-025-07063-7\">https://doi.org/10.1007/s11227-025-07063-7</a>","ama":"Büttner M, Alt C, Kenter T, Köstler H, Plessl C, Aizinger V. Analyzing performance portability for a SYCL implementation of the 2D shallow water equations. <i>The Journal of Supercomputing</i>. 2025;81(6). doi:<a href=\"https://doi.org/10.1007/s11227-025-07063-7\">10.1007/s11227-025-07063-7</a>","ieee":"M. Büttner, C. Alt, T. Kenter, H. Köstler, C. Plessl, and V. Aizinger, “Analyzing performance portability for a SYCL implementation of the 2D shallow water equations,” <i>The Journal of Supercomputing</i>, vol. 81, no. 6, Art. no. 772, 2025, doi: <a href=\"https://doi.org/10.1007/s11227-025-07063-7\">10.1007/s11227-025-07063-7</a>.","chicago":"Büttner, Markus, Christoph Alt, Tobias Kenter, Harald Köstler, Christian Plessl, and Vadym Aizinger. “Analyzing Performance Portability for a SYCL Implementation of the 2D Shallow Water Equations.” <i>The Journal of Supercomputing</i> 81, no. 6 (2025). 
<a href=\"https://doi.org/10.1007/s11227-025-07063-7\">https://doi.org/10.1007/s11227-025-07063-7</a>."},"publication_identifier":{"issn":["1573-0484"]},"publication_status":"published","doi":"10.1007/s11227-025-07063-7","main_file_link":[{"open_access":"1"}],"date_updated":"2025-11-04T09:48:10Z","oa":"1","volume":81,"author":[{"first_name":"Markus","last_name":"Büttner","full_name":"Büttner, Markus"},{"last_name":"Alt","full_name":"Alt, Christoph","id":"100625","first_name":"Christoph"},{"last_name":"Kenter","full_name":"Kenter, Tobias","id":"3145","first_name":"Tobias"},{"first_name":"Harald","last_name":"Köstler","full_name":"Köstler, Harald"},{"first_name":"Christian","last_name":"Plessl","orcid":"0000-0001-5728-9982","full_name":"Plessl, Christian","id":"16153"},{"first_name":"Vadym","last_name":"Aizinger","full_name":"Aizinger, Vadym"}],"status":"public","type":"journal_article","article_number":"772","_id":"62064","department":[{"_id":"27"},{"_id":"518"}],"user_id":"3145","year":"2025","quality_controlled":"1","issue":"6","title":"Analyzing performance portability for a SYCL implementation of the 2D shallow water equations","publisher":"Springer Science and Business Media LLC","date_created":"2025-11-04T09:37:50Z","abstract":[{"text":"SYCL is an open standard for targeting heterogeneous hardware from C++. In this work, we evaluate a SYCL implementation for a discontinuous Galerkin discretization of the 2D shallow water equations targeting CPUs, GPUs, and also FPGAs. The discretization uses polynomial orders zero to two on unstructured triangular meshes. Separating memory accesses from the numerical code allows us to optimize data accesses for the target architecture. A performance analysis shows good portability across x86 and ARM CPUs, GPUs from different vendors, and even two variants of Intel Stratix 10 FPGAs. 
Measuring the energy to solution shows that GPUs yield an up to 10x higher energy efficiency in terms of degrees of freedom per joule compared to CPUs. With custom designed caches, FPGAs offer a meaningful complement to the other architectures with particularly good computational performance on smaller meshes. FPGAs with High Bandwidth Memory are less affected by bandwidth issues and have similar energy efficiency as latest generation CPUs.","lang":"eng"}],"publication":"The Journal of Supercomputing","language":[{"iso":"eng"}]},{"citation":{"mla":"Alt, Christoph, et al. “Evaluating OneAPI I/O Pipes in a Case Study of Scaling a SYCL Jacobi Solver to Multiple FPGAs.” <i>Proceedings of the 13th International Workshop on OpenCL and SYCL</i>, Association for Computing Machinery, 2025, doi:<a href=\"https://doi.org/10.1145/3731125.3731131\">10.1145/3731125.3731131</a>.","short":"C. Alt, C. Plessl, T. Kenter, in: Proceedings of the 13th International Workshop on OpenCL and SYCL, Association for Computing Machinery, New York, NY, USA, 2025.","bibtex":"@inproceedings{Alt_Plessl_Kenter_2025, place={New York, NY, USA}, series={IWOCL ’25}, title={Evaluating oneAPI I/O Pipes in a Case Study of Scaling a SYCL Jacobi Solver to multiple FPGAs}, DOI={<a href=\"https://doi.org/10.1145/3731125.3731131\">10.1145/3731125.3731131</a>}, booktitle={Proceedings of the 13th International Workshop on OpenCL and SYCL}, publisher={Association for Computing Machinery}, author={Alt, Christoph and Plessl, Christian and Kenter, Tobias}, year={2025}, collection={IWOCL ’25} }","apa":"Alt, C., Plessl, C., &#38; Kenter, T. (2025). Evaluating oneAPI I/O Pipes in a Case Study of Scaling a SYCL Jacobi Solver to multiple FPGAs. <i>Proceedings of the 13th International Workshop on OpenCL and SYCL</i>. <a href=\"https://doi.org/10.1145/3731125.3731131\">https://doi.org/10.1145/3731125.3731131</a>","chicago":"Alt, Christoph, Christian Plessl, and Tobias Kenter. 
“Evaluating OneAPI I/O Pipes in a Case Study of Scaling a SYCL Jacobi Solver to Multiple FPGAs.” In <i>Proceedings of the 13th International Workshop on OpenCL and SYCL</i>. IWOCL ’25. New York, NY, USA: Association for Computing Machinery, 2025. <a href=\"https://doi.org/10.1145/3731125.3731131\">https://doi.org/10.1145/3731125.3731131</a>.","ieee":"C. Alt, C. Plessl, and T. Kenter, “Evaluating oneAPI I/O Pipes in a Case Study of Scaling a SYCL Jacobi Solver to multiple FPGAs,” 2025, doi: <a href=\"https://doi.org/10.1145/3731125.3731131\">10.1145/3731125.3731131</a>.","ama":"Alt C, Plessl C, Kenter T. Evaluating oneAPI I/O Pipes in a Case Study of Scaling a SYCL Jacobi Solver to multiple FPGAs. In: <i>Proceedings of the 13th International Workshop on OpenCL and SYCL</i>. IWOCL ’25. Association for Computing Machinery; 2025. doi:<a href=\"https://doi.org/10.1145/3731125.3731131\">10.1145/3731125.3731131</a>"},"place":"New York, NY, USA","publication_identifier":{"isbn":["9798400713606"]},"main_file_link":[{"open_access":"1"}],"doi":"10.1145/3731125.3731131","author":[{"first_name":"Christoph","last_name":"Alt","full_name":"Alt, Christoph","id":"100625"},{"first_name":"Christian","orcid":"0000-0001-5728-9982","last_name":"Plessl","id":"16153","full_name":"Plessl, Christian"},{"first_name":"Tobias","id":"3145","full_name":"Kenter, Tobias","last_name":"Kenter"}],"oa":"1","date_updated":"2025-11-04T09:47:26Z","status":"public","type":"conference","series_title":"IWOCL ’25","user_id":"3145","department":[{"_id":"27"},{"_id":"518"}],"project":[{"_id":"52","name":"Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"_id":"62066","year":"2025","quality_controlled":"1","title":"Evaluating oneAPI I/O Pipes in a Case Study of Scaling a SYCL Jacobi Solver to multiple FPGAs","date_created":"2025-11-04T09:45:23Z","publisher":"Association for Computing Machinery","abstract":[{"text":"In the context of high-performance computing (HPC) for distributed 
workloads, individual field-programmable gate arrays (FPGAs) need efficient ways to exchange data, which requires network infrastructure and software abstractions. Dedicated multi-FPGA clusters provide inter-FPGA networks for direct device to device communication. The oneAPI high-level synthesis toolchain offers I/O pipes to allow user kernels to interact with the networking ports of the FPGA board. In this work, we evaluate using oneAPI I/O pipes for direct FPGA-to-FPGA communication by scaling a SYCL implementation of a Jacobi solver on up to 25 FPGAs in the Noctua 2 cluster. We see good results in weak and strong scaling experiments.","lang":"eng"}],"publication":"Proceedings of the 13th International Workshop on OpenCL and SYCL","language":[{"iso":"eng"}],"keyword":["Multi-FPGA","High-level Synthesis","oneAPI","FPGA"]},{"language":[{"iso":"eng"}],"ddc":["004"],"keyword":["Otus","Supercomputer","FPGA","PC2","Paderborn Center for Parallel Computing","Noctua 2","HPC"],"report_number":"PC2TR-2025-1","file":[{"date_updated":"2026-03-25T11:50:30Z","date_created":"2025-12-09T09:19:12Z","creator":"deffel","file_size":4535595,"access_level":"open_access","file_name":"2512.07401v1.pdf","file_id":"62982","content_type":"application/pdf","relation":"main_file"}],"abstract":[{"lang":"eng","text":"Otus is a high-performance computing cluster that was launched in 2025 and is operated by the Paderborn Center for Parallel Computing (PC2) at Paderborn University in Germany. The system is part of the National High Performance Computing (NHR) initiative. Otus complements the previous supercomputer Noctua 2, offering approximately twice the computing power while retaining the three node types that were characteristic of Noctua 2: 1) CPU compute nodes with different memory capacities, 2) high-end GPU nodes, and 3) HPC-grade FPGA nodes. 
On the Top500 list, which ranks the 500 most powerful supercomputers in the world, Otus is in position 164 with the CPU partition and in position 255 with the GPU partition (June 2025). On the Green500 list, ranking the 500 most energy-efficient supercomputers in the world, Otus is in position 5 with the GPU partition (June 2025).\r\n\r\n\r\nThis article provides a comprehensive overview of the system in terms of its hardware, software, system integration, and its overall integration into the data center building to ensure energy-efficient operation. The article aims to provide unique insights for scientists using the system and for other centers operating HPC clusters. The article will be continuously updated to reflect the latest system setup and measurements. "}],"date_created":"2025-12-09T09:11:04Z","publisher":"Paderborn Center for Parallel Computing (PC2)","title":"Otus Supercomputer","year":"2025","user_id":"23522","series_title":"PC2 Tech­nic­al Re­port Series","department":[{"_id":"27"},{"_id":"518"}],"_id":"62981","file_date_updated":"2026-03-25T11:50:30Z","type":"report","status":"public","author":[{"id":"116116","full_name":"Ehtesabi, Sadaf","last_name":"Ehtesabi","first_name":"Sadaf"},{"first_name":"Manoar","orcid":"https://orcid.org/0000-0002-0737-7981","last_name":"Hossain","full_name":"Hossain, Manoar","id":"114619"},{"first_name":"Tobias","last_name":"Kenter","full_name":"Kenter, Tobias","id":"3145"},{"first_name":"Andreas","last_name":"Krawinkel","id":"15275","full_name":"Krawinkel, Andreas"},{"first_name":"Lukas","last_name":"Ostermann","full_name":"Ostermann, Lukas","id":"69976"},{"first_name":"Christian","full_name":"Plessl, Christian","id":"16153","last_name":"Plessl","orcid":"0000-0001-5728-9982"},{"full_name":"Riebler, Heinrich","id":"8961","last_name":"Riebler","first_name":"Heinrich"},{"first_name":"Stefan","full_name":"Rohde, Stefan","id":"34009","last_name":"Rohde"},{"orcid":"0000-0002-6268-5397","last_name":"Schade","full_name":"Schade, 
Robert","id":"75963","first_name":"Robert"},{"last_name":"Schwarz","id":"5312","full_name":"Schwarz, Michael","first_name":"Michael"},{"first_name":"Jens","last_name":"Simon","full_name":"Simon, Jens","id":"15273"},{"first_name":"Nils","id":"61189","full_name":"Winnwa, Nils","last_name":"Winnwa"},{"orcid":"0000-0003-1764-9773","last_name":"Wiens","id":"23522","full_name":"Wiens, Alex","first_name":"Alex"},{"first_name":"Xin","id":"77439","full_name":"Wu, Xin","last_name":"Wu"}],"volume":1,"oa":"1","date_updated":"2026-03-25T11:50:31Z","doi":"10.48550/ARXIV.2512.07401","publication_status":"published","has_accepted_license":"1","citation":{"apa":"Ehtesabi, S., Hossain, M., Kenter, T., Krawinkel, A., Ostermann, L., Plessl, C., Riebler, H., Rohde, S., Schade, R., Schwarz, M., Simon, J., Winnwa, N., Wiens, A., &#38; Wu, X. (2025). <i>Otus Supercomputer</i> (Vol. 1). Paderborn Center for Parallel Computing (PC2). <a href=\"https://doi.org/10.48550/ARXIV.2512.07401\">https://doi.org/10.48550/ARXIV.2512.07401</a>","short":"S. Ehtesabi, M. Hossain, T. Kenter, A. Krawinkel, L. Ostermann, C. Plessl, H. Riebler, S. Rohde, R. Schade, M. Schwarz, J. Simon, N. Winnwa, A. Wiens, X. Wu, Otus Supercomputer, Paderborn Center for Parallel Computing (PC2), Paderborn, 2025.","bibtex":"@book{Ehtesabi_Hossain_Kenter_Krawinkel_Ostermann_Plessl_Riebler_Rohde_Schade_Schwarz_et al._2025, place={Paderborn}, series={PC2 Tech­nic­al Re­port Series}, title={Otus Supercomputer}, volume={1}, DOI={<a href=\"https://doi.org/10.48550/ARXIV.2512.07401\">10.48550/ARXIV.2512.07401</a>}, publisher={Paderborn Center for Parallel Computing (PC2)}, author={Ehtesabi, Sadaf and Hossain, Manoar and Kenter, Tobias and Krawinkel, Andreas and Ostermann, Lukas and Plessl, Christian and Riebler, Heinrich and Rohde, Stefan and Schade, Robert and Schwarz, Michael and et al.}, year={2025}, collection={PC2 Tech­nic­al Re­port Series} }","mla":"Ehtesabi, Sadaf, et al. <i>Otus Supercomputer</i>. 
Paderborn Center for Parallel Computing (PC2), 2025, doi:<a href=\"https://doi.org/10.48550/ARXIV.2512.07401\">10.48550/ARXIV.2512.07401</a>.","chicago":"Ehtesabi, Sadaf, Manoar Hossain, Tobias Kenter, Andreas Krawinkel, Lukas Ostermann, Christian Plessl, Heinrich Riebler, et al. <i>Otus Supercomputer</i>. Vol. 1. PC2 Tech­nic­al Re­port Series. Paderborn: Paderborn Center for Parallel Computing (PC2), 2025. <a href=\"https://doi.org/10.48550/ARXIV.2512.07401\">https://doi.org/10.48550/ARXIV.2512.07401</a>.","ieee":"S. Ehtesabi <i>et al.</i>, <i>Otus Supercomputer</i>, vol. 1. Paderborn: Paderborn Center for Parallel Computing (PC2), 2025.","ama":"Ehtesabi S, Hossain M, Kenter T, et al. <i>Otus Supercomputer</i>. Vol 1. Paderborn Center for Parallel Computing (PC2); 2025. doi:<a href=\"https://doi.org/10.48550/ARXIV.2512.07401\">10.48550/ARXIV.2512.07401</a>"},"page":"33","intvolume":"         1","place":"Paderborn"},{"volume":26,"author":[{"orcid":"0000-0002-5708-7632","last_name":"Lass","id":"24135","full_name":"Lass, Michael","first_name":"Michael"},{"last_name":"Kenter","id":"3145","full_name":"Kenter, Tobias","first_name":"Tobias"},{"last_name":"Plessl","orcid":"0000-0001-5728-9982","full_name":"Plessl, Christian","id":"16153","first_name":"Christian"},{"last_name":"Brehm","id":"100167","full_name":"Brehm, Martin","first_name":"Martin"}],"date_created":"2024-04-12T18:31:39Z","date_updated":"2024-04-12T18:34:32Z","publisher":"MDPI AG","doi":"10.3390/e26040322","title":"Characterizing Microheterogeneity in Liquid Mixtures via Local Density Fluctuations","issue":"4","publication_identifier":{"issn":["1099-4300"]},"publication_status":"published","intvolume":"        26","citation":{"apa":"Lass, M., Kenter, T., Plessl, C., &#38; Brehm, M. (2024). Characterizing Microheterogeneity in Liquid Mixtures via Local Density Fluctuations. <i>Entropy</i>, <i>26</i>(4), Article 322. 
<a href=\"https://doi.org/10.3390/e26040322\">https://doi.org/10.3390/e26040322</a>","bibtex":"@article{Lass_Kenter_Plessl_Brehm_2024, title={Characterizing Microheterogeneity in Liquid Mixtures via Local Density Fluctuations}, volume={26}, DOI={<a href=\"https://doi.org/10.3390/e26040322\">10.3390/e26040322</a>}, number={4322}, journal={Entropy}, publisher={MDPI AG}, author={Lass, Michael and Kenter, Tobias and Plessl, Christian and Brehm, Martin}, year={2024} }","short":"M. Lass, T. Kenter, C. Plessl, M. Brehm, Entropy 26 (2024).","mla":"Lass, Michael, et al. “Characterizing Microheterogeneity in Liquid Mixtures via Local Density Fluctuations.” <i>Entropy</i>, vol. 26, no. 4, 322, MDPI AG, 2024, doi:<a href=\"https://doi.org/10.3390/e26040322\">10.3390/e26040322</a>.","chicago":"Lass, Michael, Tobias Kenter, Christian Plessl, and Martin Brehm. “Characterizing Microheterogeneity in Liquid Mixtures via Local Density Fluctuations.” <i>Entropy</i> 26, no. 4 (2024). <a href=\"https://doi.org/10.3390/e26040322\">https://doi.org/10.3390/e26040322</a>.","ieee":"M. Lass, T. Kenter, C. Plessl, and M. Brehm, “Characterizing Microheterogeneity in Liquid Mixtures via Local Density Fluctuations,” <i>Entropy</i>, vol. 26, no. 4, Art. no. 322, 2024, doi: <a href=\"https://doi.org/10.3390/e26040322\">10.3390/e26040322</a>.","ama":"Lass M, Kenter T, Plessl C, Brehm M. Characterizing Microheterogeneity in Liquid Mixtures via Local Density Fluctuations. <i>Entropy</i>. 2024;26(4). 
doi:<a href=\"https://doi.org/10.3390/e26040322\">10.3390/e26040322</a>"},"year":"2024","department":[{"_id":"27"},{"_id":"518"},{"_id":"803"}],"user_id":"24135","_id":"53474","project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"language":[{"iso":"eng"}],"article_number":"322","publication":"Entropy","type":"journal_article","status":"public","abstract":[{"text":"We present a novel approach to characterize and quantify microheterogeneity and microphase separation in computer simulations of complex liquid mixtures. Our post-processing method is based on local density fluctuations of the different constituents in sampling spheres of varying size. It can be easily applied to both molecular dynamics (MD) and Monte Carlo (MC) simulations, including periodic boundary conditions. Multidimensional correlation of the density distributions yields a clear picture of the domain formation due to the subtle balance of different interactions. We apply our approach to the example of force field molecular dynamics simulations of imidazolium-based ionic liquids with different side chain lengths at different temperatures, namely 1-ethyl-3-methylimidazolium chloride, 1-hexyl-3-methylimidazolium chloride, and 1-decyl-3-methylimidazolium chloride, which are known to form distinct liquid domains. We put the results into the context of existing microheterogeneity analyses and demonstrate the advantages and sensitivity of our novel method. Furthermore, we show how to estimate the configuration entropy from our analysis, and we investigate voids in the system. 
The analysis has been implemented into our program package TRAVIS and is thus available as free software.","lang":"eng"}]},{"oa":"1","date_updated":"2024-04-26T08:44:30Z","author":[{"full_name":"Bauer, Carsten","id":"90082","last_name":"Bauer","first_name":"Carsten"},{"last_name":"Kenter","id":"3145","full_name":"Kenter, Tobias","first_name":"Tobias"},{"full_name":"Lass, Michael","id":"24135","last_name":"Lass","orcid":"0000-0002-5708-7632","first_name":"Michael"},{"first_name":"Lukas","last_name":"Mazur","orcid":"0000-0001-6304-7082","full_name":"Mazur, Lukas","id":"90492"},{"first_name":"Marius","id":"40778","full_name":"Meyer, Marius","last_name":"Meyer"},{"first_name":"Holger","full_name":"Nitsche, Holger","id":"15272","last_name":"Nitsche"},{"first_name":"Heinrich","last_name":"Riebler","id":"8961","full_name":"Riebler, Heinrich"},{"first_name":"Robert","id":"75963","full_name":"Schade, Robert","orcid":"0000-0002-6268-5397","last_name":"Schade"},{"full_name":"Schwarz, Michael","id":"5312","last_name":"Schwarz","first_name":"Michael"},{"first_name":"Nils","last_name":"Winnwa","id":"61189","full_name":"Winnwa, Nils"},{"first_name":"Alex","id":"23522","full_name":"Wiens, Alex","orcid":"0000-0003-1764-9773","last_name":"Wiens"},{"id":"77439","full_name":"Wu, Xin","last_name":"Wu","first_name":"Xin"},{"id":"16153","full_name":"Plessl, Christian","last_name":"Plessl","orcid":"0000-0001-5728-9982","first_name":"Christian"},{"first_name":"Jens","id":"15273","full_name":"Simon, Jens","last_name":"Simon"}],"volume":9,"doi":"10.17815/jlsrf-8-187","publication_status":"published","has_accepted_license":"1","citation":{"apa":"Bauer, C., Kenter, T., Lass, M., Mazur, L., Meyer, M., Nitsche, H., Riebler, H., Schade, R., Schwarz, M., Winnwa, N., Wiens, A., Wu, X., Plessl, C., &#38; Simon, J. (2024). Noctua 2 Supercomputer. <i>Journal of Large-Scale Research Facilities</i>, <i>9</i>. 
<a href=\"https://doi.org/10.17815/jlsrf-8-187\">https://doi.org/10.17815/jlsrf-8-187</a>","mla":"Bauer, Carsten, et al. “Noctua 2 Supercomputer.” <i>Journal of Large-Scale Research Facilities</i>, vol. 9, 2024, doi:<a href=\"https://doi.org/10.17815/jlsrf-8-187\">10.17815/jlsrf-8-187</a>.","bibtex":"@article{Bauer_Kenter_Lass_Mazur_Meyer_Nitsche_Riebler_Schade_Schwarz_Winnwa_et al._2024, title={Noctua 2 Supercomputer}, volume={9}, DOI={<a href=\"https://doi.org/10.17815/jlsrf-8-187\">10.17815/jlsrf-8-187</a>}, journal={Journal of large-scale research facilities}, author={Bauer, Carsten and Kenter, Tobias and Lass, Michael and Mazur, Lukas and Meyer, Marius and Nitsche, Holger and Riebler, Heinrich and Schade, Robert and Schwarz, Michael and Winnwa, Nils and et al.}, year={2024} }","short":"C. Bauer, T. Kenter, M. Lass, L. Mazur, M. Meyer, H. Nitsche, H. Riebler, R. Schade, M. Schwarz, N. Winnwa, A. Wiens, X. Wu, C. Plessl, J. Simon, Journal of Large-Scale Research Facilities 9 (2024).","ama":"Bauer C, Kenter T, Lass M, et al. Noctua 2 Supercomputer. <i>Journal of large-scale research facilities</i>. 2024;9. doi:<a href=\"https://doi.org/10.17815/jlsrf-8-187\">10.17815/jlsrf-8-187</a>","chicago":"Bauer, Carsten, Tobias Kenter, Michael Lass, Lukas Mazur, Marius Meyer, Holger Nitsche, Heinrich Riebler, et al. “Noctua 2 Supercomputer.” <i>Journal of Large-Scale Research Facilities</i> 9 (2024). <a href=\"https://doi.org/10.17815/jlsrf-8-187\">https://doi.org/10.17815/jlsrf-8-187</a>.","ieee":"C. Bauer <i>et al.</i>, “Noctua 2 Supercomputer,” <i>Journal of large-scale research facilities</i>, vol. 
9, 2024, doi: <a href=\"https://doi.org/10.17815/jlsrf-8-187\">10.17815/jlsrf-8-187</a>."},"intvolume":"         9","project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"_id":"53663","user_id":"8961","department":[{"_id":"27"},{"_id":"518"}],"article_type":"original","file_date_updated":"2024-04-26T08:35:17Z","type":"journal_article","status":"public","date_created":"2024-04-26T07:39:41Z","title":"Noctua 2 Supercomputer","year":"2024","ddc":["004"],"keyword":["Noctua 2","Supercomputer","FPGA","PC2","Paderborn Center for Parallel Computing"],"language":[{"iso":"eng"}],"publication":"Journal of large-scale research facilities","abstract":[{"lang":"eng","text":"Noctua 2 is a supercomputer operated at the Paderborn Center for Parallel Computing (PC2) at Paderborn University in Germany. Noctua 2 was inaugurated in 2022 and is an Atos BullSequana XH2000 system. It consists mainly of three node types: 1) CPU Compute nodes with AMD EPYC processors in different main memory configurations, 2) GPU nodes with NVIDIA A100 GPUs, and 3) FPGA nodes with Xilinx Alveo U280 and Intel Stratix 10 FPGA cards. While CPUs and GPUs are known off-the-shelf components in HPC systems, the operation of a large number of FPGA cards from different vendors and a dedicated FPGA-to-FPGA network are unique characteristics of Noctua 2. 
This paper describes in detail the overall setup of Noctua 2 and gives insights into the operation of the cluster from a hardware, software and facility perspective."}],"file":[{"content_type":"application/pdf","relation":"main_file","date_updated":"2024-04-26T08:35:17Z","date_created":"2024-04-26T07:30:20Z","creator":"deffel","file_size":3825480,"file_name":"Noctua2_Supercomputer.pdf","file_id":"53664","access_level":"open_access"}]},{"language":[{"iso":"eng"}],"_id":"56605","project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"department":[{"_id":"27"},{"_id":"518"}],"user_id":"3145","status":"public","publication":"2024 34th International Conference on Field-Programmable Logic and Applications (FPL)","type":"conference","title":"StencilStream: A SYCL-based Stencil Simulation Framework Targeting FPGAs","doi":"10.1109/fpl64840.2024.00023","publisher":"IEEE","date_updated":"2024-10-14T07:56:26Z","author":[{"full_name":"Opdenhövel, Jan-Oliver","id":"73960","last_name":"Opdenhövel","orcid":"0000-0003-2314-2784","first_name":"Jan-Oliver"},{"full_name":"Alt, Christoph","id":"100625","last_name":"Alt","first_name":"Christoph"},{"first_name":"Christian","last_name":"Plessl","orcid":"0000-0001-5728-9982","full_name":"Plessl, Christian","id":"16153"},{"first_name":"Tobias","last_name":"Kenter","id":"3145","full_name":"Kenter, Tobias"}],"date_created":"2024-10-14T07:49:24Z","year":"2024","citation":{"ama":"Opdenhövel J-O, Alt C, Plessl C, Kenter T. StencilStream: A SYCL-based Stencil Simulation Framework Targeting FPGAs. In: <i>2024 34th International Conference on Field-Programmable Logic and Applications (FPL)</i>. IEEE; 2024. doi:<a href=\"https://doi.org/10.1109/fpl64840.2024.00023\">10.1109/fpl64840.2024.00023</a>","ieee":"J.-O. Opdenhövel, C. Alt, C. Plessl, and T. 
Kenter, “StencilStream: A SYCL-based Stencil Simulation Framework Targeting FPGAs,” 2024, doi: <a href=\"https://doi.org/10.1109/fpl64840.2024.00023\">10.1109/fpl64840.2024.00023</a>.","chicago":"Opdenhövel, Jan-Oliver, Christoph Alt, Christian Plessl, and Tobias Kenter. “StencilStream: A SYCL-Based Stencil Simulation Framework Targeting FPGAs.” In <i>2024 34th International Conference on Field-Programmable Logic and Applications (FPL)</i>. IEEE, 2024. <a href=\"https://doi.org/10.1109/fpl64840.2024.00023\">https://doi.org/10.1109/fpl64840.2024.00023</a>.","apa":"Opdenhövel, J.-O., Alt, C., Plessl, C., &#38; Kenter, T. (2024). StencilStream: A SYCL-based Stencil Simulation Framework Targeting FPGAs. <i>2024 34th International Conference on Field-Programmable Logic and Applications (FPL)</i>. <a href=\"https://doi.org/10.1109/fpl64840.2024.00023\">https://doi.org/10.1109/fpl64840.2024.00023</a>","short":"J.-O. Opdenhövel, C. Alt, C. Plessl, T. Kenter, in: 2024 34th International Conference on Field-Programmable Logic and Applications (FPL), IEEE, 2024.","bibtex":"@inproceedings{Opdenhövel_Alt_Plessl_Kenter_2024, title={StencilStream: A SYCL-based Stencil Simulation Framework Targeting FPGAs}, DOI={<a href=\"https://doi.org/10.1109/fpl64840.2024.00023\">10.1109/fpl64840.2024.00023</a>}, booktitle={2024 34th International Conference on Field-Programmable Logic and Applications (FPL)}, publisher={IEEE}, author={Opdenhövel, Jan-Oliver and Alt, Christoph and Plessl, Christian and Kenter, Tobias}, year={2024} }","mla":"Opdenhövel, Jan-Oliver, et al. 
“StencilStream: A SYCL-Based Stencil Simulation Framework Targeting FPGAs.” <i>2024 34th International Conference on Field-Programmable Logic and Applications (FPL)</i>, IEEE, 2024, doi:<a href=\"https://doi.org/10.1109/fpl64840.2024.00023\">10.1109/fpl64840.2024.00023</a>."},"quality_controlled":"1","publication_status":"published"},{"type":"conference","publication":"2024 IEEE 32nd Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)","status":"public","_id":"56607","user_id":"3145","department":[{"_id":"27"},{"_id":"518"}],"language":[{"iso":"eng"}],"publication_status":"published","quality_controlled":"1","year":"2024","citation":{"ama":"Tareen AR, Meyer M, Plessl C, Kenter T. HiHiSpMV: Sparse Matrix Vector Multiplication with Hierarchical Row Reductions on FPGAs with High Bandwidth Memory. In: <i>2024 IEEE 32nd Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)</i>. Vol 35. IEEE; 2024. doi:<a href=\"https://doi.org/10.1109/fccm60383.2024.00014\">10.1109/fccm60383.2024.00014</a>","chicago":"Tareen, Abdul Rehman, Marius Meyer, Christian Plessl, and Tobias Kenter. “HiHiSpMV: Sparse Matrix Vector Multiplication with Hierarchical Row Reductions on FPGAs with High Bandwidth Memory.” In <i>2024 IEEE 32nd Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)</i>, Vol. 35. IEEE, 2024. <a href=\"https://doi.org/10.1109/fccm60383.2024.00014\">https://doi.org/10.1109/fccm60383.2024.00014</a>.","ieee":"A. R. Tareen, M. Meyer, C. Plessl, and T. Kenter, “HiHiSpMV: Sparse Matrix Vector Multiplication with Hierarchical Row Reductions on FPGAs with High Bandwidth Memory,” in <i>2024 IEEE 32nd Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)</i>, 2024, vol. 35, doi: <a href=\"https://doi.org/10.1109/fccm60383.2024.00014\">10.1109/fccm60383.2024.00014</a>.","short":"A.R. Tareen, M. Meyer, C. Plessl, T. 
Kenter, in: 2024 IEEE 32nd Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM), IEEE, 2024.","mla":"Tareen, Abdul Rehman, et al. “HiHiSpMV: Sparse Matrix Vector Multiplication with Hierarchical Row Reductions on FPGAs with High Bandwidth Memory.” <i>2024 IEEE 32nd Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)</i>, vol. 35, IEEE, 2024, doi:<a href=\"https://doi.org/10.1109/fccm60383.2024.00014\">10.1109/fccm60383.2024.00014</a>.","bibtex":"@inproceedings{Tareen_Meyer_Plessl_Kenter_2024, title={HiHiSpMV: Sparse Matrix Vector Multiplication with Hierarchical Row Reductions on FPGAs with High Bandwidth Memory}, volume={35}, DOI={<a href=\"https://doi.org/10.1109/fccm60383.2024.00014\">10.1109/fccm60383.2024.00014</a>}, booktitle={2024 IEEE 32nd Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)}, publisher={IEEE}, author={Tareen, Abdul Rehman and Meyer, Marius and Plessl, Christian and Kenter, Tobias}, year={2024} }","apa":"Tareen, A. R., Meyer, M., Plessl, C., &#38; Kenter, T. (2024). HiHiSpMV: Sparse Matrix Vector Multiplication with Hierarchical Row Reductions on FPGAs with High Bandwidth Memory. <i>2024 IEEE 32nd Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)</i>, <i>35</i>. 
<a href=\"https://doi.org/10.1109/fccm60383.2024.00014\">https://doi.org/10.1109/fccm60383.2024.00014</a>"},"intvolume":"        35","publisher":"IEEE","date_updated":"2024-10-14T12:27:55Z","author":[{"first_name":"Abdul Rehman","last_name":"Tareen","id":"76938","full_name":"Tareen, Abdul Rehman"},{"full_name":"Meyer, Marius","id":"40778","last_name":"Meyer","first_name":"Marius"},{"first_name":"Christian","orcid":"0000-0001-5728-9982","last_name":"Plessl","full_name":"Plessl, Christian","id":"16153"},{"first_name":"Tobias","last_name":"Kenter","full_name":"Kenter, Tobias","id":"3145"}],"date_created":"2024-10-14T07:59:08Z","volume":35,"title":"HiHiSpMV: Sparse Matrix Vector Multiplication with Hierarchical Row Reductions on FPGAs with High Bandwidth Memory","doi":"10.1109/fccm60383.2024.00014"},{"publication":"2024 34th International Conference on Field-Programmable Logic and Applications (FPL)","type":"conference","status":"public","abstract":[{"lang":"eng","text":"The computation of electron repulsion integrals (ERIs) is a key component for quantum chemical methods. The intensive computation and bandwidth demand for ERI evaluation presents a significant challenge for quantum-mechanics-based atomistic simulations with hybrid density functional theory: due to the tens of trillions of ERI computations in each time step, practical applications are usually limited to thousands of atoms. In this work, we propose SERI, a high-throughput streaming accelerator for ERI computation on HBM-based FPGAs. In contrast to prior buffer-based designs, SERI proposes a novel streaming architecture to address the on-chip buffer limitation and the floorplanning challenge, and leverages the high-bandwidth memory to overcome the bandwidth bottleneck in prior designs. 
Moreover, to meet the varying computation, bandwidth, and floorplanning requirements between the 55 canonical quartet classes in ERI calculation, we design an automation tool, together with an accurate performance model, to automatically customize the architecture and floorplanning strategy for each canonical quartet class to maximize their throughput. Our performance evaluation on the AMD/Xilinx Alveo U280 FPGA board shows that SERI achieves an average speedup of 9.80x over the previous best-performing FPGA design, a 3.21x speedup over a 64-core AMD EPYC 7713 CPU, and a 15.64x speedup over an Nvidia A40 GPU. It reaches a peak throughput of 23.8 GERIS ($10^9$ ERIs per second) on one Alveo U280 FPGA. SERI will be released soon at https://github.com/SFU-HiAccel/SERI."}],"department":[{"_id":"27"},{"_id":"518"}],"user_id":"77439","_id":"56609","project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"language":[{"iso":"eng"}],"quality_controlled":"1","publication_status":"published","page":"60-68","citation":{"short":"P. Stachura, G. Li, X. Wu, C. Plessl, Z. Fang, in: 2024 34th International Conference on Field-Programmable Logic and Applications (FPL), IEEE, 2024, pp. 60–68.","mla":"Stachura, Philip, et al. “SERI: High-Throughput Streaming Acceleration of Electron Repulsion Integral Computation in Quantum Chemistry Using HBM-Based FPGAs.” <i>2024 34th International Conference on Field-Programmable Logic and Applications (FPL)</i>, IEEE, 2024, pp. 
60–68, doi:<a href=\"https://doi.org/10.1109/fpl64840.2024.00018\">10.1109/fpl64840.2024.00018</a>.","bibtex":"@inproceedings{Stachura_Li_Wu_Plessl_Fang_2024, title={SERI: High-Throughput Streaming Acceleration of Electron Repulsion Integral Computation in Quantum Chemistry using HBM-based FPGAs}, DOI={<a href=\"https://doi.org/10.1109/fpl64840.2024.00018\">10.1109/fpl64840.2024.00018</a>}, booktitle={2024 34th International Conference on Field-Programmable Logic and Applications (FPL)}, publisher={IEEE}, author={Stachura, Philip and Li, Guanyu and Wu, Xin and Plessl, Christian and Fang, Zhenman}, year={2024}, pages={60–68} }","apa":"Stachura, P., Li, G., Wu, X., Plessl, C., &#38; Fang, Z. (2024). SERI: High-Throughput Streaming Acceleration of Electron Repulsion Integral Computation in Quantum Chemistry using HBM-based FPGAs. <i>2024 34th International Conference on Field-Programmable Logic and Applications (FPL)</i>, 60–68. <a href=\"https://doi.org/10.1109/fpl64840.2024.00018\">https://doi.org/10.1109/fpl64840.2024.00018</a>","ama":"Stachura P, Li G, Wu X, Plessl C, Fang Z. SERI: High-Throughput Streaming Acceleration of Electron Repulsion Integral Computation in Quantum Chemistry using HBM-based FPGAs. In: <i>2024 34th International Conference on Field-Programmable Logic and Applications (FPL)</i>. IEEE; 2024:60-68. doi:<a href=\"https://doi.org/10.1109/fpl64840.2024.00018\">10.1109/fpl64840.2024.00018</a>","chicago":"Stachura, Philip, Guanyu Li, Xin Wu, Christian Plessl, and Zhenman Fang. “SERI: High-Throughput Streaming Acceleration of Electron Repulsion Integral Computation in Quantum Chemistry Using HBM-Based FPGAs.” In <i>2024 34th International Conference on Field-Programmable Logic and Applications (FPL)</i>, 60–68. IEEE, 2024. <a href=\"https://doi.org/10.1109/fpl64840.2024.00018\">https://doi.org/10.1109/fpl64840.2024.00018</a>.","ieee":"P. Stachura, G. Li, X. Wu, C. Plessl, and Z. 
Fang, “SERI: High-Throughput Streaming Acceleration of Electron Repulsion Integral Computation in Quantum Chemistry using HBM-based FPGAs,” in <i>2024 34th International Conference on Field-Programmable Logic and Applications (FPL)</i>, 2024, pp. 60–68, doi: <a href=\"https://doi.org/10.1109/fpl64840.2024.00018\">10.1109/fpl64840.2024.00018</a>."},"year":"2024","date_created":"2024-10-14T08:44:44Z","author":[{"first_name":"Philip","full_name":"Stachura, Philip","last_name":"Stachura"},{"full_name":"Li, Guanyu","last_name":"Li","first_name":"Guanyu"},{"last_name":"Wu","id":"77439","full_name":"Wu, Xin","first_name":"Xin"},{"first_name":"Christian","orcid":"0000-0001-5728-9982","last_name":"Plessl","full_name":"Plessl, Christian","id":"16153"},{"first_name":"Zhenman","full_name":"Fang, Zhenman","last_name":"Fang"}],"publisher":"IEEE","date_updated":"2024-10-15T08:37:27Z","doi":"10.1109/fpl64840.2024.00018","main_file_link":[{"url":"https://ieeexplore.ieee.org/document/10705609"}],"title":"SERI: High-Throughput Streaming Acceleration of Electron Repulsion Integral Computation in Quantum Chemistry using HBM-based FPGAs"},{"language":[{"iso":"eng"}],"article_number":"11","user_id":"16153","department":[{"_id":"27"},{"_id":"518"}],"project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"_id":"54312","status":"public","type":"conference","publication":"Proceedings of the Platform for Advanced Scientific Computing Conference (PASC)","doi":"10.1145/3659914.3659925","title":"Enabling Performance Portability for Shallow Water Equations on CPUs, GPUs, and FPGAs with SYCL","author":[{"last_name":"Büttner","full_name":"Büttner, Markus","first_name":"Markus"},{"first_name":"Christoph","last_name":"Alt","full_name":"Alt, Christoph","id":"100625"},{"first_name":"Tobias","last_name":"Kenter","full_name":"Kenter, Tobias","id":"3145"},{"first_name":"Harald","last_name":"Köstler","full_name":"Köstler, 
Harald"},{"id":"16153","full_name":"Plessl, Christian","orcid":"0000-0001-5728-9982","last_name":"Plessl","first_name":"Christian"},{"first_name":"Vadym","last_name":"Aizinger","full_name":"Aizinger, Vadym"}],"date_created":"2024-05-16T13:24:49Z","publisher":"ACM","date_updated":"2024-11-27T22:50:19Z","citation":{"ama":"Büttner M, Alt C, Kenter T, Köstler H, Plessl C, Aizinger V. Enabling Performance Portability for Shallow Water Equations on CPUs, GPUs, and FPGAs with SYCL. In: <i>Proceedings of the Platform for Advanced Scientific Computing Conference (PASC)</i>. ACM; 2024. doi:<a href=\"https://doi.org/10.1145/3659914.3659925\">10.1145/3659914.3659925</a>","ieee":"M. Büttner, C. Alt, T. Kenter, H. Köstler, C. Plessl, and V. Aizinger, “Enabling Performance Portability for Shallow Water Equations on CPUs, GPUs, and FPGAs with SYCL,” 2024, doi: <a href=\"https://doi.org/10.1145/3659914.3659925\">10.1145/3659914.3659925</a>.","chicago":"Büttner, Markus, Christoph Alt, Tobias Kenter, Harald Köstler, Christian Plessl, and Vadym Aizinger. “Enabling Performance Portability for Shallow Water Equations on CPUs, GPUs, and FPGAs with SYCL.” In <i>Proceedings of the Platform for Advanced Scientific Computing Conference (PASC)</i>. ACM, 2024. <a href=\"https://doi.org/10.1145/3659914.3659925\">https://doi.org/10.1145/3659914.3659925</a>.","apa":"Büttner, M., Alt, C., Kenter, T., Köstler, H., Plessl, C., &#38; Aizinger, V. (2024). Enabling Performance Portability for Shallow Water Equations on CPUs, GPUs, and FPGAs with SYCL. <i>Proceedings of the Platform for Advanced Scientific Computing Conference (PASC)</i>, Article 11. <a href=\"https://doi.org/10.1145/3659914.3659925\">https://doi.org/10.1145/3659914.3659925</a>","mla":"Büttner, Markus, et al. 
“Enabling Performance Portability for Shallow Water Equations on CPUs, GPUs, and FPGAs with SYCL.” <i>Proceedings of the Platform for Advanced Scientific Computing Conference (PASC)</i>, 11, ACM, 2024, doi:<a href=\"https://doi.org/10.1145/3659914.3659925\">10.1145/3659914.3659925</a>.","short":"M. Büttner, C. Alt, T. Kenter, H. Köstler, C. Plessl, V. Aizinger, in: Proceedings of the Platform for Advanced Scientific Computing Conference (PASC), ACM, 2024.","bibtex":"@inproceedings{Büttner_Alt_Kenter_Köstler_Plessl_Aizinger_2024, title={Enabling Performance Portability for Shallow Water Equations on CPUs, GPUs, and FPGAs with SYCL}, DOI={<a href=\"https://doi.org/10.1145/3659914.3659925\">10.1145/3659914.3659925</a>}, number={11}, booktitle={Proceedings of the Platform for Advanced Scientific Computing Conference (PASC)}, publisher={ACM}, author={Büttner, Markus and Alt, Christoph and Kenter, Tobias and Köstler, Harald and Plessl, Christian and Aizinger, Vadym}, year={2024} }"},"year":"2024","publication_status":"published","quality_controlled":"1"},{"status":"public","type":"conference","publication":"2024 IEEE Conference on Games (CoG)","language":[{"iso":"eng"}],"user_id":"47054","_id":"60338","citation":{"apa":"Heuchler, S., &#38; Plessl, C. (2024). Reproduction and Extension of Playing Strength Models in Computer Go. <i>2024 IEEE Conference on Games (CoG)</i>. <a href=\"https://doi.org/10.1109/cog60054.2024.10645535\">https://doi.org/10.1109/cog60054.2024.10645535</a>","mla":"Heuchler, Sebastian, and Christian Plessl. “Reproduction and Extension of Playing Strength Models in Computer Go.” <i>2024 IEEE Conference on Games (CoG)</i>, IEEE, 2024, doi:<a href=\"https://doi.org/10.1109/cog60054.2024.10645535\">10.1109/cog60054.2024.10645535</a>.","short":"S. Heuchler, C. 
Plessl, in: 2024 IEEE Conference on Games (CoG), IEEE, 2024.","bibtex":"@inproceedings{Heuchler_Plessl_2024, title={Reproduction and Extension of Playing Strength Models in Computer Go}, DOI={<a href=\"https://doi.org/10.1109/cog60054.2024.10645535\">10.1109/cog60054.2024.10645535</a>}, booktitle={2024 IEEE Conference on Games (CoG)}, publisher={IEEE}, author={Heuchler, Sebastian and Plessl, Christian}, year={2024} }","chicago":"Heuchler, Sebastian, and Christian Plessl. “Reproduction and Extension of Playing Strength Models in Computer Go.” In <i>2024 IEEE Conference on Games (CoG)</i>. IEEE, 2024. <a href=\"https://doi.org/10.1109/cog60054.2024.10645535\">https://doi.org/10.1109/cog60054.2024.10645535</a>.","ieee":"S. Heuchler and C. Plessl, “Reproduction and Extension of Playing Strength Models in Computer Go,” 2024, doi: <a href=\"https://doi.org/10.1109/cog60054.2024.10645535\">10.1109/cog60054.2024.10645535</a>.","ama":"Heuchler S, Plessl C. Reproduction and Extension of Playing Strength Models in Computer Go. In: <i>2024 IEEE Conference on Games (CoG)</i>. IEEE; 2024. 
doi:<a href=\"https://doi.org/10.1109/cog60054.2024.10645535\">10.1109/cog60054.2024.10645535</a>"},"year":"2024","publication_status":"published","doi":"10.1109/cog60054.2024.10645535","title":"Reproduction and Extension of Playing Strength Models in Computer Go","author":[{"full_name":"Heuchler, Sebastian","id":"47054","last_name":"Heuchler","first_name":"Sebastian"},{"first_name":"Christian","orcid":"0000-0001-5728-9982","last_name":"Plessl","id":"16153","full_name":"Plessl, Christian"}],"date_created":"2025-06-23T12:29:07Z","publisher":"IEEE","date_updated":"2025-06-23T12:35:55Z"},{"publisher":"Springer Nature Switzerland","date_created":"2025-11-04T09:50:24Z","title":"Optimizing Communication for Latency Sensitive HPC Applications on up to 48 FPGAs Using ACCL","quality_controlled":"1","year":"2024","language":[{"iso":"eng"}],"publication":"Lecture Notes in Computer Science","abstract":[{"lang":"eng","text":"Most FPGA boards in the HPC domain are well-suited for parallel scaling because of the direct integration of versatile and high-throughput network ports. However, the utilization of their network capabilities is often challenging and error-prone because the whole network stack and communication patterns have to be implemented and managed on the FPGAs. Also, this approach conceptually involves a trade-off between the performance potential of improved communication and the impact of resource consumption for communication infrastructure, since the utilized resources on the FPGAs could otherwise be used for computations. In this work, we investigate this trade-off, firstly, by using synthetic benchmarks to evaluate the different configuration options of the communication framework ACCL and their impact on communication latency and throughput. Finally, we use our findings to implement a shallow water simulation whose scalability heavily depends on low-latency communication. 
With a suitable configuration of ACCL, good scaling behavior can be shown to all 48 FPGAs installed in the system. Overall, the results show that the availability of inter-FPGA communication frameworks as well as the configurability of framework and network stack are crucial to achieve the best application performance with low latency communication."}],"oa":"1","date_updated":"2025-11-04T09:51:22Z","author":[{"last_name":"Meyer","full_name":"Meyer, Marius","id":"40778","first_name":"Marius"},{"first_name":"Tobias","full_name":"Kenter, Tobias","id":"3145","last_name":"Kenter"},{"first_name":"Lucian","full_name":"Petrica, Lucian","last_name":"Petrica"},{"first_name":"Kenneth","full_name":"O’Brien, Kenneth","last_name":"O’Brien"},{"first_name":"Michaela","full_name":"Blott, Michaela","last_name":"Blott"},{"first_name":"Christian","full_name":"Plessl, Christian","id":"16153","orcid":"0000-0001-5728-9982","last_name":"Plessl"}],"doi":"10.1007/978-3-031-69766-1_9","main_file_link":[{"open_access":"1"}],"publication_identifier":{"issn":["0302-9743","1611-3349"],"isbn":["9783031697654","9783031697661"]},"publication_status":"published","place":"Cham","citation":{"bibtex":"@inbook{Meyer_Kenter_Petrica_O’Brien_Blott_Plessl_2024, place={Cham}, title={Optimizing Communication for Latency Sensitive HPC Applications on up to 48 FPGAs Using ACCL}, DOI={<a href=\"https://doi.org/10.1007/978-3-031-69766-1_9\">10.1007/978-3-031-69766-1_9</a>}, booktitle={Lecture Notes in Computer Science}, publisher={Springer Nature Switzerland}, author={Meyer, Marius and Kenter, Tobias and Petrica, Lucian and O’Brien, Kenneth and Blott, Michaela and Plessl, Christian}, year={2024} }","mla":"Meyer, Marius, et al. “Optimizing Communication for Latency Sensitive HPC Applications on up to 48 FPGAs Using ACCL.” <i>Lecture Notes in Computer Science</i>, Springer Nature Switzerland, 2024, doi:<a href=\"https://doi.org/10.1007/978-3-031-69766-1_9\">10.1007/978-3-031-69766-1_9</a>.","short":"M. Meyer, T. 
Kenter, L. Petrica, K. O’Brien, M. Blott, C. Plessl, in: Lecture Notes in Computer Science, Springer Nature Switzerland, Cham, 2024.","apa":"Meyer, M., Kenter, T., Petrica, L., O’Brien, K., Blott, M., &#38; Plessl, C. (2024). Optimizing Communication for Latency Sensitive HPC Applications on up to 48 FPGAs Using ACCL. In <i>Lecture Notes in Computer Science</i>. Springer Nature Switzerland. <a href=\"https://doi.org/10.1007/978-3-031-69766-1_9\">https://doi.org/10.1007/978-3-031-69766-1_9</a>","chicago":"Meyer, Marius, Tobias Kenter, Lucian Petrica, Kenneth O’Brien, Michaela Blott, and Christian Plessl. “Optimizing Communication for Latency Sensitive HPC Applications on up to 48 FPGAs Using ACCL.” In <i>Lecture Notes in Computer Science</i>. Cham: Springer Nature Switzerland, 2024. <a href=\"https://doi.org/10.1007/978-3-031-69766-1_9\">https://doi.org/10.1007/978-3-031-69766-1_9</a>.","ieee":"M. Meyer, T. Kenter, L. Petrica, K. O’Brien, M. Blott, and C. Plessl, “Optimizing Communication for Latency Sensitive HPC Applications on up to 48 FPGAs Using ACCL,” in <i>Lecture Notes in Computer Science</i>, Cham: Springer Nature Switzerland, 2024.","ama":"Meyer M, Kenter T, Petrica L, O’Brien K, Blott M, Plessl C. Optimizing Communication for Latency Sensitive HPC Applications on up to 48 FPGAs Using ACCL. In: <i>Lecture Notes in Computer Science</i>. Springer Nature Switzerland; 2024. 
doi:<a href=\"https://doi.org/10.1007/978-3-031-69766-1_9\">10.1007/978-3-031-69766-1_9</a>"},"_id":"62067","project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"department":[{"_id":"27"},{"_id":"518"}],"user_id":"3145","type":"book_chapter","status":"public"},{"user_id":"3145","department":[{"_id":"27"},{"_id":"518"}],"project":[{"_id":"52","name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"_id":"56604","type":"journal_article","status":"public","author":[{"id":"100210","full_name":"Van Hirtum, Lennart","last_name":"Van Hirtum","first_name":"Lennart"},{"first_name":"Patrick","last_name":"De Causmaecker","full_name":"De Causmaecker, Patrick"},{"full_name":"Goemaere, Jens","last_name":"Goemaere","first_name":"Jens"},{"id":"3145","full_name":"Kenter, Tobias","last_name":"Kenter","first_name":"Tobias"},{"first_name":"Heinrich","last_name":"Riebler","full_name":"Riebler, Heinrich","id":"8961"},{"first_name":"Michael","full_name":"Lass, Michael","id":"24135","last_name":"Lass","orcid":"0000-0002-5708-7632"},{"first_name":"Christian","orcid":"0000-0001-5728-9982","last_name":"Plessl","id":"16153","full_name":"Plessl, Christian"}],"volume":17,"date_updated":"2025-11-04T09:53:26Z","oa":"1","main_file_link":[{"open_access":"1"}],"doi":"10.1145/3674147","publication_status":"published","publication_identifier":{"issn":["1936-7406","1936-7414"]},"citation":{"ama":"Van Hirtum L, De Causmaecker P, Goemaere J, et al. A Computation of the Ninth Dedekind Number Using FPGA Supercomputing. <i>ACM Transactions on Reconfigurable Technology and Systems</i>. 2024;17(3):1-28. doi:<a href=\"https://doi.org/10.1145/3674147\">10.1145/3674147</a>","chicago":"Van Hirtum, Lennart, Patrick De Causmaecker, Jens Goemaere, Tobias Kenter, Heinrich Riebler, Michael Lass, and Christian Plessl. 
“A Computation of the Ninth Dedekind Number Using FPGA Supercomputing.” <i>ACM Transactions on Reconfigurable Technology and Systems</i> 17, no. 3 (2024): 1–28. <a href=\"https://doi.org/10.1145/3674147\">https://doi.org/10.1145/3674147</a>.","ieee":"L. Van Hirtum <i>et al.</i>, “A Computation of the Ninth Dedekind Number Using FPGA Supercomputing,” <i>ACM Transactions on Reconfigurable Technology and Systems</i>, vol. 17, no. 3, pp. 1–28, 2024, doi: <a href=\"https://doi.org/10.1145/3674147\">10.1145/3674147</a>.","mla":"Van Hirtum, Lennart, et al. “A Computation of the Ninth Dedekind Number Using FPGA Supercomputing.” <i>ACM Transactions on Reconfigurable Technology and Systems</i>, vol. 17, no. 3, Association for Computing Machinery (ACM), 2024, pp. 1–28, doi:<a href=\"https://doi.org/10.1145/3674147\">10.1145/3674147</a>.","short":"L. Van Hirtum, P. De Causmaecker, J. Goemaere, T. Kenter, H. Riebler, M. Lass, C. Plessl, ACM Transactions on Reconfigurable Technology and Systems 17 (2024) 1–28.","bibtex":"@article{Van Hirtum_De Causmaecker_Goemaere_Kenter_Riebler_Lass_Plessl_2024, title={A Computation of the Ninth Dedekind Number Using FPGA Supercomputing}, volume={17}, DOI={<a href=\"https://doi.org/10.1145/3674147\">10.1145/3674147</a>}, number={3}, journal={ACM Transactions on Reconfigurable Technology and Systems}, publisher={Association for Computing Machinery (ACM)}, author={Van Hirtum, Lennart and De Causmaecker, Patrick and Goemaere, Jens and Kenter, Tobias and Riebler, Heinrich and Lass, Michael and Plessl, Christian}, year={2024}, pages={1–28} }","apa":"Van Hirtum, L., De Causmaecker, P., Goemaere, J., Kenter, T., Riebler, H., Lass, M., &#38; Plessl, C. (2024). A Computation of the Ninth Dedekind Number Using FPGA Supercomputing. <i>ACM Transactions on Reconfigurable Technology and Systems</i>, <i>17</i>(3), 1–28. 
<a href=\"https://doi.org/10.1145/3674147\">https://doi.org/10.1145/3674147</a>"},"page":"1-28","intvolume":"        17","language":[{"iso":"eng"}],"publication":"ACM Transactions on Reconfigurable Technology and Systems","abstract":[{"text":"This manuscript makes the claim of having computed the 9th Dedekind number, D(9). This was done by accelerating the core operation of the process with an efficient FPGA design that outperforms an optimized 64-core CPU reference by 95x. The FPGA execution was parallelized on the Noctua 2 supercomputer at Paderborn University. The resulting value for D(9) is 286386577668298411128469151667598498812366. This value can be verified in two steps. We have made the data file containing the 490 M results available, each of which can be verified separately on CPU, and the whole file sums to our proposed value. The paper explains the mathematical approach in the first part, before putting the focus on a deep dive into the FPGA accelerator implementation followed by a performance analysis. The FPGA implementation was done in Register-Transfer Level using a dual-clock architecture and shows how we achieved an impressive FMax of 450 MHz on the targeted Stratix 10 GX 2800 FPGAs. The total compute time used was 47,000 FPGA hours.","lang":"eng"}],"date_created":"2024-10-14T07:38:29Z","publisher":"Association for Computing Machinery (ACM)","title":"A Computation of the Ninth Dedekind Number Using FPGA Supercomputing","issue":"3","quality_controlled":"1","year":"2024"},{"publication":"Quantum Science and Technology","abstract":[{"lang":"eng","text":"At large scales, quantum systems may become advantageous over their classical counterparts at performing certain tasks. Developing tools to analyze these systems at the relevant scales, in a manner consistent with quantum mechanics, is therefore critical to benchmarking performance and characterizing their operation. 
While classical computational approaches cannot perform like-for-like computations of quantum systems beyond a certain scale, classical high-performance computing (HPC) may nevertheless be useful for precisely these characterization and certification tasks. By developing open-source customized algorithms using high-performance computing, we perform quantum tomography on a megascale quantum photonic detector covering a Hilbert space of $10^6$. This requires finding $10^8$ elements of the matrix corresponding to the positive operator valued measure (POVM), the quantum description of the detector, and is achieved in minutes of computation time. Moreover, by exploiting the structure of the problem, we achieve highly efficient parallel scaling, paving the way for quantum objects up to a system size of $10^{12}$ elements to be reconstructed using this method. In general, this shows that a consistent quantum mechanical description of quantum phenomena is applicable at everyday scales. More concretely, this enables the reconstruction of large-scale quantum sources, processes and detectors used in computation and sampling tasks, which may be necessary to prove their nonclassical character or quantum computational advantage."}],"external_id":{"arxiv":["2404.02844"]},"language":[{"iso":"eng"}],"issue":"1","year":"2024","publisher":"IOP Publishing","date_created":"2024-04-04T08:43:18Z","title":"Scalable quantum detector tomography by high-performance computing","type":"journal_article","status":"public","_id":"53202","project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"},{"name":"ERC-Grant: QuESADILLA: Quantum Engineering Superconducting Array Detectors in Low-Light Applications","_id":"239"},{"_id":"191","name":"PhoQuant: Photonische Quantencomputer -  Quantencomputing Testplattform"}],"department":[{"_id":"27"},{"_id":"623"},{"_id":"15"}],"user_id":"55629","intvolume":"        10","citation":{"mla":"Schapeler, Timon, et al. 
“Scalable Quantum Detector Tomography by High-Performance Computing.” <i>Quantum Science and Technology</i>, vol. 10, no. 1, IOP Publishing, 2024, doi:<a href=\"https://doi.org/10.1088/2058-9565/ad8511\">10.1088/2058-9565/ad8511</a>.","short":"T. Schapeler, R. Schade, M. Lass, C. Plessl, T. Bartley, Quantum Science and Technology 10 (2024).","bibtex":"@article{Schapeler_Schade_Lass_Plessl_Bartley_2024, title={Scalable quantum detector tomography by high-performance computing}, volume={10}, DOI={<a href=\"https://doi.org/10.1088/2058-9565/ad8511\">10.1088/2058-9565/ad8511</a>}, number={1}, journal={Quantum Science and Technology}, publisher={IOP Publishing}, author={Schapeler, Timon and Schade, Robert and Lass, Michael and Plessl, Christian and Bartley, Tim}, year={2024} }","apa":"Schapeler, T., Schade, R., Lass, M., Plessl, C., &#38; Bartley, T. (2024). Scalable quantum detector tomography by high-performance computing. <i>Quantum Science and Technology</i>, <i>10</i>(1). <a href=\"https://doi.org/10.1088/2058-9565/ad8511\">https://doi.org/10.1088/2058-9565/ad8511</a>","ama":"Schapeler T, Schade R, Lass M, Plessl C, Bartley T. Scalable quantum detector tomography by high-performance computing. <i>Quantum Science and Technology</i>. 2024;10(1). doi:<a href=\"https://doi.org/10.1088/2058-9565/ad8511\">10.1088/2058-9565/ad8511</a>","ieee":"T. Schapeler, R. Schade, M. Lass, C. Plessl, and T. Bartley, “Scalable quantum detector tomography by high-performance computing,” <i>Quantum Science and Technology</i>, vol. 10, no. 1, 2024, doi: <a href=\"https://doi.org/10.1088/2058-9565/ad8511\">10.1088/2058-9565/ad8511</a>.","chicago":"Schapeler, Timon, Robert Schade, Michael Lass, Christian Plessl, and Tim Bartley. “Scalable Quantum Detector Tomography by High-Performance Computing.” <i>Quantum Science and Technology</i> 10, no. 1 (2024). 
<a href=\"https://doi.org/10.1088/2058-9565/ad8511\">https://doi.org/10.1088/2058-9565/ad8511</a>."},"date_updated":"2025-12-16T11:32:12Z","oa":"1","volume":10,"author":[{"full_name":"Schapeler, Timon","id":"55629","last_name":"Schapeler","orcid":"0000-0001-7652-1716","first_name":"Timon"},{"id":"75963","full_name":"Schade, Robert","last_name":"Schade","orcid":"0000-0002-6268-5397","first_name":"Robert"},{"first_name":"Michael","id":"24135","full_name":"Lass, Michael","orcid":"0000-0002-5708-7632","last_name":"Lass"},{"full_name":"Plessl, Christian","id":"16153","orcid":"0000-0001-5728-9982","last_name":"Plessl","first_name":"Christian"},{"first_name":"Tim","last_name":"Bartley","id":"49683","full_name":"Bartley, Tim"}],"doi":"10.1088/2058-9565/ad8511","main_file_link":[{"open_access":"1"}]},{"date_updated":"2024-01-22T09:56:42Z","author":[{"last_name":"Van Hirtum","full_name":"Van Hirtum, Lennart","first_name":"Lennart"},{"first_name":"Patrick","last_name":"De Causmaecker","full_name":"De Causmaecker, Patrick"},{"first_name":"Jens","full_name":"Goemaere, Jens","last_name":"Goemaere"},{"first_name":"Tobias","id":"3145","full_name":"Kenter, Tobias","last_name":"Kenter"},{"first_name":"Heinrich","last_name":"Riebler","full_name":"Riebler, Heinrich","id":"8961"},{"first_name":"Michael","orcid":"0000-0002-5708-7632","last_name":"Lass","id":"24135","full_name":"Lass, Michael"},{"first_name":"Christian","id":"16153","full_name":"Plessl, Christian","orcid":"0000-0001-5728-9982","last_name":"Plessl"}],"date_created":"2023-04-08T11:05:29Z","title":"A computation of D(9) using FPGA Supercomputing","year":"2023","citation":{"apa":"Van Hirtum, L., De Causmaecker, P., Goemaere, J., Kenter, T., Riebler, H., Lass, M., &#38; Plessl, C. (2023). A computation of D(9) using FPGA Supercomputing. In <i>arXiv:2304.03039</i>.","short":"L. Van Hirtum, P. De Causmaecker, J. Goemaere, T. Kenter, H. Riebler, M. Lass, C. 
Plessl, ArXiv:2304.03039 (2023).","bibtex":"@article{Van Hirtum_De Causmaecker_Goemaere_Kenter_Riebler_Lass_Plessl_2023, title={A computation of D(9) using FPGA Supercomputing}, journal={arXiv:2304.03039}, author={Van Hirtum, Lennart and De Causmaecker, Patrick and Goemaere, Jens and Kenter, Tobias and Riebler, Heinrich and Lass, Michael and Plessl, Christian}, year={2023} }","mla":"Van Hirtum, Lennart, et al. “A Computation of D(9) Using FPGA Supercomputing.” <i>ArXiv:2304.03039</i>, 2023.","ieee":"L. Van Hirtum <i>et al.</i>, “A computation of D(9) using FPGA Supercomputing,” <i>arXiv:2304.03039</i>. 2023.","chicago":"Van Hirtum, Lennart, Patrick De Causmaecker, Jens Goemaere, Tobias Kenter, Heinrich Riebler, Michael Lass, and Christian Plessl. “A Computation of D(9) Using FPGA Supercomputing.” <i>ArXiv:2304.03039</i>, 2023.","ama":"Van Hirtum L, De Causmaecker P, Goemaere J, et al. A computation of D(9) using FPGA Supercomputing. <i>arXiv:2304.03039</i>. Published online 2023."},"_id":"43439","external_id":{"arxiv":["2304.03039"]},"project":[{"name":"PC2: Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"department":[{"_id":"27"},{"_id":"518"}],"user_id":"3145","language":[{"iso":"eng"}],"publication":"arXiv:2304.03039","type":"preprint","abstract":[{"lang":"eng","text":"This preprint makes the claim of having computed the $9^{th}$ Dedekind\r\nNumber. This was done by building an efficient FPGA Accelerator for the core\r\noperation of the process, and parallelizing it on the Noctua 2 Supercluster at\r\nPaderborn University. The resulting value is\r\n286386577668298411128469151667598498812366. This value can be verified in two\r\nsteps. We have made the data file containing the 490M results available, each\r\nof which can be verified separately on CPU, and the whole file sums to our\r\nproposed value."}],"status":"public"}]
