[{"publication_status":"published","publication_identifier":{"isbn":["9783030715922","9783030715939"],"issn":["0302-9743","1611-3349"]},"has_accepted_license":"1","place":"Cham","citation":{"apa":"Alhaddad, S., Förstner, J., Groth, S., Grünewald, D., Grynko, Y., Hannig, F., Kenter, T., Pfreundt, F.-J., Plessl, C., Schotte, M., Steinke, T., Teich, J., Weiser, M., &#38; Wende, F. (2021). HighPerMeshes – A Domain-Specific Language for Numerical Algorithms on Unstructured Grids. In <i>Euro-Par 2020: Parallel Processing Workshops</i>. <a href=\"https://doi.org/10.1007/978-3-030-71593-9_15\">https://doi.org/10.1007/978-3-030-71593-9_15</a>","bibtex":"@inbook{Alhaddad_Förstner_Groth_Grünewald_Grynko_Hannig_Kenter_Pfreundt_Plessl_Schotte_et al._2021, place={Cham}, title={HighPerMeshes – A Domain-Specific Language for Numerical Algorithms on Unstructured Grids}, DOI={<a href=\"https://doi.org/10.1007/978-3-030-71593-9_15\">10.1007/978-3-030-71593-9_15</a>}, booktitle={Euro-Par 2020: Parallel Processing Workshops}, author={Alhaddad, Samer and Förstner, Jens and Groth, Stefan and Grünewald, Daniel and Grynko, Yevgen and Hannig, Frank and Kenter, Tobias and Pfreundt, Franz-Josef and Plessl, Christian and Schotte, Merlind and et al.}, year={2021} }","short":"S. Alhaddad, J. Förstner, S. Groth, D. Grünewald, Y. Grynko, F. Hannig, T. Kenter, F.-J. Pfreundt, C. Plessl, M. Schotte, T. Steinke, J. Teich, M. Weiser, F. Wende, in: Euro-Par 2020: Parallel Processing Workshops, Cham, 2021.","mla":"Alhaddad, Samer, et al. “HighPerMeshes – A Domain-Specific Language for Numerical Algorithms on Unstructured Grids.” <i>Euro-Par 2020: Parallel Processing Workshops</i>, 2021, doi:<a href=\"https://doi.org/10.1007/978-3-030-71593-9_15\">10.1007/978-3-030-71593-9_15</a>.","chicago":"Alhaddad, Samer, Jens Förstner, Stefan Groth, Daniel Grünewald, Yevgen Grynko, Frank Hannig, Tobias Kenter, et al. 
“HighPerMeshes – A Domain-Specific Language for Numerical Algorithms on Unstructured Grids.” In <i>Euro-Par 2020: Parallel Processing Workshops</i>. Cham, 2021. <a href=\"https://doi.org/10.1007/978-3-030-71593-9_15\">https://doi.org/10.1007/978-3-030-71593-9_15</a>.","ieee":"S. Alhaddad <i>et al.</i>, “HighPerMeshes – A Domain-Specific Language for Numerical Algorithms on Unstructured Grids,” in <i>Euro-Par 2020: Parallel Processing Workshops</i>, Cham, 2021.","ama":"Alhaddad S, Förstner J, Groth S, et al. HighPerMeshes – A Domain-Specific Language for Numerical Algorithms on Unstructured Grids. In: <i>Euro-Par 2020: Parallel Processing Workshops</i>. ; 2021. doi:<a href=\"https://doi.org/10.1007/978-3-030-71593-9_15\">10.1007/978-3-030-71593-9_15</a>"},"date_updated":"2023-09-26T11:40:25Z","author":[{"last_name":"Alhaddad","full_name":"Alhaddad, Samer","id":"42456","first_name":"Samer"},{"full_name":"Förstner, Jens","id":"158","orcid":"0000-0001-7059-9862","last_name":"Förstner","first_name":"Jens"},{"last_name":"Groth","full_name":"Groth, Stefan","first_name":"Stefan"},{"last_name":"Grünewald","full_name":"Grünewald, Daniel","first_name":"Daniel"},{"id":"26059","full_name":"Grynko, Yevgen","last_name":"Grynko","first_name":"Yevgen"},{"last_name":"Hannig","full_name":"Hannig, Frank","first_name":"Frank"},{"full_name":"Kenter, Tobias","id":"3145","last_name":"Kenter","first_name":"Tobias"},{"full_name":"Pfreundt, Franz-Josef","last_name":"Pfreundt","first_name":"Franz-Josef"},{"first_name":"Christian","full_name":"Plessl, Christian","id":"16153","orcid":"0000-0001-5728-9982","last_name":"Plessl"},{"first_name":"Merlind","last_name":"Schotte","full_name":"Schotte, Merlind"},{"full_name":"Steinke, Thomas","last_name":"Steinke","first_name":"Thomas"},{"last_name":"Teich","full_name":"Teich, Jürgen","first_name":"Jürgen"},{"last_name":"Weiser","full_name":"Weiser, Martin","first_name":"Martin"},{"full_name":"Wende, 
Florian","last_name":"Wende","first_name":"Florian"}],"doi":"10.1007/978-3-030-71593-9_15","type":"book_chapter","status":"public","project":[{"_id":"52","name":"Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"_id":"21587","user_id":"15278","department":[{"_id":"61"},{"_id":"230"},{"_id":"429"},{"_id":"27"},{"_id":"518"}],"file_date_updated":"2021-03-31T19:42:52Z","quality_controlled":"1","year":"2021","date_created":"2021-03-31T19:39:42Z","title":"HighPerMeshes – A Domain-Specific Language for Numerical Algorithms on Unstructured Grids","publication":"Euro-Par 2020: Parallel Processing Workshops","abstract":[{"text":"Solving partial differential equations on unstructured grids is a cornerstone of engineering and scientific computing. Nowadays, heterogeneous parallel platforms with CPUs, GPUs, and FPGAs enable energy-efficient and computationally demanding simulations. We developed the HighPerMeshes C++-embedded Domain-Specific Language (DSL) for bridging the abstraction gap between the mathematical and algorithmic formulation of mesh-based algorithms for PDE problems on the one hand and an increasing number of heterogeneous platforms with their different parallel programming and runtime models on the other hand. Thus, the HighPerMeshes DSL aims at higher productivity in the code development process for multiple target platforms. We introduce the concepts as well as the basic structure of the HighPerMeshes DSL, and demonstrate its usage with three examples, a Poisson and monodomain problem, respectively, solved by the continuous finite element method, and the discontinuous Galerkin method for Maxwell’s equation. The mapping of the abstract algorithmic description onto parallel hardware, including distributed memory compute clusters, is presented. 
Finally, the achievable performance and scalability are demonstrated for a typical example problem on a multi-core CPU cluster.","lang":"eng"}],"file":[{"date_updated":"2021-03-31T19:42:52Z","creator":"fossie","date_created":"2021-03-31T19:42:52Z","file_size":564398,"file_name":"2021-03 Alhaddad2021_Chapter_HighPerMeshesADomain-SpecificL.pdf","file_id":"21588","access_level":"closed","content_type":"application/pdf","success":1,"relation":"main_file"}],"ddc":["004"],"keyword":["tet_topic_hpc"],"language":[{"iso":"eng"}]},{"type":"journal_article","status":"public","project":[{"_id":"52","name":"Computing Resources Provided by the Paderborn Center for Parallel Computing"},{"_id":"33","name":"HighPerMeshes","grant_number":"01|H16005A"}],"_id":"24788","user_id":"15278","department":[{"_id":"61"},{"_id":"230"},{"_id":"27"},{"_id":"518"}],"file_date_updated":"2021-09-22T06:19:29Z","publication_status":"published","publication_identifier":{"issn":["1532-0626","1532-0634"]},"has_accepted_license":"1","citation":{"ama":"Alhaddad S, Förstner J, Groth S, et al. The HighPerMeshes framework for numerical algorithms on unstructured grids. <i>Concurrency and Computation: Practice and Experience</i>. Published online 2021:e6616. doi:<a href=\"https://doi.org/10.1002/cpe.6616\">10.1002/cpe.6616</a>","chicago":"Alhaddad, Samer, Jens Förstner, Stefan Groth, Daniel Grünewald, Yevgen Grynko, Frank Hannig, Tobias Kenter, et al. “The HighPerMeshes Framework for Numerical Algorithms on Unstructured Grids.” <i>Concurrency and Computation: Practice and Experience</i>, 2021, e6616. <a href=\"https://doi.org/10.1002/cpe.6616\">https://doi.org/10.1002/cpe.6616</a>.","ieee":"S. Alhaddad <i>et al.</i>, “The HighPerMeshes framework for numerical algorithms on unstructured grids,” <i>Concurrency and Computation: Practice and Experience</i>, p. 
e6616, 2021, doi: <a href=\"https://doi.org/10.1002/cpe.6616\">10.1002/cpe.6616</a>.","apa":"Alhaddad, S., Förstner, J., Groth, S., Grünewald, D., Grynko, Y., Hannig, F., Kenter, T., Pfreundt, F., Plessl, C., Schotte, M., Steinke, T., Teich, J., Weiser, M., &#38; Wende, F. (2021). The HighPerMeshes framework for numerical algorithms on unstructured grids. <i>Concurrency and Computation: Practice and Experience</i>, e6616. <a href=\"https://doi.org/10.1002/cpe.6616\">https://doi.org/10.1002/cpe.6616</a>","bibtex":"@article{Alhaddad_Förstner_Groth_Grünewald_Grynko_Hannig_Kenter_Pfreundt_Plessl_Schotte_et al._2021, title={The HighPerMeshes framework for numerical algorithms on unstructured grids}, DOI={<a href=\"https://doi.org/10.1002/cpe.6616\">10.1002/cpe.6616</a>}, journal={Concurrency and Computation: Practice and Experience}, author={Alhaddad, Samer and Förstner, Jens and Groth, Stefan and Grünewald, Daniel and Grynko, Yevgen and Hannig, Frank and Kenter, Tobias and Pfreundt, Franz‐Josef and Plessl, Christian and Schotte, Merlind and et al.}, year={2021}, pages={e6616} }","mla":"Alhaddad, Samer, et al. “The HighPerMeshes Framework for Numerical Algorithms on Unstructured Grids.” <i>Concurrency and Computation: Practice and Experience</i>, 2021, p. e6616, doi:<a href=\"https://doi.org/10.1002/cpe.6616\">10.1002/cpe.6616</a>.","short":"S. Alhaddad, J. Förstner, S. Groth, D. Grünewald, Y. Grynko, F. Hannig, T. Kenter, F. Pfreundt, C. Plessl, M. Schotte, T. Steinke, J. Teich, M. Weiser, F. 
Wende, Concurrency and Computation: Practice and Experience (2021) e6616."},"page":"e6616","oa":"1","date_updated":"2023-09-26T11:42:19Z","author":[{"id":"42456","full_name":"Alhaddad, Samer","last_name":"Alhaddad","first_name":"Samer"},{"full_name":"Förstner, Jens","id":"158","last_name":"Förstner","orcid":"0000-0001-7059-9862","first_name":"Jens"},{"last_name":"Groth","full_name":"Groth, Stefan","first_name":"Stefan"},{"first_name":"Daniel","last_name":"Grünewald","full_name":"Grünewald, Daniel"},{"full_name":"Grynko, Yevgen","id":"26059","last_name":"Grynko","first_name":"Yevgen"},{"last_name":"Hannig","full_name":"Hannig, Frank","first_name":"Frank"},{"first_name":"Tobias","id":"3145","full_name":"Kenter, Tobias","last_name":"Kenter"},{"first_name":"Franz‐Josef","last_name":"Pfreundt","full_name":"Pfreundt, Franz‐Josef"},{"first_name":"Christian","id":"16153","full_name":"Plessl, Christian","last_name":"Plessl","orcid":"0000-0001-5728-9982"},{"last_name":"Schotte","full_name":"Schotte, Merlind","first_name":"Merlind"},{"first_name":"Thomas","last_name":"Steinke","full_name":"Steinke, Thomas"},{"first_name":"Jürgen","full_name":"Teich, Jürgen","last_name":"Teich"},{"last_name":"Weiser","full_name":"Weiser, Martin","first_name":"Martin"},{"first_name":"Florian","full_name":"Wende, Florian","last_name":"Wende"}],"doi":"10.1002/cpe.6616","publication":"Concurrency and Computation: Practice and Experience","file":[{"relation":"main_file","content_type":"application/pdf","access_level":"open_access","file_id":"24789","file_name":"2021-09 Alhaddad - Concurrency... 
- The HighPerMeshes framework for numerical algorithms on unstructured grids.pdf","file_size":2300152,"creator":"fossie","date_created":"2021-09-22T06:19:29Z","date_updated":"2021-09-22T06:19:29Z"}],"ddc":["004"],"keyword":["tet_topic_hpc"],"language":[{"iso":"eng"}],"quality_controlled":"1","year":"2021","date_created":"2021-09-22T06:15:50Z","title":"The HighPerMeshes framework for numerical algorithms on unstructured grids"},{"title":"Solving Maxwell's Equations with Modern C++ and SYCL: A Case Study","doi":"10.1109/ASAP.2018.8445127","date_updated":"2022-01-06T06:59:26Z","date_created":"2018-07-23T07:12:03Z","author":[{"full_name":"Afzal, Ayesha","last_name":"Afzal","first_name":"Ayesha"},{"first_name":"Christian","full_name":"Schmitt, Christian","last_name":"Schmitt"},{"last_name":"Alhaddad","full_name":"Alhaddad, Samer","id":"42456","first_name":"Samer"},{"full_name":"Grynko, Yevgen","id":"26059","last_name":"Grynko","first_name":"Yevgen"},{"full_name":"Teich, Jürgen","last_name":"Teich","first_name":"Jürgen"},{"orcid":"0000-0001-7059-9862","last_name":"Förstner","full_name":"Förstner, Jens","id":"158","first_name":"Jens"},{"first_name":"Frank","full_name":"Hannig, Frank","last_name":"Hannig"}],"year":"2018","page":"49-56","citation":{"ama":"Afzal A, Schmitt C, Alhaddad S, et al. Solving Maxwell’s Equations with Modern C++ and SYCL: A Case Study. In: <i>Proceedings of the 29th Annual IEEE International Conference on Application-Specific Systems, Architectures and Processors (ASAP)</i>. ; 2018:49-56. doi:<a href=\"https://doi.org/10.1109/ASAP.2018.8445127\">10.1109/ASAP.2018.8445127</a>","chicago":"Afzal, Ayesha, Christian Schmitt, Samer Alhaddad, Yevgen Grynko, Jürgen Teich, Jens Förstner, and Frank Hannig. “Solving Maxwell’s Equations with Modern C++ and SYCL: A Case Study.” In <i>Proceedings of the 29th Annual IEEE International Conference on Application-Specific Systems, Architectures and Processors (ASAP)</i>, 49–56, 2018. 
<a href=\"https://doi.org/10.1109/ASAP.2018.8445127\">https://doi.org/10.1109/ASAP.2018.8445127</a>.","ieee":"A. Afzal <i>et al.</i>, “Solving Maxwell’s Equations with Modern C++ and SYCL: A Case Study,” in <i>Proceedings of the 29th Annual IEEE International Conference on Application-specific Systems, Architectures and Processors (ASAP)</i>, 2018, pp. 49–56.","apa":"Afzal, A., Schmitt, C., Alhaddad, S., Grynko, Y., Teich, J., Förstner, J., &#38; Hannig, F. (2018). Solving Maxwell’s Equations with Modern C++ and SYCL: A Case Study. In <i>Proceedings of the 29th Annual IEEE International Conference on Application-specific Systems, Architectures and Processors (ASAP)</i> (pp. 49–56). <a href=\"https://doi.org/10.1109/ASAP.2018.8445127\">https://doi.org/10.1109/ASAP.2018.8445127</a>","short":"A. Afzal, C. Schmitt, S. Alhaddad, Y. Grynko, J. Teich, J. Förstner, F. Hannig, in: Proceedings of the 29th Annual IEEE International Conference on Application-Specific Systems, Architectures and Processors (ASAP), 2018, pp. 49–56.","mla":"Afzal, Ayesha, et al. “Solving Maxwell’s Equations with Modern C++ and SYCL: A Case Study.” <i>Proceedings of the 29th Annual IEEE International Conference on Application-Specific Systems, Architectures and Processors (ASAP)</i>, 2018, pp. 
49–56, doi:<a href=\"https://doi.org/10.1109/ASAP.2018.8445127\">10.1109/ASAP.2018.8445127</a>.","bibtex":"@inproceedings{Afzal_Schmitt_Alhaddad_Grynko_Teich_Förstner_Hannig_2018, title={Solving Maxwell’s Equations with Modern C++ and SYCL: A Case Study}, DOI={<a href=\"https://doi.org/10.1109/ASAP.2018.8445127\">10.1109/ASAP.2018.8445127</a>}, booktitle={Proceedings of the 29th Annual IEEE International Conference on Application-specific Systems, Architectures and Processors (ASAP)}, author={Afzal, Ayesha and Schmitt, Christian and Alhaddad, Samer and Grynko, Yevgen and Teich, Jürgen and Förstner, Jens and Hannig, Frank}, year={2018}, pages={49–56} }"},"publication_identifier":{"isbn":["978-1-5386-7479-6"]},"has_accepted_license":"1","keyword":["tet_topic_hpc"],"ddc":["004"],"language":[{"iso":"eng"}],"file_date_updated":"2022-01-06T06:59:26Z","_id":"3588","project":[{"grant_number":"01|H16005","_id":"33","name":"HighPerMeshes"},{"name":"Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"department":[{"_id":"61"}],"user_id":"158","abstract":[{"lang":"eng","text":"In scientific computing, unstructured meshes are a crucial foundation for the simulation of real-world physical phenomena. Compared to regular grids, they allow resembling the computational domain with a much higher accuracy, which in turn leads to more efficient computations.<br />There exists a wealth of supporting libraries and frameworks that aid programmers with the implementation of applications working on such grids, each built on top of existing parallelization technologies. However, many approaches require the programmer to introduce a different programming paradigm into their application or provide different variants of the code. SYCL is a new programming standard providing a remedy to this dilemma by building on standard C++17 with its so-called single-source approach: Programmers write standard C++ code and expose parallelism using C++17 keywords. 
The application is then transformed into a concrete implementation by the SYCL implementation. By encapsulating the OpenCL ecosystem, different SYCL implementations enable not only the programming of CPUs but also of heterogeneous platforms such as GPUs or other devices. For the first time, this paper showcases a SYCL-based solver for the nodal Discontinuous Galerkin method for Maxwell’s equations on unstructured meshes. We compare our solution to a previous C-based implementation with respect to programmability and performance on heterogeneous platforms."}],"status":"public","file":[{"embargo_to":"open_access","content_type":"application/pdf","relation":"main_file","date_updated":"2022-01-06T06:59:26Z","date_created":"2018-08-21T10:12:05Z","creator":"fossie","file_size":252186,"file_name":"2018-08 Afzal - ASAP Proceedings - Solving Maxwell equations with modern C++ and SYCL.pdf","file_id":"3986","embargo":"2019-09-03","access_level":"request"}],"publication":"Proceedings of the 29th Annual IEEE International Conference on Application-specific Systems, Architectures and Processors (ASAP)","type":"conference"},{"has_accepted_license":"1","citation":{"ieee":"T. Kenter <i>et al.</i>, “OpenCL-based FPGA Design to Accelerate the Nodal Discontinuous Galerkin Method for Unstructured Meshes,” presented at the Proc. Int. Symp. on Field-Programmable Custom Computing Machines (FCCM), 2018, doi: <a href=\"https://doi.org/10.1109/FCCM.2018.00037\">10.1109/FCCM.2018.00037</a>.","chicago":"Kenter, Tobias, Gopinath Mahale, Samer Alhaddad, Yevgen Grynko, Christian Schmitt, Ayesha Afzal, Frank Hannig, Jens Förstner, and Christian Plessl. “OpenCL-Based FPGA Design to Accelerate the Nodal Discontinuous Galerkin Method for Unstructured Meshes.” In <i>Proc. Int. Symp. on Field-Programmable Custom Computing Machines (FCCM)</i>. IEEE, 2018. 
<a href=\"https://doi.org/10.1109/FCCM.2018.00037\">https://doi.org/10.1109/FCCM.2018.00037</a>.","ama":"Kenter T, Mahale G, Alhaddad S, et al. OpenCL-based FPGA Design to Accelerate the Nodal Discontinuous Galerkin Method for Unstructured Meshes. In: <i>Proc. Int. Symp. on Field-Programmable Custom Computing Machines (FCCM)</i>. IEEE; 2018. doi:<a href=\"https://doi.org/10.1109/FCCM.2018.00037\">10.1109/FCCM.2018.00037</a>","mla":"Kenter, Tobias, et al. “OpenCL-Based FPGA Design to Accelerate the Nodal Discontinuous Galerkin Method for Unstructured Meshes.” <i>Proc. Int. Symp. on Field-Programmable Custom Computing Machines (FCCM)</i>, IEEE, 2018, doi:<a href=\"https://doi.org/10.1109/FCCM.2018.00037\">10.1109/FCCM.2018.00037</a>.","short":"T. Kenter, G. Mahale, S. Alhaddad, Y. Grynko, C. Schmitt, A. Afzal, F. Hannig, J. Förstner, C. Plessl, in: Proc. Int. Symp. on Field-Programmable Custom Computing Machines (FCCM), IEEE, 2018.","bibtex":"@inproceedings{Kenter_Mahale_Alhaddad_Grynko_Schmitt_Afzal_Hannig_Förstner_Plessl_2018, title={OpenCL-based FPGA Design to Accelerate the Nodal Discontinuous Galerkin Method for Unstructured Meshes}, DOI={<a href=\"https://doi.org/10.1109/FCCM.2018.00037\">10.1109/FCCM.2018.00037</a>}, booktitle={Proc. Int. Symp. on Field-Programmable Custom Computing Machines (FCCM)}, publisher={IEEE}, author={Kenter, Tobias and Mahale, Gopinath and Alhaddad, Samer and Grynko, Yevgen and Schmitt, Christian and Afzal, Ayesha and Hannig, Frank and Förstner, Jens and Plessl, Christian}, year={2018} }","apa":"Kenter, T., Mahale, G., Alhaddad, S., Grynko, Y., Schmitt, C., Afzal, A., Hannig, F., Förstner, J., &#38; Plessl, C. (2018). OpenCL-based FPGA Design to Accelerate the Nodal Discontinuous Galerkin Method for Unstructured Meshes. <i>Proc. Int. Symp. on Field-Programmable Custom Computing Machines (FCCM)</i>. Proc. Int. Symp. on Field-Programmable Custom Computing Machines (FCCM). 
<a href=\"https://doi.org/10.1109/FCCM.2018.00037\">https://doi.org/10.1109/FCCM.2018.00037</a>"},"date_updated":"2023-09-26T11:47:52Z","author":[{"last_name":"Kenter","full_name":"Kenter, Tobias","id":"3145","first_name":"Tobias"},{"full_name":"Mahale, Gopinath","last_name":"Mahale","first_name":"Gopinath"},{"first_name":"Samer","id":"42456","full_name":"Alhaddad, Samer","last_name":"Alhaddad"},{"last_name":"Grynko","id":"26059","full_name":"Grynko, Yevgen","first_name":"Yevgen"},{"first_name":"Christian","last_name":"Schmitt","full_name":"Schmitt, Christian"},{"full_name":"Afzal, Ayesha","last_name":"Afzal","first_name":"Ayesha"},{"last_name":"Hannig","full_name":"Hannig, Frank","first_name":"Frank"},{"orcid":"0000-0001-7059-9862","last_name":"Förstner","id":"158","full_name":"Förstner, Jens","first_name":"Jens"},{"orcid":"0000-0001-5728-9982","last_name":"Plessl","full_name":"Plessl, Christian","id":"16153","first_name":"Christian"}],"doi":"10.1109/FCCM.2018.00037","conference":{"name":"Proc. Int. Symp. on Field-Programmable Custom Computing Machines (FCCM)"},"type":"conference","status":"public","project":[{"grant_number":"01|H16005A","name":"HighPerMeshes","_id":"33"},{"grant_number":"160364472","_id":"1","name":"SFB 901"},{"name":"SFB 901 - Project Area C","_id":"4"},{"grant_number":"160364472","name":"SFB 901 - Subproject C2","_id":"14"}],"_id":"1588","user_id":"15278","department":[{"_id":"27"},{"_id":"518"},{"_id":"61"}],"file_date_updated":"2018-11-02T14:45:05Z","quality_controlled":"1","year":"2018","publisher":"IEEE","date_created":"2018-03-22T10:48:01Z","title":"OpenCL-based FPGA Design to Accelerate the Nodal Discontinuous Galerkin Method for Unstructured Meshes","publication":"Proc. Int. Symp. 
on Field-Programmable Custom Computing Machines (FCCM)","abstract":[{"text":"The exploration of FPGAs as accelerators for scientific simulations has so far mostly been focused on small kernels of methods working on regular data structures, for example in the form of stencil computations for finite difference methods. In computational sciences, often more advanced methods are employed that promise better stability, convergence, locality and scaling. Unstructured meshes are shown to be more effective and more accurate, compared to regular grids, in representing computation domains of various shapes. Using unstructured meshes, the discontinuous Galerkin method preserves the ability to perform explicit local update operations for simulations in the time domain. In this work, we investigate FPGAs as target platform for an implementation of the nodal discontinuous Galerkin method to find time-domain solutions of Maxwell's equations in an unstructured mesh. When maximizing data reuse and fitting constant coefficients into suitably partitioned on-chip memory, high computational intensity allows us to implement and feed wide data paths with hundreds of floating point operators. By decoupling off-chip memory accesses from the computations, high memory bandwidth can be sustained, even for the irregular access pattern required by parts of the application. Using the Intel/Altera OpenCL SDK for FPGAs, we present different implementation variants for different polynomial orders of the method. 
In different phases of the algorithm, either computational or bandwidth limits of the Arria 10 platform are almost reached, thus outperforming a highly multithreaded CPU implementation by around 2x.","lang":"eng"}],"file":[{"date_updated":"2018-11-02T14:45:05Z","date_created":"2018-11-02T14:45:05Z","creator":"ups","file_size":269130,"access_level":"closed","file_id":"5282","file_name":"08457652.pdf","content_type":"application/pdf","success":1,"relation":"main_file"}],"ddc":["000"],"keyword":["tet_topic_hpc"],"language":[{"iso":"eng"}]},{"status":"public","type":"conference","file_date_updated":"2018-11-02T15:02:28Z","user_id":"15278","department":[{"_id":"27"},{"_id":"518"},{"_id":"61"}],"project":[{"grant_number":"160364472","name":"SFB 901","_id":"1"},{"_id":"4","name":"SFB 901 - Project Area C"},{"grant_number":"160364472","name":"SFB 901 - Subproject C2","_id":"14"},{"grant_number":"01|H16005A","_id":"33","name":"HighPerMeshes"},{"grant_number":"PL 595/2-1 / 320898746","name":"Performance and Efficiency in HPC with Custom Computing","_id":"32"},{"_id":"52","name":"Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"_id":"1592","citation":{"chicago":"Kenter, Tobias, Jens Förstner, and Christian Plessl. “Flexible FPGA Design for FDTD Using OpenCL.” In <i>Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)</i>. IEEE, 2017. <a href=\"https://doi.org/10.23919/FPL.2017.8056844\">https://doi.org/10.23919/FPL.2017.8056844</a>.","ieee":"T. Kenter, J. Förstner, and C. Plessl, “Flexible FPGA design for FDTD using OpenCL,” 2017, doi: <a href=\"https://doi.org/10.23919/FPL.2017.8056844\">10.23919/FPL.2017.8056844</a>.","ama":"Kenter T, Förstner J, Plessl C. Flexible FPGA design for FDTD using OpenCL. In: <i>Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)</i>. IEEE; 2017. doi:<a href=\"https://doi.org/10.23919/FPL.2017.8056844\">10.23919/FPL.2017.8056844</a>","short":"T. Kenter, J. Förstner, C. 
Plessl, in: Proc. Int. Conf. on Field Programmable Logic and Applications (FPL), IEEE, 2017.","mla":"Kenter, Tobias, et al. “Flexible FPGA Design for FDTD Using OpenCL.” <i>Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)</i>, IEEE, 2017, doi:<a href=\"https://doi.org/10.23919/FPL.2017.8056844\">10.23919/FPL.2017.8056844</a>.","bibtex":"@inproceedings{Kenter_Förstner_Plessl_2017, title={Flexible FPGA design for FDTD using OpenCL}, DOI={<a href=\"https://doi.org/10.23919/FPL.2017.8056844\">10.23919/FPL.2017.8056844</a>}, booktitle={Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)}, publisher={IEEE}, author={Kenter, Tobias and Förstner, Jens and Plessl, Christian}, year={2017} }","apa":"Kenter, T., Förstner, J., &#38; Plessl, C. (2017). Flexible FPGA design for FDTD using OpenCL. <i>Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)</i>. <a href=\"https://doi.org/10.23919/FPL.2017.8056844\">https://doi.org/10.23919/FPL.2017.8056844</a>"},"has_accepted_license":"1","doi":"10.23919/FPL.2017.8056844","author":[{"last_name":"Kenter","id":"3145","full_name":"Kenter, Tobias","first_name":"Tobias"},{"first_name":"Jens","last_name":"Förstner","orcid":"0000-0001-7059-9862","full_name":"Förstner, Jens","id":"158"},{"first_name":"Christian","last_name":"Plessl","orcid":"0000-0001-5728-9982","full_name":"Plessl, Christian","id":"16153"}],"date_updated":"2023-09-26T13:24:38Z","file":[{"relation":"main_file","success":1,"content_type":"application/pdf","file_id":"5291","access_level":"closed","file_name":"08056844.pdf","file_size":230235,"date_created":"2018-11-02T15:02:28Z","creator":"ups","date_updated":"2018-11-02T15:02:28Z"}],"abstract":[{"text":"Compared to classical HDL designs, generating FPGA with high-level synthesis from an OpenCL specification promises easier exploration of different design alternatives and, through ready-to-use infrastructure and common abstractions for host and memory interfaces, easier 
portability between different FPGA families. In this work, we evaluate the extent of this promise. To this end, we present a parameterized FDTD implementation for photonic microcavity simulations. Our design can trade-off different forms of parallelism and works for two independent OpenCL-based FPGA design flows. Hence, we can target FPGAs from different vendors and different FPGA families. We describe how we used pre-processor macros to achieve this flexibility and to work around different shortcomings of the current tools. Choosing the right design configurations, we are able to present two extremely competitive solutions for very different FPGA targets, reaching up to 172 GFLOPS sustained performance. With the portability and flexibility demonstrated, code developers not only avoid vendor lock-in, but can even make best use of real trade-offs between different architectures.","lang":"eng"}],"publication":"Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)","language":[{"iso":"eng"}],"ddc":["000"],"keyword":["tet_topic_hpc"],"year":"2017","quality_controlled":"1","title":"Flexible FPGA design for FDTD using OpenCL","date_created":"2018-03-22T11:10:23Z","publisher":"IEEE"},{"citation":{"ama":"Giefers H, Plessl C, Förstner J. Accelerating Finite Difference Time Domain Simulations with Reconfigurable Dataflow Computers. <i>ACM SIGARCH Computer Architecture News</i>. 2014;41(5):65-70. doi:<a href=\"https://doi.org/10.1145/2641361.2641372\">10.1145/2641361.2641372</a>","chicago":"Giefers, Heiner, Christian Plessl, and Jens Förstner. “Accelerating Finite Difference Time Domain Simulations with Reconfigurable Dataflow Computers.” <i>ACM SIGARCH Computer Architecture News</i> 41, no. 5 (2014): 65–70. <a href=\"https://doi.org/10.1145/2641361.2641372\">https://doi.org/10.1145/2641361.2641372</a>.","ieee":"H. Giefers, C. Plessl, and J. 
Förstner, “Accelerating Finite Difference Time Domain Simulations with Reconfigurable Dataflow Computers,” <i>ACM SIGARCH Computer Architecture News</i>, vol. 41, no. 5, pp. 65–70, 2014, doi: <a href=\"https://doi.org/10.1145/2641361.2641372\">10.1145/2641361.2641372</a>.","short":"H. Giefers, C. Plessl, J. Förstner, ACM SIGARCH Computer Architecture News 41 (2014) 65–70.","mla":"Giefers, Heiner, et al. “Accelerating Finite Difference Time Domain Simulations with Reconfigurable Dataflow Computers.” <i>ACM SIGARCH Computer Architecture News</i>, vol. 41, no. 5, ACM, 2014, pp. 65–70, doi:<a href=\"https://doi.org/10.1145/2641361.2641372\">10.1145/2641361.2641372</a>.","bibtex":"@article{Giefers_Plessl_Förstner_2014, title={Accelerating Finite Difference Time Domain Simulations with Reconfigurable Dataflow Computers}, volume={41}, DOI={<a href=\"https://doi.org/10.1145/2641361.2641372\">10.1145/2641361.2641372</a>}, number={5}, journal={ACM SIGARCH Computer Architecture News}, publisher={ACM}, author={Giefers, Heiner and Plessl, Christian and Förstner, Jens}, year={2014}, pages={65–70} }","apa":"Giefers, H., Plessl, C., &#38; Förstner, J. (2014). Accelerating Finite Difference Time Domain Simulations with Reconfigurable Dataflow Computers. <i>ACM SIGARCH Computer Architecture News</i>, <i>41</i>(5), 65–70. 
<a href=\"https://doi.org/10.1145/2641361.2641372\">https://doi.org/10.1145/2641361.2641372</a>"},"intvolume":"        41","page":"65-70","publication_identifier":{"issn":["0163-5964"]},"doi":"10.1145/2641361.2641372","author":[{"full_name":"Giefers, Heiner","last_name":"Giefers","first_name":"Heiner"},{"first_name":"Christian","id":"16153","full_name":"Plessl, Christian","last_name":"Plessl","orcid":"0000-0001-5728-9982"},{"first_name":"Jens","last_name":"Förstner","orcid":"0000-0001-7059-9862","full_name":"Förstner, Jens","id":"158"}],"volume":41,"date_updated":"2023-09-26T13:35:58Z","status":"public","type":"journal_article","user_id":"15278","department":[{"_id":"27"},{"_id":"518"},{"_id":"61"},{"_id":"78"}],"_id":"1779","year":"2014","issue":"5","quality_controlled":"1","title":"Accelerating Finite Difference Time Domain Simulations with Reconfigurable Dataflow Computers","date_created":"2018-03-26T13:42:34Z","publisher":"ACM","publication":"ACM SIGARCH Computer Architecture News","language":[{"iso":"eng"}],"keyword":["funding-maxup","tet_topic_hpc"]},{"keyword":["funding-upb-forschungspreis","funding-maxup","tet_topic_hpc"],"ddc":["000"],"language":[{"iso":"eng"}],"abstract":[{"text":"Although the benefits of FPGAs for accelerating scientific codes are widely acknowledged, the use of FPGA accelerators in scientific computing is not widespread because reaping these benefits requires knowledge of hardware design methods and tools that is typically not available with domain scientists. A promising but hardly investigated approach is to develop tool flows that keep the common languages for scientific code (C, C++, and Fortran) and allow the developer to augment the source code with OpenMP-like directives for instructing the compiler which parts of the application shall be offloaded to the FPGA accelerator.\r\nIn this work we study whether the promise of effective FPGA acceleration with an OpenMP-like programming effort\r\ncan actually be held. 
Our target system is the Convey HC-1 reconfigurable computer for which an OpenMP-like\r\nprogramming environment exists. As case study we use an application from computational nanophotonics. Our results\r\nshow that a developer without previous FPGA experience could create an FPGA-accelerated application that is competitive to an optimized OpenMP-parallelized CPU version running on a two socket quad-core server. Finally, we discuss our experiences with this tool flow and the Convey HC-1 from a productivity and economic point of view.","lang":"eng"}],"file":[{"content_type":"application/pdf","relation":"main_file","success":1,"date_created":"2019-02-13T09:04:46Z","creator":"fossie","date_updated":"2019-02-13T09:04:46Z","file_id":"7638","file_name":"2012-11 Meyer,Schumacher,Plessl,Förstner_Convey vector personalities-FPGA acceleratin with an openmp-like programming effort.pdf","access_level":"closed","file_size":2148787}],"publication":"Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)","title":"Convey Vector Personalities – FPGA Acceleration with an OpenMP-like Effort?","publisher":"IEEE","date_created":"2018-03-29T15:04:25Z","year":"2012","quality_controlled":"1","file_date_updated":"2019-02-13T09:04:46Z","_id":"2106","department":[{"_id":"27"},{"_id":"518"},{"_id":"15"},{"_id":"78"}],"user_id":"15278","status":"public","type":"conference","conference":{"name":"22nd International Conference on Field Programmable Logic and Applications (FPL)"},"doi":"10.1109/FPL.2012.6339370","date_updated":"2023-09-26T13:39:13Z","author":[{"first_name":"Björn","full_name":"Meyer, Björn","last_name":"Meyer"},{"last_name":"Schumacher","full_name":"Schumacher, Jörn","first_name":"Jörn"},{"first_name":"Christian","full_name":"Plessl, Christian","id":"16153","orcid":"0000-0001-5728-9982","last_name":"Plessl"},{"full_name":"Förstner, Jens","id":"158","last_name":"Förstner","orcid":"0000-0001-7059-9862","first_name":"Jens"}],"page":"189-196","citation":{"ama":"Meyer B, 
Schumacher J, Plessl C, Förstner J. Convey Vector Personalities – FPGA Acceleration with an OpenMP-like Effort? In: <i>Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)</i>. IEEE; 2012:189-196. doi:<a href=\"https://doi.org/10.1109/FPL.2012.6339370\">10.1109/FPL.2012.6339370</a>","chicago":"Meyer, Björn, Jörn Schumacher, Christian Plessl, and Jens Förstner. “Convey Vector Personalities – FPGA Acceleration with an OpenMP-like Effort?” In <i>Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)</i>, 189–96. IEEE, 2012. <a href=\"https://doi.org/10.1109/FPL.2012.6339370\">https://doi.org/10.1109/FPL.2012.6339370</a>.","ieee":"B. Meyer, J. Schumacher, C. Plessl, and J. Förstner, “Convey Vector Personalities – FPGA Acceleration with an OpenMP-like Effort?,” in <i>Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)</i>, 2012, pp. 189–196, doi: <a href=\"https://doi.org/10.1109/FPL.2012.6339370\">10.1109/FPL.2012.6339370</a>.","apa":"Meyer, B., Schumacher, J., Plessl, C., &#38; Förstner, J. (2012). Convey Vector Personalities – FPGA Acceleration with an OpenMP-like Effort? <i>Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)</i>, 189–196. <a href=\"https://doi.org/10.1109/FPL.2012.6339370\">https://doi.org/10.1109/FPL.2012.6339370</a>","bibtex":"@inproceedings{Meyer_Schumacher_Plessl_Förstner_2012, title={Convey Vector Personalities – FPGA Acceleration with an OpenMP-like Effort?}, DOI={<a href=\"https://doi.org/10.1109/FPL.2012.6339370\">10.1109/FPL.2012.6339370</a>}, booktitle={Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)}, publisher={IEEE}, author={Meyer, Björn and Schumacher, Jörn and Plessl, Christian and Förstner, Jens}, year={2012}, pages={189–196} }","mla":"Meyer, Björn, et al. “Convey Vector Personalities – FPGA Acceleration with an OpenMP-like Effort?” <i>Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)</i>, IEEE, 2012, pp. 
189–96, doi:<a href=\"https://doi.org/10.1109/FPL.2012.6339370\">10.1109/FPL.2012.6339370</a>.","short":"B. Meyer, J. Schumacher, C. Plessl, J. Förstner, in: Proc. Int. Conf. on Field Programmable Logic and Applications (FPL), IEEE, 2012, pp. 189–196."},"has_accepted_license":"1"},{"quality_controlled":"1","year":"2011","page":"60-63","citation":{"short":"B. Meyer, C. Plessl, J. Förstner, in: Symp. on Application Accelerators in High Performance Computing (SAAHPC), IEEE Computer Society, 2011, pp. 60–63.","mla":"Meyer, Björn, et al. “Transformation of Scientific Algorithms to Parallel Computing Code: Subdomain Support in a MPI-Multi-GPU Backend.” <i>Symp. on Application Accelerators in High Performance Computing (SAAHPC)</i>, IEEE Computer Society, 2011, pp. 60–63, doi:<a href=\"https://doi.org/10.1109/SAAHPC.2011.12\">10.1109/SAAHPC.2011.12</a>.","bibtex":"@inproceedings{Meyer_Plessl_Förstner_2011, title={Transformation of scientific algorithms to parallel computing code: subdomain support in a MPI-multi-GPU backend}, DOI={<a href=\"https://doi.org/10.1109/SAAHPC.2011.12\">10.1109/SAAHPC.2011.12</a>}, booktitle={Symp. on Application Accelerators in High Performance Computing (SAAHPC)}, publisher={IEEE Computer Society}, author={Meyer, Björn and Plessl, Christian and Förstner, Jens}, year={2011}, pages={60–63} }","apa":"Meyer, B., Plessl, C., &#38; Förstner, J. (2011). Transformation of scientific algorithms to parallel computing code: subdomain support in a MPI-multi-GPU backend. <i>Symp. on Application Accelerators in High Performance Computing (SAAHPC)</i>, 60–63. <a href=\"https://doi.org/10.1109/SAAHPC.2011.12\">https://doi.org/10.1109/SAAHPC.2011.12</a>","ieee":"B. Meyer, C. Plessl, and J. Förstner, “Transformation of scientific algorithms to parallel computing code: subdomain support in a MPI-multi-GPU backend,” in <i>Symp. on Application Accelerators in High Performance Computing (SAAHPC)</i>, 2011, pp. 
60–63, doi: <a href=\"https://doi.org/10.1109/SAAHPC.2011.12\">10.1109/SAAHPC.2011.12</a>.","chicago":"Meyer, Björn, Christian Plessl, and Jens Förstner. “Transformation of Scientific Algorithms to Parallel Computing Code: Subdomain Support in a MPI-Multi-GPU Backend.” In <i>Symp. on Application Accelerators in High Performance Computing (SAAHPC)</i>, 60–63. IEEE Computer Society, 2011. <a href=\"https://doi.org/10.1109/SAAHPC.2011.12\">https://doi.org/10.1109/SAAHPC.2011.12</a>.","ama":"Meyer B, Plessl C, Förstner J. Transformation of scientific algorithms to parallel computing code: subdomain support in a MPI-multi-GPU backend. In: <i>Symp. on Application Accelerators in High Performance Computing (SAAHPC)</i>. IEEE Computer Society; 2011:60-63. doi:<a href=\"https://doi.org/10.1109/SAAHPC.2011.12\">10.1109/SAAHPC.2011.12</a>"},"date_updated":"2023-09-26T13:44:11Z","publisher":"IEEE Computer Society","author":[{"first_name":"Björn","full_name":"Meyer, Björn","last_name":"Meyer"},{"first_name":"Christian","last_name":"Plessl","orcid":"0000-0001-5728-9982","full_name":"Plessl, Christian","id":"16153"},{"last_name":"Förstner","orcid":"0000-0001-7059-9862","id":"158","full_name":"Förstner, Jens","first_name":"Jens"}],"date_created":"2018-04-03T14:55:57Z","title":"Transformation of scientific algorithms to parallel computing code: subdomain support in a MPI-multi-GPU backend","doi":"10.1109/SAAHPC.2011.12","publication":"Symp. on Application Accelerators in High Performance Computing (SAAHPC)","type":"conference","status":"public","_id":"2194","project":[{"grant_number":"01|H11004A","name":"Enabling Heterogeneous Hardware Acceleration using Novel Programming and Scheduling Models","_id":"30"}],"department":[{"_id":"27"},{"_id":"518"},{"_id":"15"},{"_id":"78"}],"user_id":"15278","keyword":["tet_topic_hpc"],"language":[{"iso":"eng"}]}]