---
_id: '24788'
author:
- first_name: Samer
  full_name: Alhaddad, Samer
  id: '42456'
  last_name: Alhaddad
- first_name: Jens
  full_name: Förstner, Jens
  id: '158'
  last_name: Förstner
  orcid: 0000-0001-7059-9862
- first_name: Stefan
  full_name: Groth, Stefan
  last_name: Groth
- first_name: Daniel
  full_name: Grünewald, Daniel
  last_name: Grünewald
- first_name: Yevgen
  full_name: Grynko, Yevgen
  id: '26059'
  last_name: Grynko
- first_name: Frank
  full_name: Hannig, Frank
  last_name: Hannig
- first_name: Tobias
  full_name: Kenter, Tobias
  id: '3145'
  last_name: Kenter
- first_name: Franz‐Josef
  full_name: Pfreundt, Franz‐Josef
  last_name: Pfreundt
- first_name: Christian
  full_name: Plessl, Christian
  id: '16153'
  last_name: Plessl
  orcid: 0000-0001-5728-9982
- first_name: Merlind
  full_name: Schotte, Merlind
  last_name: Schotte
- first_name: Thomas
  full_name: Steinke, Thomas
  last_name: Steinke
- first_name: Jürgen
  full_name: Teich, Jürgen
  last_name: Teich
- first_name: Martin
  full_name: Weiser, Martin
  last_name: Weiser
- first_name: Florian
  full_name: Wende, Florian
  last_name: Wende
citation:
  ama: 'Alhaddad S, Förstner J, Groth S, et al. The HighPerMeshes framework for numerical
    algorithms on unstructured grids. <i>Concurrency and Computation: Practice and
    Experience</i>. Published online 2021:e6616. doi:<a href="https://doi.org/10.1002/cpe.6616">10.1002/cpe.6616</a>'
  apa: 'Alhaddad, S., Förstner, J., Groth, S., Grünewald, D., Grynko, Y., Hannig,
    F., Kenter, T., Pfreundt, F., Plessl, C., Schotte, M., Steinke, T., Teich, J.,
    Weiser, M., &#38; Wende, F. (2021). The HighPerMeshes framework for numerical
    algorithms on unstructured grids. <i>Concurrency and Computation: Practice and
    Experience</i>, e6616. <a href="https://doi.org/10.1002/cpe.6616">https://doi.org/10.1002/cpe.6616</a>'
  bibtex: '@article{Alhaddad_Förstner_Groth_Grünewald_Grynko_Hannig_Kenter_Pfreundt_Plessl_Schotte_et
    al._2021, title={The HighPerMeshes framework for numerical algorithms on unstructured
    grids}, DOI={<a href="https://doi.org/10.1002/cpe.6616">10.1002/cpe.6616</a>},
    journal={Concurrency and Computation: Practice and Experience}, author={Alhaddad,
    Samer and Förstner, Jens and Groth, Stefan and Grünewald, Daniel and Grynko, Yevgen
    and Hannig, Frank and Kenter, Tobias and Pfreundt, Franz‐Josef and Plessl, Christian
    and Schotte, Merlind and et al.}, year={2021}, pages={e6616} }'
  chicago: 'Alhaddad, Samer, Jens Förstner, Stefan Groth, Daniel Grünewald, Yevgen
    Grynko, Frank Hannig, Tobias Kenter, et al. “The HighPerMeshes Framework for Numerical
    Algorithms on Unstructured Grids.” <i>Concurrency and Computation: Practice and
    Experience</i>, 2021, e6616. <a href="https://doi.org/10.1002/cpe.6616">https://doi.org/10.1002/cpe.6616</a>.'
  ieee: 'S. Alhaddad <i>et al.</i>, “The HighPerMeshes framework for numerical algorithms
    on unstructured grids,” <i>Concurrency and Computation: Practice and Experience</i>,
    p. e6616, 2021, doi: <a href="https://doi.org/10.1002/cpe.6616">10.1002/cpe.6616</a>.'
  mla: 'Alhaddad, Samer, et al. “The HighPerMeshes Framework for Numerical Algorithms
    on Unstructured Grids.” <i>Concurrency and Computation: Practice and Experience</i>,
    2021, p. e6616, doi:<a href="https://doi.org/10.1002/cpe.6616">10.1002/cpe.6616</a>.'
  short: 'S. Alhaddad, J. Förstner, S. Groth, D. Grünewald, Y. Grynko, F. Hannig,
    T. Kenter, F. Pfreundt, C. Plessl, M. Schotte, T. Steinke, J. Teich, M. Weiser,
    F. Wende, Concurrency and Computation: Practice and Experience (2021) e6616.'
date_created: 2021-09-22T06:15:50Z
date_updated: 2023-09-26T11:42:19Z
ddc:
- '004'
department:
- _id: '61'
- _id: '230'
- _id: '27'
- _id: '518'
doi: 10.1002/cpe.6616
file:
- access_level: open_access
  content_type: application/pdf
  creator: fossie
  date_created: 2021-09-22T06:19:29Z
  date_updated: 2021-09-22T06:19:29Z
  file_id: '24789'
  file_name: 2021-09 Alhaddad - Concurrency... - The HighPerMeshes framework for numerical
    algorithms on unstructured grids.pdf
  file_size: 2300152
  relation: main_file
file_date_updated: 2021-09-22T06:19:29Z
has_accepted_license: '1'
keyword:
- tet_topic_hpc
language:
- iso: eng
oa: '1'
page: e6616
project:
- _id: '52'
  name: Computing Resources Provided by the Paderborn Center for Parallel Computing
- _id: '33'
  grant_number: 01|H16005A
  name: HighPerMeshes
publication: 'Concurrency and Computation: Practice and Experience'
publication_identifier:
  issn:
  - 1532-0626
  - 1532-0634
publication_status: published
quality_controlled: '1'
status: public
title: The HighPerMeshes framework for numerical algorithms on unstructured grids
type: journal_article
user_id: '15278'
year: '2021'
...
---
_id: '16852'
author:
- first_name: Stefan
  full_name: Groth, Stefan
  last_name: Groth
- first_name: Daniel
  full_name: Grünewald, Daniel
  last_name: Grünewald
- first_name: Jürgen
  full_name: Teich, Jürgen
  last_name: Teich
- first_name: Frank
  full_name: Hannig, Frank
  last_name: Hannig
citation:
  ama: 'Groth S, Grünewald D, Teich J, Hannig F. A Runtime System for Finite Element
    Methods in a Partitioned Global Address Space. In: <i>Proceedings of the 17th
    ACM International Conference on Computing Frontiers (CF ’2020)</i>. ACM; 2020.
    doi:<a href="https://doi.org/10.1145/3387902.3392628">10.1145/3387902.3392628</a>'
  apa: Groth, S., Grünewald, D., Teich, J., &#38; Hannig, F. (2020). A Runtime System
    for Finite Element Methods in a Partitioned Global Address Space. <i>Proceedings
    of the 17th ACM International Conference on Computing Frontiers (CF ’2020)</i>.
    International Conference on Computing Frontiers (CF ’20), Catania, Sicily, Italy.
    <a href="https://doi.org/10.1145/3387902.3392628">https://doi.org/10.1145/3387902.3392628</a>
  bibtex: '@inproceedings{Groth_Grünewald_Teich_Hannig_2020, title={A Runtime System
    for Finite Element Methods in a Partitioned Global Address Space}, DOI={<a href="https://doi.org/10.1145/3387902.3392628">10.1145/3387902.3392628</a>},
    booktitle={Proceedings of the 17th ACM International Conference on Computing Frontiers
    (CF ’2020)}, publisher={ACM}, author={Groth, Stefan and Grünewald, Daniel and
    Teich, Jürgen and Hannig, Frank}, year={2020} }'
  chicago: Groth, Stefan, Daniel Grünewald, Jürgen Teich, and Frank Hannig. “A Runtime
    System for Finite Element Methods in a Partitioned Global Address Space.” In <i>Proceedings
    of the 17th ACM International Conference on Computing Frontiers (CF ’2020)</i>.
    ACM, 2020. <a href="https://doi.org/10.1145/3387902.3392628">https://doi.org/10.1145/3387902.3392628</a>.
  ieee: 'S. Groth, D. Grünewald, J. Teich, and F. Hannig, “A Runtime System for Finite
    Element Methods in a Partitioned Global Address Space,” presented at the International
    Conference on Computing Frontiers (CF ’20), Catania, Sicily, Italy, 2020, doi:
    <a href="https://doi.org/10.1145/3387902.3392628">10.1145/3387902.3392628</a>.'
  mla: Groth, Stefan, et al. “A Runtime System for Finite Element Methods in a Partitioned
    Global Address Space.” <i>Proceedings of the 17th ACM International Conference
    on Computing Frontiers (CF ’2020)</i>, ACM, 2020, doi:<a href="https://doi.org/10.1145/3387902.3392628">10.1145/3387902.3392628</a>.
  short: 'S. Groth, D. Grünewald, J. Teich, F. Hannig, in: Proceedings of the 17th
    ACM International Conference on Computing Frontiers (CF ’2020), ACM, 2020.'
conference:
  end_date: 2020-05-13
  location: Catania, Sicily, Italy
  name: International Conference on Computing Frontiers (CF '20)
  start_date: 2020-05-11
date_created: 2020-04-24T10:47:04Z
date_updated: 2024-01-22T09:57:53Z
doi: 10.1145/3387902.3392628
language:
- iso: eng
project:
- _id: '33'
  grant_number: 01|H16005A
  name: HighPerMeshes
publication: Proceedings of the 17th ACM International Conference on Computing Frontiers
  (CF '2020)
publisher: ACM
status: public
title: A Runtime System for Finite Element Methods in a Partitioned Global Address
  Space
type: conference
user_id: '3145'
year: '2020'
...
---
_id: '15478'
abstract:
- lang: eng
  text: Stratix 10 FPGA cards have a good potential for the acceleration of HPC workloads
    since the Stratix 10 product line introduces devices with a large number of DSP
    and memory blocks. The high level synthesis of OpenCL codes can play a fundamental
    role for FPGAs in HPC, because it allows to implement different designs with lower
    development effort compared to hand optimized HDL. However, Stratix 10 cards are
    still hard to fully exploit using the Intel FPGA SDK for OpenCL. The implementation
    of designs with thousands of concurrent arithmetic operations often suffers from
    place and route problems that limit the maximum frequency or entirely prevent
    a successful synthesis. In order to overcome these issues for the implementation
    of the matrix multiplication, we formulate Cannon's matrix multiplication algorithm
    with regard to its efficient synthesis within the FPGA logic. We obtain a two-level
    block algorithm, where the lower level sub-matrices are multiplied using our Cannon's
    algorithm implementation. Following this design approach with multiple compute
    units, we are able to get maximum frequencies close to and above 300 MHz with
    high utilization of DSP and memory blocks. This allows for performance results
    above 1 TeraFLOPS.
author:
- first_name: Paolo
  full_name: Gorlani, Paolo
  id: '72045'
  last_name: Gorlani
- first_name: Tobias
  full_name: Kenter, Tobias
  id: '3145'
  last_name: Kenter
- first_name: Christian
  full_name: Plessl, Christian
  id: '16153'
  last_name: Plessl
  orcid: 0000-0001-5728-9982
citation:
  ama: 'Gorlani P, Kenter T, Plessl C. OpenCL Implementation of Cannon’s Matrix Multiplication
    Algorithm on Intel Stratix 10 FPGAs. In: <i>Proceedings of the International Conference
    on Field-Programmable Technology (FPT)</i>. IEEE; 2019. doi:<a href="https://doi.org/10.1109/ICFPT47387.2019.00020">10.1109/ICFPT47387.2019.00020</a>'
  apa: Gorlani, P., Kenter, T., &#38; Plessl, C. (2019). OpenCL Implementation of
    Cannon’s Matrix Multiplication Algorithm on Intel Stratix 10 FPGAs. In <i>Proceedings
    of the International Conference on Field-Programmable Technology (FPT)</i>. IEEE.
    <a href="https://doi.org/10.1109/ICFPT47387.2019.00020">https://doi.org/10.1109/ICFPT47387.2019.00020</a>
  bibtex: '@inproceedings{Gorlani_Kenter_Plessl_2019, title={OpenCL Implementation
    of Cannon’s Matrix Multiplication Algorithm on Intel Stratix 10 FPGAs}, DOI={<a
    href="https://doi.org/10.1109/ICFPT47387.2019.00020">10.1109/ICFPT47387.2019.00020</a>},
    booktitle={Proceedings of the International Conference on Field-Programmable Technology
    (FPT)}, publisher={IEEE}, author={Gorlani, Paolo and Kenter, Tobias and Plessl,
    Christian}, year={2019} }'
  chicago: Gorlani, Paolo, Tobias Kenter, and Christian Plessl. “OpenCL Implementation
    of Cannon’s Matrix Multiplication Algorithm on Intel Stratix 10 FPGAs.” In <i>Proceedings
    of the International Conference on Field-Programmable Technology (FPT)</i>. IEEE,
    2019. <a href="https://doi.org/10.1109/ICFPT47387.2019.00020">https://doi.org/10.1109/ICFPT47387.2019.00020</a>.
  ieee: P. Gorlani, T. Kenter, and C. Plessl, “OpenCL Implementation of Cannon’s Matrix
    Multiplication Algorithm on Intel Stratix 10 FPGAs,” in <i>Proceedings of the
    International Conference on Field-Programmable Technology (FPT)</i>, 2019.
  mla: Gorlani, Paolo, et al. “OpenCL Implementation of Cannon’s Matrix Multiplication
    Algorithm on Intel Stratix 10 FPGAs.” <i>Proceedings of the International Conference
    on Field-Programmable Technology (FPT)</i>, IEEE, 2019, doi:<a href="https://doi.org/10.1109/ICFPT47387.2019.00020">10.1109/ICFPT47387.2019.00020</a>.
  short: 'P. Gorlani, T. Kenter, C. Plessl, in: Proceedings of the International Conference
    on Field-Programmable Technology (FPT), IEEE, 2019.'
conference:
  name: International Conference on Field-Programmable Technology (FPT)
date_created: 2020-01-09T12:54:48Z
date_updated: 2022-01-06T06:52:26Z
ddc:
- '004'
department:
- _id: '27'
- _id: '518'
doi: 10.1109/ICFPT47387.2019.00020
file:
- access_level: closed
  content_type: application/pdf
  creator: plessl
  date_created: 2020-01-09T12:53:57Z
  date_updated: 2020-01-09T12:53:57Z
  file_id: '15479'
  file_name: gorlani19_fpt.pdf
  file_size: 250559
  relation: main_file
  success: 1
file_date_updated: 2020-01-09T12:53:57Z
has_accepted_license: '1'
language:
- iso: eng
project:
- _id: '33'
  grant_number: 01|H16005
  name: HighPerMeshes
- _id: '32'
  grant_number: PL 595/2-1
  name: Performance and Efficiency in HPC with Custom Computing
publication: Proceedings of the International Conference on Field-Programmable Technology
  (FPT)
publisher: IEEE
quality_controlled: '1'
status: public
title: OpenCL Implementation of Cannon's Matrix Multiplication Algorithm on Intel
  Stratix 10 FPGAs
type: conference
user_id: '3145'
year: '2019'
...
---
_id: '16223'
abstract:
- lang: eng
  text: Multigrid methods are fast and scalable numerical solvers for partial differential
    equations (PDEs) that possess a large design space for implementing their algorithmic
    components. Code generation approaches allow formulating multigrid methods on
    a higher level of abstraction that can then be used to derive problem- and hardware-specific
    solutions. Since these problems have a considerable implementation variability,
    it is crucial to investigate a general mapping of core components in multigrid
    methods to the target software. With SYCL there exists a high-level C++ abstraction
    layer that is capable of targeting a multitude of architectures. We contribute
    a general way to map multigrid components to SYCL functionality and provide a
    performance evaluation for specific algorithmic components.
author:
- first_name: Stefan
  full_name: Groth, Stefan
  last_name: Groth
- first_name: Christian
  full_name: Schmitt, Christian
  last_name: Schmitt
- first_name: Jürgen
  full_name: Teich, Jürgen
  last_name: Teich
- first_name: Frank
  full_name: Hannig, Frank
  last_name: Hannig
citation:
  ama: 'Groth S, Schmitt C, Teich J, Hannig F. SYCL Code Generation for Multigrid
    Methods. In: <i>Proceedings of the 22nd International Workshop on Software and
    Compilers for Embedded Systems  - SCOPES ’19</i>. ; 2019. doi:<a href="https://doi.org/10.1145/3323439.3323984">10.1145/3323439.3323984</a>'
  apa: Groth, S., Schmitt, C., Teich, J., &#38; Hannig, F. (2019). SYCL Code Generation
    for Multigrid Methods. In <i>Proceedings of the 22nd International Workshop on
    Software and Compilers for Embedded Systems  - SCOPES ’19</i>. <a href="https://doi.org/10.1145/3323439.3323984">https://doi.org/10.1145/3323439.3323984</a>
  bibtex: '@inproceedings{Groth_Schmitt_Teich_Hannig_2019, title={SYCL Code Generation
    for Multigrid Methods}, DOI={<a href="https://doi.org/10.1145/3323439.3323984">10.1145/3323439.3323984</a>},
    booktitle={Proceedings of the 22nd International Workshop on Software and Compilers
    for Embedded Systems  - SCOPES ’19}, author={Groth, Stefan and Schmitt, Christian
    and Teich, Jürgen and Hannig, Frank}, year={2019} }'
  chicago: Groth, Stefan, Christian Schmitt, Jürgen Teich, and Frank Hannig. “SYCL
    Code Generation for Multigrid Methods.” In <i>Proceedings of the 22nd International
    Workshop on Software and Compilers for Embedded Systems  - SCOPES ’19</i>, 2019.
    <a href="https://doi.org/10.1145/3323439.3323984">https://doi.org/10.1145/3323439.3323984</a>.
  ieee: S. Groth, C. Schmitt, J. Teich, and F. Hannig, “SYCL Code Generation for Multigrid
    Methods,” in <i>Proceedings of the 22nd International Workshop on Software and
    Compilers for Embedded Systems  - SCOPES ’19</i>, 2019.
  mla: Groth, Stefan, et al. “SYCL Code Generation for Multigrid Methods.” <i>Proceedings
    of the 22nd International Workshop on Software and Compilers for Embedded Systems 
    - SCOPES ’19</i>, 2019, doi:<a href="https://doi.org/10.1145/3323439.3323984">10.1145/3323439.3323984</a>.
  short: 'S. Groth, C. Schmitt, J. Teich, F. Hannig, in: Proceedings of the 22nd International
    Workshop on Software and Compilers for Embedded Systems  - SCOPES ’19, 2019.'
date_created: 2020-03-03T14:25:00Z
date_updated: 2022-01-06T06:52:46Z
doi: 10.1145/3323439.3323984
language:
- iso: eng
project:
- _id: '33'
  grant_number: 01|H16005
  name: HighPerMeshes
publication: Proceedings of the 22nd International Workshop on Software and Compilers
  for Embedded Systems  - SCOPES '19
publication_identifier:
  isbn:
  - '9781450367622'
publication_status: published
status: public
title: SYCL Code Generation for Multigrid Methods
type: conference
user_id: '3145'
year: '2019'
...
---
_id: '3588'
abstract:
- lang: eng
  text: 'In scientific computing, unstructured meshes are a crucial foundation for
    the simulation of real-world physical phenomena. Compared to regular grids, they
    allow resembling the computational domain with a much higher accuracy, which in
    turn leads to more efficient computations.<br />There exists a wealth of supporting
    libraries and frameworks that aid programmers with the implementation of applications
    working on such grids, each built on top of existing parallelization technologies.
    However, many approaches require the programmer to introduce a different programming
    paradigm into their application or provide different variants of the code. SYCL
    is a new programming standard providing a remedy to this dilemma by building on
    standard C++17 with its so-called single-source approach: Programmers write standard
    C++ code and expose parallelism using C++17 keywords. The application is then
    transformed into a concrete implementation by the SYCL implementation. By encapsulating
    the OpenCL ecosystem, different SYCL implementations enable not only the programming
    of CPUs but also of heterogeneous platforms such as GPUs or other devices. For
    the first time, this paper showcases a SYCL-based solver for the nodal Discontinuous
    Galerkin method for Maxwell’s equations on unstructured meshes. We compare our
    solution to a previous C-based implementation with respect to programmability
    and performance on heterogeneous platforms.'
author:
- first_name: Ayesha
  full_name: Afzal, Ayesha
  last_name: Afzal
- first_name: Christian
  full_name: Schmitt, Christian
  last_name: Schmitt
- first_name: Samer
  full_name: Alhaddad, Samer
  id: '42456'
  last_name: Alhaddad
- first_name: Yevgen
  full_name: Grynko, Yevgen
  id: '26059'
  last_name: Grynko
- first_name: Jürgen
  full_name: Teich, Jürgen
  last_name: Teich
- first_name: Jens
  full_name: Förstner, Jens
  id: '158'
  last_name: Förstner
  orcid: 0000-0001-7059-9862
- first_name: Frank
  full_name: Hannig, Frank
  last_name: Hannig
citation:
  ama: 'Afzal A, Schmitt C, Alhaddad S, et al. Solving Maxwell’s Equations with Modern
    C++ and SYCL: A Case Study. In: <i>Proceedings of the 29th Annual IEEE International
    Conference on Application-Specific Systems, Architectures and Processors (ASAP)</i>.
    ; 2018:49-56. doi:<a href="https://doi.org/10.1109/ASAP.2018.8445127">10.1109/ASAP.2018.8445127</a>'
  apa: 'Afzal, A., Schmitt, C., Alhaddad, S., Grynko, Y., Teich, J., Förstner, J.,
    &#38; Hannig, F. (2018). Solving Maxwell’s Equations with Modern C++ and SYCL:
    A Case Study. In <i>Proceedings of the 29th Annual IEEE International Conference
    on Application-specific Systems, Architectures and Processors (ASAP)</i> (pp.
    49–56). <a href="https://doi.org/10.1109/ASAP.2018.8445127">https://doi.org/10.1109/ASAP.2018.8445127</a>'
  bibtex: '@inproceedings{Afzal_Schmitt_Alhaddad_Grynko_Teich_Förstner_Hannig_2018,
    title={Solving Maxwell’s Equations with Modern C++ and SYCL: A Case Study}, DOI={<a
    href="https://doi.org/10.1109/ASAP.2018.8445127">10.1109/ASAP.2018.8445127</a>},
    booktitle={Proceedings of the 29th Annual IEEE International Conference on Application-specific
    Systems, Architectures and Processors (ASAP)}, author={Afzal, Ayesha and Schmitt,
    Christian and Alhaddad, Samer and Grynko, Yevgen and Teich, Jürgen and Förstner,
    Jens and Hannig, Frank}, year={2018}, pages={49–56} }'
  chicago: 'Afzal, Ayesha, Christian Schmitt, Samer Alhaddad, Yevgen Grynko, Jürgen
    Teich, Jens Förstner, and Frank Hannig. “Solving Maxwell’s Equations with Modern
    C++ and SYCL: A Case Study.” In <i>Proceedings of the 29th Annual IEEE International
    Conference on Application-Specific Systems, Architectures and Processors (ASAP)</i>,
    49–56, 2018. <a href="https://doi.org/10.1109/ASAP.2018.8445127">https://doi.org/10.1109/ASAP.2018.8445127</a>.'
  ieee: 'A. Afzal <i>et al.</i>, “Solving Maxwell’s Equations with Modern C++ and
    SYCL: A Case Study,” in <i>Proceedings of the 29th Annual IEEE International Conference
    on Application-specific Systems, Architectures and Processors (ASAP)</i>, 2018,
    pp. 49–56.'
  mla: 'Afzal, Ayesha, et al. “Solving Maxwell’s Equations with Modern C++ and SYCL:
    A Case Study.” <i>Proceedings of the 29th Annual IEEE International Conference
    on Application-Specific Systems, Architectures and Processors (ASAP)</i>, 2018,
    pp. 49–56, doi:<a href="https://doi.org/10.1109/ASAP.2018.8445127">10.1109/ASAP.2018.8445127</a>.'
  short: 'A. Afzal, C. Schmitt, S. Alhaddad, Y. Grynko, J. Teich, J. Förstner, F.
    Hannig, in: Proceedings of the 29th Annual IEEE International Conference on Application-Specific
    Systems, Architectures and Processors (ASAP), 2018, pp. 49–56.'
date_created: 2018-07-23T07:12:03Z
date_updated: 2022-01-06T06:59:26Z
ddc:
- '004'
department:
- _id: '61'
doi: 10.1109/ASAP.2018.8445127
file:
- access_level: request
  content_type: application/pdf
  creator: fossie
  date_created: 2018-08-21T10:12:05Z
  date_updated: 2022-01-06T06:59:26Z
  embargo: 2019-09-03
  embargo_to: open_access
  file_id: '3986'
  file_name: 2018-08 Afzal - ASAP Proceedings - Solving Maxwell equations with modern
    C++ and SYCL.pdf
  file_size: 252186
  relation: main_file
file_date_updated: 2022-01-06T06:59:26Z
has_accepted_license: '1'
keyword:
- tet_topic_hpc
language:
- iso: eng
page: 49-56
project:
- _id: '33'
  grant_number: 01|H16005
  name: HighPerMeshes
- _id: '52'
  name: Computing Resources Provided by the Paderborn Center for Parallel Computing
publication: Proceedings of the 29th Annual IEEE International Conference on Application-specific
  Systems, Architectures and Processors (ASAP)
publication_identifier:
  isbn:
  - 978-1-5386-7479-6
status: public
title: 'Solving Maxwell''s Equations with Modern C++ and SYCL: A Case Study'
type: conference
user_id: '158'
year: '2018'
...
---
_id: '1588'
abstract:
- lang: eng
  text: The exploration of FPGAs as accelerators for scientific simulations has so
    far mostly been focused on small kernels of methods working on regular data structures,
    for example in the form of stencil computations for finite difference methods.
    In computational sciences, often more advanced methods are employed that promise
    better stability, convergence, locality and scaling. Unstructured meshes are shown
    to be more effective and more accurate, compared to regular grids, in representing
    computation domains of various shapes. Using unstructured meshes, the discontinuous
    Galerkin method preserves the ability to perform explicit local update operations
    for simulations in the time domain. In this work, we investigate FPGAs as target
    platform for an implementation of the nodal discontinuous Galerkin method to find
    time-domain solutions of Maxwell's equations in an unstructured mesh. When maximizing
    data reuse and fitting constant coefficients into suitably partitioned on-chip
    memory, high computational intensity allows us to implement and feed wide data
    paths with hundreds of floating point operators. By decoupling off-chip memory
    accesses from the computations, high memory bandwidth can be sustained, even for
    the irregular access pattern required by parts of the application. Using the Intel/Altera
    OpenCL SDK for FPGAs, we present different implementation variants for different
    polynomial orders of the method. In different phases of the algorithm, either
    computational or bandwidth limits of the Arria 10 platform are almost reached,
    thus outperforming a highly multithreaded CPU implementation by around 2x.
author:
- first_name: Tobias
  full_name: Kenter, Tobias
  id: '3145'
  last_name: Kenter
- first_name: Gopinath
  full_name: Mahale, Gopinath
  last_name: Mahale
- first_name: Samer
  full_name: Alhaddad, Samer
  id: '42456'
  last_name: Alhaddad
- first_name: Yevgen
  full_name: Grynko, Yevgen
  id: '26059'
  last_name: Grynko
- first_name: Christian
  full_name: Schmitt, Christian
  last_name: Schmitt
- first_name: Ayesha
  full_name: Afzal, Ayesha
  last_name: Afzal
- first_name: Frank
  full_name: Hannig, Frank
  last_name: Hannig
- first_name: Jens
  full_name: Förstner, Jens
  id: '158'
  last_name: Förstner
  orcid: 0000-0001-7059-9862
- first_name: Christian
  full_name: Plessl, Christian
  id: '16153'
  last_name: Plessl
  orcid: 0000-0001-5728-9982
citation:
  ama: 'Kenter T, Mahale G, Alhaddad S, et al. OpenCL-based FPGA Design to Accelerate
    the Nodal Discontinuous Galerkin Method for Unstructured Meshes. In: <i>Proc.
    Int. Symp. on Field-Programmable Custom Computing Machines (FCCM)</i>. IEEE; 2018.
    doi:<a href="https://doi.org/10.1109/FCCM.2018.00037">10.1109/FCCM.2018.00037</a>'
  apa: Kenter, T., Mahale, G., Alhaddad, S., Grynko, Y., Schmitt, C., Afzal, A., Hannig,
    F., Förstner, J., &#38; Plessl, C. (2018). OpenCL-based FPGA Design to Accelerate
    the Nodal Discontinuous Galerkin Method for Unstructured Meshes. <i>Proc. Int.
    Symp. on Field-Programmable Custom Computing Machines (FCCM)</i>. Proc. Int. Symp.
    on Field-Programmable Custom Computing Machines (FCCM). <a href="https://doi.org/10.1109/FCCM.2018.00037">https://doi.org/10.1109/FCCM.2018.00037</a>
  bibtex: '@inproceedings{Kenter_Mahale_Alhaddad_Grynko_Schmitt_Afzal_Hannig_Förstner_Plessl_2018,
    title={OpenCL-based FPGA Design to Accelerate the Nodal Discontinuous Galerkin
    Method for Unstructured Meshes}, DOI={<a href="https://doi.org/10.1109/FCCM.2018.00037">10.1109/FCCM.2018.00037</a>},
    booktitle={Proc. Int. Symp. on Field-Programmable Custom Computing Machines (FCCM)},
    publisher={IEEE}, author={Kenter, Tobias and Mahale, Gopinath and Alhaddad, Samer
    and Grynko, Yevgen and Schmitt, Christian and Afzal, Ayesha and Hannig, Frank
    and Förstner, Jens and Plessl, Christian}, year={2018} }'
  chicago: Kenter, Tobias, Gopinath Mahale, Samer Alhaddad, Yevgen Grynko, Christian
    Schmitt, Ayesha Afzal, Frank Hannig, Jens Förstner, and Christian Plessl. “OpenCL-Based
    FPGA Design to Accelerate the Nodal Discontinuous Galerkin Method for Unstructured
    Meshes.” In <i>Proc. Int. Symp. on Field-Programmable Custom Computing Machines
    (FCCM)</i>. IEEE, 2018. <a href="https://doi.org/10.1109/FCCM.2018.00037">https://doi.org/10.1109/FCCM.2018.00037</a>.
  ieee: 'T. Kenter <i>et al.</i>, “OpenCL-based FPGA Design to Accelerate the Nodal
    Discontinuous Galerkin Method for Unstructured Meshes,” presented at the Proc.
    Int. Symp. on Field-Programmable Custom Computing Machines (FCCM), 2018, doi:
    <a href="https://doi.org/10.1109/FCCM.2018.00037">10.1109/FCCM.2018.00037</a>.'
  mla: Kenter, Tobias, et al. “OpenCL-Based FPGA Design to Accelerate the Nodal Discontinuous
    Galerkin Method for Unstructured Meshes.” <i>Proc. Int. Symp. on Field-Programmable
    Custom Computing Machines (FCCM)</i>, IEEE, 2018, doi:<a href="https://doi.org/10.1109/FCCM.2018.00037">10.1109/FCCM.2018.00037</a>.
  short: 'T. Kenter, G. Mahale, S. Alhaddad, Y. Grynko, C. Schmitt, A. Afzal, F. Hannig,
    J. Förstner, C. Plessl, in: Proc. Int. Symp. on Field-Programmable Custom Computing
    Machines (FCCM), IEEE, 2018.'
conference:
  name: Proc. Int. Symp. on Field-Programmable Custom Computing Machines (FCCM)
date_created: 2018-03-22T10:48:01Z
date_updated: 2023-09-26T11:47:52Z
ddc:
- '000'
department:
- _id: '27'
- _id: '518'
- _id: '61'
doi: 10.1109/FCCM.2018.00037
file:
- access_level: closed
  content_type: application/pdf
  creator: ups
  date_created: 2018-11-02T14:45:05Z
  date_updated: 2018-11-02T14:45:05Z
  file_id: '5282'
  file_name: 08457652.pdf
  file_size: 269130
  relation: main_file
  success: 1
file_date_updated: 2018-11-02T14:45:05Z
has_accepted_license: '1'
keyword:
- tet_topic_hpc
language:
- iso: eng
project:
- _id: '33'
  grant_number: 01|H16005A
  name: HighPerMeshes
- _id: '1'
  grant_number: '160364472'
  name: SFB 901
- _id: '4'
  name: SFB 901 - Project Area C
- _id: '14'
  grant_number: '160364472'
  name: SFB 901 - Subproject C2
publication: Proc. Int. Symp. on Field-Programmable Custom Computing Machines (FCCM)
publisher: IEEE
quality_controlled: '1'
status: public
title: OpenCL-based FPGA Design to Accelerate the Nodal Discontinuous Galerkin Method
  for Unstructured Meshes
type: conference
user_id: '15278'
year: '2018'
...
---
_id: '1592'
abstract:
- lang: eng
  text: Compared to classical HDL designs, generating FPGA with high-level synthesis
    from an OpenCL specification promises easier exploration of different design alternatives
    and, through ready-to-use infrastructure and common abstractions for host and
    memory interfaces, easier portability between different FPGA families. In this
    work, we evaluate the extent of this promise. To this end, we present a parameterized
    FDTD implementation for photonic microcavity simulations. Our design can trade-off
    different forms of parallelism and works for two independent OpenCL-based FPGA
    design flows. Hence, we can target FPGAs from different vendors and different
    FPGA families. We describe how we used pre-processor macros to achieve this flexibility
    and to work around different shortcomings of the current tools. Choosing the right
    design configurations, we are able to present two extremely competitive solutions
    for very different FPGA targets, reaching up to 172 GFLOPS sustained performance.
    With the portability and flexibility demonstrated, code developers not only avoid
    vendor lock-in, but can even make best use of real trade-offs between different
    architectures.
author:
- first_name: Tobias
  full_name: Kenter, Tobias
  id: '3145'
  last_name: Kenter
- first_name: Jens
  full_name: Förstner, Jens
  id: '158'
  last_name: Förstner
  orcid: 0000-0001-7059-9862
- first_name: Christian
  full_name: Plessl, Christian
  id: '16153'
  last_name: Plessl
  orcid: 0000-0001-5728-9982
citation:
  ama: 'Kenter T, Förstner J, Plessl C. Flexible FPGA design for FDTD using OpenCL.
    In: <i>Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)</i>.
    IEEE; 2017. doi:<a href="https://doi.org/10.23919/FPL.2017.8056844">10.23919/FPL.2017.8056844</a>'
  apa: Kenter, T., Förstner, J., &#38; Plessl, C. (2017). Flexible FPGA design for
    FDTD using OpenCL. <i>Proc. Int. Conf. on Field Programmable Logic and Applications
    (FPL)</i>. <a href="https://doi.org/10.23919/FPL.2017.8056844">https://doi.org/10.23919/FPL.2017.8056844</a>
  bibtex: '@inproceedings{Kenter_Förstner_Plessl_2017, title={Flexible FPGA design
    for FDTD using OpenCL}, DOI={<a href="https://doi.org/10.23919/FPL.2017.8056844">10.23919/FPL.2017.8056844</a>},
    booktitle={Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)},
    publisher={IEEE}, author={Kenter, Tobias and Förstner, Jens and Plessl, Christian},
    year={2017} }'
  chicago: Kenter, Tobias, Jens Förstner, and Christian Plessl. “Flexible FPGA Design
    for FDTD Using OpenCL.” In <i>Proc. Int. Conf. on Field Programmable Logic and
    Applications (FPL)</i>. IEEE, 2017. <a href="https://doi.org/10.23919/FPL.2017.8056844">https://doi.org/10.23919/FPL.2017.8056844</a>.
  ieee: 'T. Kenter, J. Förstner, and C. Plessl, “Flexible FPGA design for FDTD using
    OpenCL,” 2017, doi: <a href="https://doi.org/10.23919/FPL.2017.8056844">10.23919/FPL.2017.8056844</a>.'
  mla: Kenter, Tobias, et al. “Flexible FPGA Design for FDTD Using OpenCL.” <i>Proc.
    Int. Conf. on Field Programmable Logic and Applications (FPL)</i>, IEEE, 2017,
    doi:<a href="https://doi.org/10.23919/FPL.2017.8056844">10.23919/FPL.2017.8056844</a>.
  short: 'T. Kenter, J. Förstner, C. Plessl, in: Proc. Int. Conf. on Field Programmable
    Logic and Applications (FPL), IEEE, 2017.'
date_created: 2018-03-22T11:10:23Z
date_updated: 2023-09-26T13:24:38Z
ddc:
- '000'
department:
- _id: '27'
- _id: '518'
- _id: '61'
doi: 10.23919/FPL.2017.8056844
file:
- access_level: closed
  content_type: application/pdf
  creator: ups
  date_created: 2018-11-02T15:02:28Z
  date_updated: 2018-11-02T15:02:28Z
  file_id: '5291'
  file_name: 08056844.pdf
  file_size: 230235
  relation: main_file
  success: 1
file_date_updated: 2018-11-02T15:02:28Z
has_accepted_license: '1'
keyword:
- tet_topic_hpc
language:
- iso: eng
project:
- _id: '1'
  grant_number: '160364472'
  name: SFB 901
- _id: '4'
  name: SFB 901 - Project Area C
- _id: '14'
  grant_number: '160364472'
  name: SFB 901 - Subproject C2
- _id: '33'
  grant_number: 01|H16005A
  name: HighPerMeshes
- _id: '32'
  grant_number: PL 595/2-1 / 320898746
  name: Performance and Efficiency in HPC with Custom Computing
- _id: '52'
  name: Computing Resources Provided by the Paderborn Center for Parallel Computing
publication: Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)
publisher: IEEE
quality_controlled: '1'
status: public
title: Flexible FPGA design for FDTD using OpenCL
type: conference
user_id: '15278'
year: '2017'
...
