---
_id: '24788'
author:
- first_name: Samer
full_name: Alhaddad, Samer
id: '42456'
last_name: Alhaddad
- first_name: Jens
full_name: Förstner, Jens
id: '158'
last_name: Förstner
orcid: 0000-0001-7059-9862
- first_name: Stefan
full_name: Groth, Stefan
last_name: Groth
- first_name: Daniel
full_name: Grünewald, Daniel
last_name: Grünewald
- first_name: Yevgen
full_name: Grynko, Yevgen
id: '26059'
last_name: Grynko
- first_name: Frank
full_name: Hannig, Frank
last_name: Hannig
- first_name: Tobias
full_name: Kenter, Tobias
id: '3145'
last_name: Kenter
- first_name: Franz‐Josef
full_name: Pfreundt, Franz‐Josef
last_name: Pfreundt
- first_name: Christian
full_name: Plessl, Christian
id: '16153'
last_name: Plessl
orcid: 0000-0001-5728-9982
- first_name: Merlind
full_name: Schotte, Merlind
last_name: Schotte
- first_name: Thomas
full_name: Steinke, Thomas
last_name: Steinke
- first_name: Jürgen
full_name: Teich, Jürgen
last_name: Teich
- first_name: Martin
full_name: Weiser, Martin
last_name: Weiser
- first_name: Florian
full_name: Wende, Florian
last_name: Wende
citation:
ama: 'Alhaddad S, Förstner J, Groth S, et al. The HighPerMeshes framework for numerical
algorithms on unstructured grids. Concurrency and Computation: Practice and
Experience. Published online 2021:e6616. doi:10.1002/cpe.6616'
apa: 'Alhaddad, S., Förstner, J., Groth, S., Grünewald, D., Grynko, Y., Hannig,
F., Kenter, T., Pfreundt, F., Plessl, C., Schotte, M., Steinke, T., Teich, J.,
Weiser, M., & Wende, F. (2021). The HighPerMeshes framework for numerical
algorithms on unstructured grids. Concurrency and Computation: Practice and
Experience, e6616. https://doi.org/10.1002/cpe.6616'
bibtex: '@article{Alhaddad_Förstner_Groth_Grünewald_Grynko_Hannig_Kenter_Pfreundt_Plessl_Schotte_et
al._2021, title={The HighPerMeshes framework for numerical algorithms on unstructured
grids}, DOI={10.1002/cpe.6616},
journal={Concurrency and Computation: Practice and Experience}, author={Alhaddad,
Samer and Förstner, Jens and Groth, Stefan and Grünewald, Daniel and Grynko, Yevgen
and Hannig, Frank and Kenter, Tobias and Pfreundt, Franz‐Josef and Plessl, Christian
and Schotte, Merlind and et al.}, year={2021}, pages={e6616} }'
chicago: 'Alhaddad, Samer, Jens Förstner, Stefan Groth, Daniel Grünewald, Yevgen
Grynko, Frank Hannig, Tobias Kenter, et al. “The HighPerMeshes Framework for Numerical
Algorithms on Unstructured Grids.” Concurrency and Computation: Practice and
Experience, 2021, e6616. https://doi.org/10.1002/cpe.6616.'
ieee: 'S. Alhaddad et al., “The HighPerMeshes framework for numerical algorithms
on unstructured grids,” Concurrency and Computation: Practice and Experience,
p. e6616, 2021, doi: 10.1002/cpe.6616.'
mla: 'Alhaddad, Samer, et al. “The HighPerMeshes Framework for Numerical Algorithms
on Unstructured Grids.” Concurrency and Computation: Practice and Experience,
2021, p. e6616, doi:10.1002/cpe.6616.'
short: 'S. Alhaddad, J. Förstner, S. Groth, D. Grünewald, Y. Grynko, F. Hannig,
T. Kenter, F. Pfreundt, C. Plessl, M. Schotte, T. Steinke, J. Teich, M. Weiser,
F. Wende, Concurrency and Computation: Practice and Experience (2021) e6616.'
date_created: 2021-09-22T06:15:50Z
date_updated: 2023-09-26T11:42:19Z
ddc:
- '004'
department:
- _id: '61'
- _id: '230'
- _id: '27'
- _id: '518'
doi: 10.1002/cpe.6616
file:
- access_level: open_access
content_type: application/pdf
creator: fossie
date_created: 2021-09-22T06:19:29Z
date_updated: 2021-09-22T06:19:29Z
file_id: '24789'
file_name: 2021-09 Alhaddad - Concurrency... - The HighPerMeshes framework for numerical
algorithms on unstructured grids.pdf
file_size: 2300152
relation: main_file
file_date_updated: 2021-09-22T06:19:29Z
has_accepted_license: '1'
keyword:
- tet_topic_hpc
language:
- iso: eng
oa: '1'
page: e6616
project:
- _id: '52'
name: Computing Resources Provided by the Paderborn Center for Parallel Computing
- _id: '33'
grant_number: 01|H16005A
name: HighPerMeshes
publication: 'Concurrency and Computation: Practice and Experience'
publication_identifier:
issn:
- 1532-0626
- 1532-0634
publication_status: published
quality_controlled: '1'
status: public
title: The HighPerMeshes framework for numerical algorithms on unstructured grids
type: journal_article
user_id: '15278'
year: '2021'
...
---
_id: '16852'
author:
- first_name: Stefan
full_name: Groth, Stefan
last_name: Groth
- first_name: Daniel
full_name: Grünewald, Daniel
last_name: Grünewald
- first_name: Jürgen
full_name: Teich, Jürgen
last_name: Teich
- first_name: Frank
full_name: Hannig, Frank
last_name: Hannig
citation:
ama: 'Groth S, Grünewald D, Teich J, Hannig F. A Runtime System for Finite Element
Methods in a Partitioned Global Address Space. In: Proceedings of the 17th
ACM International Conference on Computing Frontiers (CF ’2020). ACM; 2020.
doi:10.1145/3387902.3392628'
apa: Groth, S., Grünewald, D., Teich, J., & Hannig, F. (2020). A Runtime System
for Finite Element Methods in a Partitioned Global Address Space. Proceedings
of the 17th ACM International Conference on Computing Frontiers (CF ’2020).
International Conference on Computing Frontiers (CF ’20), Catania, Sicily, Italy.
https://doi.org/10.1145/3387902.3392628
bibtex: '@inproceedings{Groth_Grünewald_Teich_Hannig_2020, title={A Runtime System
for Finite Element Methods in a Partitioned Global Address Space}, DOI={10.1145/3387902.3392628},
booktitle={Proceedings of the 17th ACM International Conference on Computing Frontiers
(CF ’2020)}, publisher={ACM}, author={Groth, Stefan and Grünewald, Daniel and
Teich, Jürgen and Hannig, Frank}, year={2020} }'
chicago: Groth, Stefan, Daniel Grünewald, Jürgen Teich, and Frank Hannig. “A Runtime
System for Finite Element Methods in a Partitioned Global Address Space.” In Proceedings
of the 17th ACM International Conference on Computing Frontiers (CF ’2020).
ACM, 2020. https://doi.org/10.1145/3387902.3392628.
ieee: 'S. Groth, D. Grünewald, J. Teich, and F. Hannig, “A Runtime System for Finite
Element Methods in a Partitioned Global Address Space,” presented at the International
Conference on Computing Frontiers (CF ’20), Catania, Sicily, Italy, 2020, doi:
10.1145/3387902.3392628.'
mla: Groth, Stefan, et al. “A Runtime System for Finite Element Methods in a Partitioned
Global Address Space.” Proceedings of the 17th ACM International Conference
on Computing Frontiers (CF ’2020), ACM, 2020, doi:10.1145/3387902.3392628.
short: 'S. Groth, D. Grünewald, J. Teich, F. Hannig, in: Proceedings of the 17th
ACM International Conference on Computing Frontiers (CF ’2020), ACM, 2020.'
conference:
end_date: 2020-05-13
location: Catania, Sicily, Italy
name: International Conference on Computing Frontiers (CF '20)
start_date: 2020-05-11
date_created: 2020-04-24T10:47:04Z
date_updated: 2024-01-22T09:57:53Z
doi: 10.1145/3387902.3392628
language:
- iso: eng
project:
- _id: '33'
grant_number: 01|H16005A
name: HighPerMeshes
publication: Proceedings of the 17th ACM International Conference on Computing Frontiers
(CF '2020)
publisher: ACM
status: public
title: A Runtime System for Finite Element Methods in a Partitioned Global Address
Space
type: conference
user_id: '3145'
year: '2020'
...
---
_id: '15478'
abstract:
- lang: eng
text: Stratix 10 FPGA cards have a good potential for the acceleration of HPC workloads
since the Stratix 10 product line introduces devices with a large number of DSP
and memory blocks. The high level synthesis of OpenCL codes can play a fundamental
role for FPGAs in HPC, because it allows to implement different designs with lower
development effort compared to hand optimized HDL. However, Stratix 10 cards are
still hard to fully exploit using the Intel FPGA SDK for OpenCL. The implementation
of designs with thousands of concurrent arithmetic operations often suffers from
place and route problems that limit the maximum frequency or entirely prevent
a successful synthesis. In order to overcome these issues for the implementation
of the matrix multiplication, we formulate Cannon's matrix multiplication algorithm
with regard to its efficient synthesis within the FPGA logic. We obtain a two-level
block algorithm, where the lower level sub-matrices are multiplied using our Cannon's
algorithm implementation. Following this design approach with multiple compute
units, we are able to get maximum frequencies close to and above 300 MHz with
high utilization of DSP and memory blocks. This allows for performance results
above 1 TeraFLOPS.
author:
- first_name: Paolo
full_name: Gorlani, Paolo
id: '72045'
last_name: Gorlani
- first_name: Tobias
full_name: Kenter, Tobias
id: '3145'
last_name: Kenter
- first_name: Christian
full_name: Plessl, Christian
id: '16153'
last_name: Plessl
orcid: 0000-0001-5728-9982
citation:
ama: 'Gorlani P, Kenter T, Plessl C. OpenCL Implementation of Cannon’s Matrix Multiplication
Algorithm on Intel Stratix 10 FPGAs. In: Proceedings of the International Conference
on Field-Programmable Technology (FPT). IEEE; 2019. doi:10.1109/ICFPT47387.2019.00020'
apa: Gorlani, P., Kenter, T., & Plessl, C. (2019). OpenCL Implementation of
Cannon’s Matrix Multiplication Algorithm on Intel Stratix 10 FPGAs. In Proceedings
of the International Conference on Field-Programmable Technology (FPT). IEEE.
https://doi.org/10.1109/ICFPT47387.2019.00020
bibtex: '@inproceedings{Gorlani_Kenter_Plessl_2019, title={OpenCL Implementation
of Cannon’s Matrix Multiplication Algorithm on Intel Stratix 10 FPGAs}, DOI={10.1109/ICFPT47387.2019.00020},
booktitle={Proceedings of the International Conference on Field-Programmable Technology
(FPT)}, publisher={IEEE}, author={Gorlani, Paolo and Kenter, Tobias and Plessl,
Christian}, year={2019} }'
chicago: Gorlani, Paolo, Tobias Kenter, and Christian Plessl. “OpenCL Implementation
of Cannon’s Matrix Multiplication Algorithm on Intel Stratix 10 FPGAs.” In Proceedings
of the International Conference on Field-Programmable Technology (FPT). IEEE,
2019. https://doi.org/10.1109/ICFPT47387.2019.00020.
ieee: P. Gorlani, T. Kenter, and C. Plessl, “OpenCL Implementation of Cannon’s Matrix
Multiplication Algorithm on Intel Stratix 10 FPGAs,” in Proceedings of the
International Conference on Field-Programmable Technology (FPT), 2019.
mla: Gorlani, Paolo, et al. “OpenCL Implementation of Cannon’s Matrix Multiplication
Algorithm on Intel Stratix 10 FPGAs.” Proceedings of the International Conference
on Field-Programmable Technology (FPT), IEEE, 2019, doi:10.1109/ICFPT47387.2019.00020.
short: 'P. Gorlani, T. Kenter, C. Plessl, in: Proceedings of the International Conference
on Field-Programmable Technology (FPT), IEEE, 2019.'
conference:
name: International Conference on Field-Programmable Technology (FPT)
date_created: 2020-01-09T12:54:48Z
date_updated: 2022-01-06T06:52:26Z
ddc:
- '004'
department:
- _id: '27'
- _id: '518'
doi: 10.1109/ICFPT47387.2019.00020
file:
- access_level: closed
content_type: application/pdf
creator: plessl
date_created: 2020-01-09T12:53:57Z
date_updated: 2020-01-09T12:53:57Z
file_id: '15479'
file_name: gorlani19_fpt.pdf
file_size: 250559
relation: main_file
success: 1
file_date_updated: 2020-01-09T12:53:57Z
has_accepted_license: '1'
language:
- iso: eng
project:
- _id: '33'
grant_number: 01|H16005
name: HighPerMeshes
- _id: '32'
grant_number: PL 595/2-1
name: Performance and Efficiency in HPC with Custom Computing
publication: Proceedings of the International Conference on Field-Programmable Technology
(FPT)
publisher: IEEE
quality_controlled: '1'
status: public
title: OpenCL Implementation of Cannon's Matrix Multiplication Algorithm on Intel
Stratix 10 FPGAs
type: conference
user_id: '3145'
year: '2019'
...
---
_id: '16223'
abstract:
- lang: eng
text: Multigrid methods are fast and scalable numerical solvers for partial differential
equations (PDEs) that possess a large design space for implementing their algorithmic
components. Code generation approaches allow formulating multigrid methods on
a higher level of abstraction that can then be used to derive problem- and hardware-specific
solutions. Since these problems have a considerable implementation variability,
it is crucial to investigate a general mapping of core components in multigrid
methods to the target software. With SYCL there exists a high-level C++ abstraction
layer that is capable of targeting a multitude of architectures. We contribute
a general way to map multigrid components to SYCL functionality and provide a
performance evaluation for specific algorithmic components.
author:
- first_name: Stefan
full_name: Groth, Stefan
last_name: Groth
- first_name: Christian
full_name: Schmitt, Christian
last_name: Schmitt
- first_name: Jürgen
full_name: Teich, Jürgen
last_name: Teich
- first_name: Frank
full_name: Hannig, Frank
last_name: Hannig
citation:
ama: 'Groth S, Schmitt C, Teich J, Hannig F. SYCL Code Generation for Multigrid
Methods. In: Proceedings of the 22nd International Workshop on Software and
Compilers for Embedded Systems - SCOPES ’19. ; 2019. doi:10.1145/3323439.3323984'
apa: Groth, S., Schmitt, C., Teich, J., & Hannig, F. (2019). SYCL Code Generation
for Multigrid Methods. In Proceedings of the 22nd International Workshop on
Software and Compilers for Embedded Systems - SCOPES ’19. https://doi.org/10.1145/3323439.3323984
bibtex: '@inproceedings{Groth_Schmitt_Teich_Hannig_2019, title={SYCL Code Generation
for Multigrid Methods}, DOI={10.1145/3323439.3323984},
booktitle={Proceedings of the 22nd International Workshop on Software and Compilers
for Embedded Systems - SCOPES ’19}, author={Groth, Stefan and Schmitt, Christian
and Teich, Jürgen and Hannig, Frank}, year={2019} }'
chicago: Groth, Stefan, Christian Schmitt, Jürgen Teich, and Frank Hannig. “SYCL
Code Generation for Multigrid Methods.” In Proceedings of the 22nd International
Workshop on Software and Compilers for Embedded Systems - SCOPES ’19, 2019.
https://doi.org/10.1145/3323439.3323984.
ieee: S. Groth, C. Schmitt, J. Teich, and F. Hannig, “SYCL Code Generation for Multigrid
Methods,” in Proceedings of the 22nd International Workshop on Software and
Compilers for Embedded Systems - SCOPES ’19, 2019.
mla: Groth, Stefan, et al. “SYCL Code Generation for Multigrid Methods.” Proceedings
of the 22nd International Workshop on Software and Compilers for Embedded Systems
- SCOPES ’19, 2019, doi:10.1145/3323439.3323984.
short: 'S. Groth, C. Schmitt, J. Teich, F. Hannig, in: Proceedings of the 22nd International
Workshop on Software and Compilers for Embedded Systems - SCOPES ’19, 2019.'
date_created: 2020-03-03T14:25:00Z
date_updated: 2022-01-06T06:52:46Z
doi: 10.1145/3323439.3323984
language:
- iso: eng
project:
- _id: '33'
grant_number: 01|H16005
name: HighPerMeshes
publication: Proceedings of the 22nd International Workshop on Software and Compilers
for Embedded Systems - SCOPES '19
publication_identifier:
isbn:
- '9781450367622'
publication_status: published
status: public
title: SYCL Code Generation for Multigrid Methods
type: conference
user_id: '3145'
year: '2019'
...
---
_id: '3588'
abstract:
- lang: eng
text: 'In scientific computing, unstructured meshes are a crucial foundation for
the simulation of real-world physical phenomena. Compared to regular grids, they
allow resembling the computational domain with a much higher accuracy, which in
turn leads to more efficient computations.
There exists a wealth of supporting
libraries and frameworks that aid programmers with the implementation of applications
working on such grids, each built on top of existing parallelization technologies.
However, many approaches require the programmer to introduce a different programming
paradigm into their application or provide different variants of the code. SYCL
is a new programming standard providing a remedy to this dilemma by building on
standard C++17 with its so-called single-source approach: Programmers write standard
C++ code and expose parallelism using C++17 keywords. The application is then
transformed into a concrete implementation by the SYCL implementation. By encapsulating
the OpenCL ecosystem, different SYCL implementations enable not only the programming
of CPUs but also of heterogeneous platforms such as GPUs or other devices. For
the first time, this paper showcases a SYCL-based solver for the nodal Discontinuous
Galerkin method for Maxwell’s equations on unstructured meshes. We compare our
solution to a previous C-based implementation with respect to programmability
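and performance on heterogeneous platforms.'
author:
- first_name: Ayesha
full_name: Afzal, Ayesha
last_name: Afzal
- first_name: Christian
full_name: Schmitt, Christian
last_name: Schmitt
- first_name: Samer
full_name: Alhaddad, Samer
id: '42456'
last_name: Alhaddad
- first_name: Yevgen
full_name: Grynko, Yevgen
id: '26059'
last_name: Grynko
- first_name: Jürgen
full_name: Teich, Jürgen
last_name: Teich
- first_name: Jens
full_name: Förstner, Jens
id: '158'
last_name: Förstner
orcid: 0000-0001-7059-9862
- first_name: Frank
full_name: Hannig, Frank
last_name: Hannig
citation:
ama: 'Afzal A, Schmitt C, Alhaddad S, et al. Solving Maxwell’s Equations with Modern
C++ and SYCL: A Case Study. In: Proceedings of the 29th Annual IEEE International
Conference on Application-Specific Systems, Architectures and Processors (ASAP).
; 2018:49-56. doi:10.1109/ASAP.2018.8445127'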
apa: 'Afzal, A., Schmitt, C., Alhaddad, S., Grynko, Y., Teich, J., Förstner, J.,
& Hannig, F. (2018). Solving Maxwell’s Equations with Modern C++ and SYCL:
A Case Study. In Proceedings of the 29th Annual IEEE International Conference
on Application-specific Systems, Architectures and Processors (ASAP) (pp.
49–56). https://doi.org/10.1109/ASAP.2018.8445127'
bibtex: '@inproceedings{Afzal_Schmitt_Alhaddad_Grynko_Teich_Förstner_Hannig_2018,
title={Solving Maxwell’s Equations with Modern C++ and SYCL: A Case Study}, DOI={10.1109/ASAP.2018.8445127},
booktitle={Proceedings of the 29th Annual IEEE International Conference on Application-specific
Systems, Architectures and Processors (ASAP)}, author={Afzal, Ayesha and Schmitt,
Christian and Alhaddad, Samer and Grynko, Yevgen and Teich, Jürgen and Förstner,
Jens and Hannig, Frank}, year={2018}, pages={49–56} }'
chicago: 'Afzal, Ayesha, Christian Schmitt, Samer Alhaddad, Yevgen Grynko, Jürgen
Teich, Jens Förstner, and Frank Hannig. “Solving Maxwell’s Equations with Modern
C++ and SYCL: A Case Study.” In Proceedings of the 29th Annual IEEE International
Conference on Application-Specific Systems, Architectures and Processors (ASAP),
49–56, 2018. https://doi.org/10.1109/ASAP.2018.8445127.'
ieee: 'A. Afzal et al., “Solving Maxwell’s Equations with Modern C++ and
SYCL: A Case Study,” in Proceedings of the 29th Annual IEEE International Conference
on Application-specific Systems, Architectures and Processors (ASAP), 2018,
pp. 49–56.'
mla: 'Afzal, Ayesha, et al. “Solving Maxwell’s Equations with Modern C++ and SYCL:
A Case Study.” Proceedings of the 29th Annual IEEE International Conference
on Application-Specific Systems, Architectures and Processors (ASAP), 2018,
pp. 49–56, doi:10.1109/ASAP.2018.8445127.'
short: 'A. Afzal, C. Schmitt, S. Alhaddad, Y. Grynko, J. Teich, J. Förstner, F.
Hannig, in: Proceedings of the 29th Annual IEEE International Conference on Application-Specific
Systems, Architectures and Processors (ASAP), 2018, pp. 49–56.'
date_created: 2018-07-23T07:12:03Z
date_updated: 2022-01-06T06:59:26Z
ddc:
- '004'
department:
- _id: '61'
doi: 10.1109/ASAP.2018.8445127
file:
- access_level: request
content_type: application/pdf
creator: fossie
date_created: 2018-08-21T10:12:05Z
date_updated: 2022-01-06T06:59:26Z
embargo: 2019-09-03
embargo_to: open_access
file_id: '3986'
file_name: 2018-08 Afzal - ASAP Proceedings - Solving Maxwell equations with modern
C++ and SYCL.pdf
file_size: 252186
relation: main_file
file_date_updated: 2022-01-06T06:59:26Z
has_accepted_license: '1'
keyword:
- tet_topic_hpc
language:
- iso: eng
page: 49-56
project:
- _id: '33'
grant_number: 01|H16005
name: HighPerMeshes
- _id: '52'
name: Computing Resources Provided by the Paderborn Center for Parallel Computing
publication: Proceedings of the 29th Annual IEEE International Conference on Application-specific
Systems, Architectures and Processors (ASAP)
publication_identifier:
isbn:
- 978-1-5386-7479-6
status: public
title: 'Solving Maxwell''s Equations with Modern C++ and SYCL: A Case Study'
type: conference
user_id: '158'
year: '2018'
...
---
_id: '1588'
abstract:
- lang: eng
text: The exploration of FPGAs as accelerators for scientific simulations has so
far mostly been focused on small kernels of methods working on regular data structures,
for example in the form of stencil computations for finite difference methods.
In computational sciences, often more advanced methods are employed that promise
better stability, convergence, locality and scaling. Unstructured meshes are shown
to be more effective and more accurate, compared to regular grids, in representing
computation domains of various shapes. Using unstructured meshes, the discontinuous
Galerkin method preserves the ability to perform explicit local update operations
for simulations in the time domain. In this work, we investigate FPGAs as target
platform for an implementation of the nodal discontinuous Galerkin method to find
time-domain solutions of Maxwell's equations in an unstructured mesh. When maximizing
data reuse and fitting constant coefficients into suitably partitioned on-chip
memory, high computational intensity allows us to implement and feed wide data
paths with hundreds of floating point operators. By decoupling off-chip memory
accesses from the computations, high memory bandwidth can be sustained, even for
the irregular access pattern required by parts of the application. Using the Intel/Altera
OpenCL SDK for FPGAs, we present different implementation variants for different
polynomial orders of the method. In different phases of the algorithm, either
computational or bandwidth limits of the Arria 10 platform are almost reached,
thus outperforming a highly multithreaded CPU implementation by around 2x.
author:
- first_name: Tobias
full_name: Kenter, Tobias
id: '3145'
last_name: Kenter
- first_name: Gopinath
full_name: Mahale, Gopinath
last_name: Mahale
- first_name: Samer
full_name: Alhaddad, Samer
id: '42456'
last_name: Alhaddad
- first_name: Yevgen
full_name: Grynko, Yevgen
id: '26059'
last_name: Grynko
- first_name: Christian
full_name: Schmitt, Christian
last_name: Schmitt
- first_name: Ayesha
full_name: Afzal, Ayesha
last_name: Afzal
- first_name: Frank
full_name: Hannig, Frank
last_name: Hannig
- first_name: Jens
full_name: Förstner, Jens
id: '158'
last_name: Förstner
orcid: 0000-0001-7059-9862
- first_name: Christian
full_name: Plessl, Christian
id: '16153'
last_name: Plessl
orcid: 0000-0001-5728-9982
citation:
ama: 'Kenter T, Mahale G, Alhaddad S, et al. OpenCL-based FPGA Design to Accelerate
the Nodal Discontinuous Galerkin Method for Unstructured Meshes. In: Proc.
Int. Symp. on Field-Programmable Custom Computing Machines (FCCM). IEEE; 2018.
doi:10.1109/FCCM.2018.00037'
apa: Kenter, T., Mahale, G., Alhaddad, S., Grynko, Y., Schmitt, C., Afzal, A., Hannig,
F., Förstner, J., & Plessl, C. (2018). OpenCL-based FPGA Design to Accelerate
the Nodal Discontinuous Galerkin Method for Unstructured Meshes. Proc. Int.
Symp. on Field-Programmable Custom Computing Machines (FCCM). Proc. Int. Symp.
on Field-Programmable Custom Computing Machines (FCCM). https://doi.org/10.1109/FCCM.2018.00037
bibtex: '@inproceedings{Kenter_Mahale_Alhaddad_Grynko_Schmitt_Afzal_Hannig_Förstner_Plessl_2018,
title={OpenCL-based FPGA Design to Accelerate the Nodal Discontinuous Galerkin
Method for Unstructured Meshes}, DOI={10.1109/FCCM.2018.00037},
booktitle={Proc. Int. Symp. on Field-Programmable Custom Computing Machines (FCCM)},
publisher={IEEE}, author={Kenter, Tobias and Mahale, Gopinath and Alhaddad, Samer
and Grynko, Yevgen and Schmitt, Christian and Afzal, Ayesha and Hannig, Frank
and Förstner, Jens and Plessl, Christian}, year={2018} }'
chicago: Kenter, Tobias, Gopinath Mahale, Samer Alhaddad, Yevgen Grynko, Christian
Schmitt, Ayesha Afzal, Frank Hannig, Jens Förstner, and Christian Plessl. “OpenCL-Based
FPGA Design to Accelerate the Nodal Discontinuous Galerkin Method for Unstructured
Meshes.” In Proc. Int. Symp. on Field-Programmable Custom Computing Machines
(FCCM). IEEE, 2018. https://doi.org/10.1109/FCCM.2018.00037.
ieee: 'T. Kenter et al., “OpenCL-based FPGA Design to Accelerate the Nodal
Discontinuous Galerkin Method for Unstructured Meshes,” presented at the Proc.
Int. Symp. on Field-Programmable Custom Computing Machines (FCCM), 2018, doi:
10.1109/FCCM.2018.00037.'
mla: Kenter, Tobias, et al. “OpenCL-Based FPGA Design to Accelerate the Nodal Discontinuous
Galerkin Method for Unstructured Meshes.” Proc. Int. Symp. on Field-Programmable
Custom Computing Machines (FCCM), IEEE, 2018, doi:10.1109/FCCM.2018.00037.
short: 'T. Kenter, G. Mahale, S. Alhaddad, Y. Grynko, C. Schmitt, A. Afzal, F. Hannig,
J. Förstner, C. Plessl, in: Proc. Int. Symp. on Field-Programmable Custom Computing
Machines (FCCM), IEEE, 2018.'
conference:
name: Proc. Int. Symp. on Field-Programmable Custom Computing Machines (FCCM)
date_created: 2018-03-22T10:48:01Z
date_updated: 2023-09-26T11:47:52Z
ddc:
- '000'
department:
- _id: '27'
- _id: '518'
- _id: '61'
doi: 10.1109/FCCM.2018.00037
file:
- access_level: closed
content_type: application/pdf
creator: ups
date_created: 2018-11-02T14:45:05Z
date_updated: 2018-11-02T14:45:05Z
file_id: '5282'
file_name: 08457652.pdf
file_size: 269130
relation: main_file
success: 1
file_date_updated: 2018-11-02T14:45:05Z
has_accepted_license: '1'
keyword:
- tet_topic_hpc
language:
- iso: eng
project:
- _id: '33'
grant_number: 01|H16005A
name: HighPerMeshes
- _id: '1'
grant_number: '160364472'
name: SFB 901
- _id: '4'
name: SFB 901 - Project Area C
- _id: '14'
grant_number: '160364472'
name: SFB 901 - Subproject C2
publication: Proc. Int. Symp. on Field-Programmable Custom Computing Machines (FCCM)
publisher: IEEE
quality_controlled: '1'
status: public
title: OpenCL-based FPGA Design to Accelerate the Nodal Discontinuous Galerkin Method
for Unstructured Meshes
type: conference
user_id: '15278'
year: '2018'
...
---
_id: '1592'
abstract:
- lang: eng
text: Compared to classical HDL designs, generating FPGA designs with high-level synthesis
from an OpenCL specification promises easier exploration of different design alternatives
and, through ready-to-use infrastructure and common abstractions for host and
memory interfaces, easier portability between different FPGA families. In this
work, we evaluate the extent of this promise. To this end, we present a parameterized
FDTD implementation for photonic microcavity simulations. Our design can trade-off
different forms of parallelism and works for two independent OpenCL-based FPGA
design flows. Hence, we can target FPGAs from different vendors and different
FPGA families. We describe how we used pre-processor macros to achieve this flexibility
and to work around different shortcomings of the current tools. Choosing the right
design configurations, we are able to present two extremely competitive solutions
for very different FPGA targets, reaching up to 172 GFLOPS sustained performance.
With the portability and flexibility demonstrated, code developers not only avoid
vendor lock-in, but can even make best use of real trade-offs between different
architectures.
author:
- first_name: Tobias
full_name: Kenter, Tobias
id: '3145'
last_name: Kenter
- first_name: Jens
full_name: Förstner, Jens
id: '158'
last_name: Förstner
orcid: 0000-0001-7059-9862
- first_name: Christian
full_name: Plessl, Christian
id: '16153'
last_name: Plessl
orcid: 0000-0001-5728-9982
citation:
ama: 'Kenter T, Förstner J, Plessl C. Flexible FPGA design for FDTD using OpenCL.
In: Proc. Int. Conf. on Field Programmable Logic and Applications (FPL).
IEEE; 2017. doi:10.23919/FPL.2017.8056844'
apa: Kenter, T., Förstner, J., & Plessl, C. (2017). Flexible FPGA design for
FDTD using OpenCL. Proc. Int. Conf. on Field Programmable Logic and Applications
(FPL). https://doi.org/10.23919/FPL.2017.8056844
bibtex: '@inproceedings{Kenter_Förstner_Plessl_2017, title={Flexible FPGA design
for FDTD using OpenCL}, DOI={10.23919/FPL.2017.8056844},
booktitle={Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)},
publisher={IEEE}, author={Kenter, Tobias and Förstner, Jens and Plessl, Christian},
year={2017} }'
chicago: Kenter, Tobias, Jens Förstner, and Christian Plessl. “Flexible FPGA Design
for FDTD Using OpenCL.” In Proc. Int. Conf. on Field Programmable Logic and
Applications (FPL). IEEE, 2017. https://doi.org/10.23919/FPL.2017.8056844.
ieee: 'T. Kenter, J. Förstner, and C. Plessl, “Flexible FPGA design for FDTD using
OpenCL,” 2017, doi: 10.23919/FPL.2017.8056844.'
mla: Kenter, Tobias, et al. “Flexible FPGA Design for FDTD Using OpenCL.” Proc.
Int. Conf. on Field Programmable Logic and Applications (FPL), IEEE, 2017,
doi:10.23919/FPL.2017.8056844.
short: 'T. Kenter, J. Förstner, C. Plessl, in: Proc. Int. Conf. on Field Programmable
Logic and Applications (FPL), IEEE, 2017.'
date_created: 2018-03-22T11:10:23Z
date_updated: 2023-09-26T13:24:38Z
ddc:
- '000'
department:
- _id: '27'
- _id: '518'
- _id: '61'
doi: 10.23919/FPL.2017.8056844
file:
- access_level: closed
content_type: application/pdf
creator: ups
date_created: 2018-11-02T15:02:28Z
date_updated: 2018-11-02T15:02:28Z
file_id: '5291'
file_name: 08056844.pdf
file_size: 230235
relation: main_file
success: 1
file_date_updated: 2018-11-02T15:02:28Z
has_accepted_license: '1'
keyword:
- tet_topic_hpc
language:
- iso: eng
project:
- _id: '1'
grant_number: '160364472'
name: SFB 901
- _id: '4'
name: SFB 901 - Project Area C
- _id: '14'
grant_number: '160364472'
name: SFB 901 - Subproject C2
- _id: '33'
grant_number: 01|H16005A
name: HighPerMeshes
- _id: '32'
grant_number: PL 595/2-1 / 320898746
name: Performance and Efficiency in HPC with Custom Computing
- _id: '52'
name: Computing Resources Provided by the Paderborn Center for Parallel Computing
publication: Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)
publisher: IEEE
quality_controlled: '1'
status: public
title: Flexible FPGA design for FDTD using OpenCL
type: conference
user_id: '15278'
year: '2017'
...