[{"doi":"10.1109/iccd.2018.00060","title":"Automatic Mapping of the Sum-Product Network Inference Problem to FPGA-Based Accelerators","date_created":"2020-04-06T10:33:58Z","author":[{"first_name":"Lukas","last_name":"Sommer","full_name":"Sommer, Lukas"},{"first_name":"Julian","full_name":"Oppermann, Julian","last_name":"Oppermann"},{"first_name":"Alejandro","last_name":"Molina","full_name":"Molina, Alejandro"},{"last_name":"Binnig","full_name":"Binnig, Carsten","first_name":"Carsten"},{"last_name":"Kersting","full_name":"Kersting, Kristian","first_name":"Kristian"},{"first_name":"Andreas","last_name":"Koch","full_name":"Koch, Andreas"}],"date_updated":"2022-01-06T06:52:50Z","citation":{"short":"L. Sommer, J. Oppermann, A. Molina, C. Binnig, K. Kersting, A. Koch, in: 2018 IEEE 36th International Conference on Computer Design (ICCD), 2019.","bibtex":"@inproceedings{Sommer_Oppermann_Molina_Binnig_Kersting_Koch_2019, title={Automatic Mapping of the Sum-Product Network Inference Problem to FPGA-Based Accelerators}, DOI={<a href=\"https://doi.org/10.1109/iccd.2018.00060\">10.1109/iccd.2018.00060</a>}, booktitle={2018 IEEE 36th International Conference on Computer Design (ICCD)}, author={Sommer, Lukas and Oppermann, Julian and Molina, Alejandro and Binnig, Carsten and Kersting, Kristian and Koch, Andreas}, year={2019} }","mla":"Sommer, Lukas, et al. “Automatic Mapping of the Sum-Product Network Inference Problem to FPGA-Based Accelerators.” <i>2018 IEEE 36th International Conference on Computer Design (ICCD)</i>, 2019, doi:<a href=\"https://doi.org/10.1109/iccd.2018.00060\">10.1109/iccd.2018.00060</a>.","apa":"Sommer, L., Oppermann, J., Molina, A., Binnig, C., Kersting, K., &#38; Koch, A. (2019). Automatic Mapping of the Sum-Product Network Inference Problem to FPGA-Based Accelerators. In <i>2018 IEEE 36th International Conference on Computer Design (ICCD)</i>. <a href=\"https://doi.org/10.1109/iccd.2018.00060\">https://doi.org/10.1109/iccd.2018.00060</a>","ama":"Sommer L, Oppermann J, Molina A, Binnig C, Kersting K, Koch A. Automatic Mapping of the Sum-Product Network Inference Problem to FPGA-Based Accelerators. In: <i>2018 IEEE 36th International Conference on Computer Design (ICCD)</i>. ; 2019. doi:<a href=\"https://doi.org/10.1109/iccd.2018.00060\">10.1109/iccd.2018.00060</a>","chicago":"Sommer, Lukas, Julian Oppermann, Alejandro Molina, Carsten Binnig, Kristian Kersting, and Andreas Koch. “Automatic Mapping of the Sum-Product Network Inference Problem to FPGA-Based Accelerators.” In <i>2018 IEEE 36th International Conference on Computer Design (ICCD)</i>, 2019. <a href=\"https://doi.org/10.1109/iccd.2018.00060\">https://doi.org/10.1109/iccd.2018.00060</a>.","ieee":"L. Sommer, J. Oppermann, A. Molina, C. Binnig, K. Kersting, and A. Koch, “Automatic Mapping of the Sum-Product Network Inference Problem to FPGA-Based Accelerators,” in <i>2018 IEEE 36th International Conference on Computer Design (ICCD)</i>, 2019."},"year":"2019","publication_identifier":{"isbn":["9781538684771"]},"publication_status":"published","language":[{"iso":"eng"}],"keyword":["pc2-harp-ressources"],"user_id":"61189","_id":"16413","status":"public","abstract":[{"text":"In recent years, FPGAs have been successfully employed for the implementation of efficient, application-specific accelerators for a wide range of machine learning tasks. In this work, we consider probabilistic models, namely, (Mixed) Sum-Product Networks (SPN), a deep architecture that can provide tractable inference for multivariate distributions over mixed data-sources. We develop a fully pipelined FPGA accelerator architecture, including a pipelined interface to external memory, for the inference in (mixed) SPNs. To meet the precision constraints of SPNs, all computations are conducted using double-precision floating point arithmetic. Starting from an input description, the custom FPGA-accelerator is synthesized fully automatically by our tool flow. To the best of our knowledge, this work is the first approach to offload the SPN inference problem to FPGA-based accelerators. Our evaluation shows that the SPN inference problem benefits from offloading to our pipelined FPGA accelerator architecture.","lang":"eng"}],"publication":"2018 IEEE 36th International Conference on Computer Design (ICCD)","type":"conference"},{"keyword":["pc2-harp-ressources"],"language":[{"iso":"eng"}],"_id":"16417","user_id":"61189","abstract":[{"lang":"eng","text":"The performance of High-Level Synthesis (HLS) applications with irregular data structures is limited by its imperative programming paradigm like C/C++. In this paper, we show that constructing concurrent data structures with channels, a programming construct derived from CSP (communicating sequential processes) paradigm, is an effective approach to improve the performance of these applications. We evaluate concurrent data structure for FPGA by synthesizing a K-means clustering algorithm on the Intel HARP2 platform. A fully pipelined KMC processing element can be synthesized from OpenCL with the help of a SPSC (single-producer-single-consumer) queue and stack built from channels, achieving 15.2x speedup over a sequential baseline. The number of processing element can be scaled up by leveraging a MPMC (multiple-producer-multiple-consumer) stack with work distribution for dynamic load balance. Evaluation shows that an additional 3.5x speedup can be achieved when 4 processing element is instantiated. These results show that the concurrent data structure built with channels has great potential for improving the parallelism of HLS applications. We hope that our study will stimulate further research into the potential of channel-based HLS."}],"status":"public","publication":"Proceedings of the 2019 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays","type":"conference","title":"Constructing Concurrent Data Structures on FPGA with Channels","doi":"10.1145/3289602.3293921","date_updated":"2022-01-06T06:52:50Z","date_created":"2020-04-06T11:53:34Z","author":[{"first_name":"Hui","last_name":"Yan","full_name":"Yan, Hui"},{"last_name":"Li","full_name":"Li, Zhaoshi","first_name":"Zhaoshi"},{"last_name":"Liu","full_name":"Liu, Leibo","first_name":"Leibo"},{"first_name":"Shouyi","full_name":"Yin, Shouyi","last_name":"Yin"},{"first_name":"Shaojun","last_name":"Wei","full_name":"Wei, Shaojun"}],"year":"2019","citation":{"chicago":"Yan, Hui, Zhaoshi Li, Leibo Liu, Shouyi Yin, and Shaojun Wei. “Constructing Concurrent Data Structures on FPGA with Channels.” In <i>Proceedings of the 2019 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays</i>, 2019. <a href=\"https://doi.org/10.1145/3289602.3293921\">https://doi.org/10.1145/3289602.3293921</a>.","ieee":"H. Yan, Z. Li, L. Liu, S. Yin, and S. Wei, “Constructing Concurrent Data Structures on FPGA with Channels,” in <i>Proceedings of the 2019 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays</i>, 2019.","ama":"Yan H, Li Z, Liu L, Yin S, Wei S. Constructing Concurrent Data Structures on FPGA with Channels. In: <i>Proceedings of the 2019 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays</i>. ; 2019. doi:<a href=\"https://doi.org/10.1145/3289602.3293921\">10.1145/3289602.3293921</a>","apa":"Yan, H., Li, Z., Liu, L., Yin, S., &#38; Wei, S. (2019). Constructing Concurrent Data Structures on FPGA with Channels. In <i>Proceedings of the 2019 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays</i>. <a href=\"https://doi.org/10.1145/3289602.3293921\">https://doi.org/10.1145/3289602.3293921</a>","mla":"Yan, Hui, et al. “Constructing Concurrent Data Structures on FPGA with Channels.” <i>Proceedings of the 2019 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays</i>, 2019, doi:<a href=\"https://doi.org/10.1145/3289602.3293921\">10.1145/3289602.3293921</a>.","short":"H. Yan, Z. Li, L. Liu, S. Yin, S. Wei, in: Proceedings of the 2019 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays, 2019.","bibtex":"@inproceedings{Yan_Li_Liu_Yin_Wei_2019, title={Constructing Concurrent Data Structures on FPGA with Channels}, DOI={<a href=\"https://doi.org/10.1145/3289602.3293921\">10.1145/3289602.3293921</a>}, booktitle={Proceedings of the 2019 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays}, author={Yan, Hui and Li, Zhaoshi and Liu, Leibo and Yin, Shouyi and Wei, Shaojun}, year={2019} }"},"publication_identifier":{"isbn":["9781450361378"]},"publication_status":"published"},{"citation":{"bibtex":"@article{Wang_Thiagaraj_Sinnen_2019, title={Combining Multiple Optimized FPGA-based Pulsar Search Modules Using OpenCL}, DOI={<a href=\"https://doi.org/10.1142/s2251171719500089\">10.1142/s2251171719500089</a>}, number={1950008}, journal={Journal of Astronomical Instrumentation}, author={Wang, Haomiao and Thiagaraj, Prabu and Sinnen, Oliver}, year={2019} }","mla":"Wang, Haomiao, et al. “Combining Multiple Optimized FPGA-Based Pulsar Search Modules Using OpenCL.” <i>Journal of Astronomical Instrumentation</i>, 1950008, 2019, doi:<a href=\"https://doi.org/10.1142/s2251171719500089\">10.1142/s2251171719500089</a>.","short":"H. Wang, P. Thiagaraj, O. Sinnen, Journal of Astronomical Instrumentation (2019).","apa":"Wang, H., Thiagaraj, P., &#38; Sinnen, O. (2019). Combining Multiple Optimized FPGA-based Pulsar Search Modules Using OpenCL. <i>Journal of Astronomical Instrumentation</i>. <a href=\"https://doi.org/10.1142/s2251171719500089\">https://doi.org/10.1142/s2251171719500089</a>","ama":"Wang H, Thiagaraj P, Sinnen O. Combining Multiple Optimized FPGA-based Pulsar Search Modules Using OpenCL. <i>Journal of Astronomical Instrumentation</i>. 2019. doi:<a href=\"https://doi.org/10.1142/s2251171719500089\">10.1142/s2251171719500089</a>","chicago":"Wang, Haomiao, Prabu Thiagaraj, and Oliver Sinnen. “Combining Multiple Optimized FPGA-Based Pulsar Search Modules Using OpenCL.” <i>Journal of Astronomical Instrumentation</i>, 2019. <a href=\"https://doi.org/10.1142/s2251171719500089\">https://doi.org/10.1142/s2251171719500089</a>.","ieee":"H. Wang, P. Thiagaraj, and O. Sinnen, “Combining Multiple Optimized FPGA-based Pulsar Search Modules Using OpenCL,” <i>Journal of Astronomical Instrumentation</i>, 2019."},"year":"2019","publication_status":"published","publication_identifier":{"issn":["2251-1717","2251-1725"]},"doi":"10.1142/s2251171719500089","title":"Combining Multiple Optimized FPGA-based Pulsar Search Modules Using OpenCL","author":[{"last_name":"Wang","full_name":"Wang, Haomiao","first_name":"Haomiao"},{"first_name":"Prabu","full_name":"Thiagaraj, Prabu","last_name":"Thiagaraj"},{"last_name":"Sinnen","full_name":"Sinnen, Oliver","first_name":"Oliver"}],"date_created":"2020-04-06T12:00:06Z","date_updated":"2022-01-06T06:52:50Z","status":"public","abstract":[{"lang":"eng","text":"<jats:p> Field-Programmable Gate Arrays (FPGAs) are widely used in the central signal processing design of the Square Kilometer Array (SKA) as hardware accelerators. The frequency domain acceleration search (FDAS) module is an important part of the SKA1-MID pulsar search engine. To develop for a yet to be finalized hardware, for cross-discipline interoperability and to achieve fast prototyping, OpenCL as a high-level FPGA synthesis approaches employed to create the sub-modules of FDAS. The FT convolution and the harmonic-summing plus some other minor sub-modules are elements in the FDAS module that have been well-optimized separately before. In this paper, we explore the design space of combining well-optimized designs, dealing with the ensuing need to trade-off and compromise. Pipeline computing is employed to handle multiple input arrays at high speed. The hardware target is to employ multiple high-end FPGAs to process the combined FDAS module. The results show interesting consequences, where the best individual solutions are not necessarily the best solutions for the speed of a pipeline where FPGA resources and memory bandwidth need to be shared. By proposing multiple buffering techniques to the pipeline, the combined FDAS module can achieve up to 2[Formula: see text] speedup over implementations without pipeline computing. We perform an extensive experimental evaluation on multiple high-end FPGA cards hosted in a workstation and compare to a technology comparable mid-range GPU. </jats:p>"}],"type":"journal_article","publication":"Journal of Astronomical Instrumentation","language":[{"iso":"eng"}],"article_number":"1950008","keyword":["pc2-harp-ressources"],"user_id":"61189","_id":"16420"},{"article_number":"149","keyword":["pc2-harp-ressources"],"language":[{"iso":"eng"}],"_id":"16422","user_id":"61189","abstract":[{"lang":"eng","text":"<jats:p>Intel recently introduced the Heterogeneous Architecture Research Platform, HARP. In this platform, the Central Processing Unit and a Field-Programmable Gate Array are connected through a high-bandwidth, low-latency interconnect and both share DRAM memory. For this platform, Open Computing Language (OpenCL), a High-Level Synthesis (HLS) language, is made available. By making use of HLS, a faster design cycle can be achieved compared to programming in a traditional hardware description language. This, however, comes at the cost of having less control over the hardware implementation. We will investigate how OpenCL can be applied to implement a real-time guided image filter on the HARP platform. In the first phase, the performance-critical parameters of the OpenCL programming model are defined using several specialized benchmarks. In a second phase, the guided image filter algorithm is implemented using the insights gained in the first phase. Both a floating-point and a fixed-point implementation were developed for this algorithm, based on a sliding window implementation. This resulted in a maximum floating-point performance of 135 GFLOPS, a maximum fixed-point performance of 430 GOPS and a throughput of HD color images at 74 frames per second.</jats:p>"}],"status":"public","type":"journal_article","publication":"Algorithms","title":"Mapping a Guided Image Filter on the HARP Reconfigurable Architecture Using OpenCL","doi":"10.3390/a12080149","date_updated":"2022-01-06T06:52:50Z","date_created":"2020-04-06T12:08:24Z","author":[{"first_name":"Thomas","last_name":"Faict","full_name":"Faict, Thomas"},{"first_name":"Erik H.","last_name":"D’Hollander","full_name":"D’Hollander, Erik H."},{"first_name":"Bart","last_name":"Goossens","full_name":"Goossens, Bart"}],"year":"2019","citation":{"ama":"Faict T, D’Hollander EH, Goossens B. Mapping a Guided Image Filter on the HARP Reconfigurable Architecture Using OpenCL. <i>Algorithms</i>. 2019. doi:<a href=\"https://doi.org/10.3390/a12080149\">10.3390/a12080149</a>","chicago":"Faict, Thomas, Erik H. D’Hollander, and Bart Goossens. “Mapping a Guided Image Filter on the HARP Reconfigurable Architecture Using OpenCL.” <i>Algorithms</i>, 2019. <a href=\"https://doi.org/10.3390/a12080149\">https://doi.org/10.3390/a12080149</a>.","ieee":"T. Faict, E. H. D’Hollander, and B. Goossens, “Mapping a Guided Image Filter on the HARP Reconfigurable Architecture Using OpenCL,” <i>Algorithms</i>, 2019.","apa":"Faict, T., D’Hollander, E. H., &#38; Goossens, B. (2019). Mapping a Guided Image Filter on the HARP Reconfigurable Architecture Using OpenCL. <i>Algorithms</i>. <a href=\"https://doi.org/10.3390/a12080149\">https://doi.org/10.3390/a12080149</a>","mla":"Faict, Thomas, et al. “Mapping a Guided Image Filter on the HARP Reconfigurable Architecture Using OpenCL.” <i>Algorithms</i>, 149, 2019, doi:<a href=\"https://doi.org/10.3390/a12080149\">10.3390/a12080149</a>.","short":"T. Faict, E.H. D’Hollander, B. Goossens, Algorithms (2019).","bibtex":"@article{Faict_D’Hollander_Goossens_2019, title={Mapping a Guided Image Filter on the HARP Reconfigurable Architecture Using OpenCL}, DOI={<a href=\"https://doi.org/10.3390/a12080149\">10.3390/a12080149</a>}, number={149}, journal={Algorithms}, author={Faict, Thomas and D’Hollander, Erik H. and Goossens, Bart}, year={2019} }"},"publication_status":"published","publication_identifier":{"issn":["1999-4893"]}},{"title":"Parallel multiprocessing and scheduling on the heterogeneous Xeon+FPGA platform","doi":"10.1007/s11227-019-02935-1","date_updated":"2022-01-06T06:52:50Z","author":[{"last_name":"Rodríguez","full_name":"Rodríguez, Andrés","first_name":"Andrés"},{"last_name":"Navarro","full_name":"Navarro, Angeles","first_name":"Angeles"},{"first_name":"Rafael","last_name":"Asenjo","full_name":"Asenjo, Rafael"},{"first_name":"Francisco","full_name":"Corbera, Francisco","last_name":"Corbera"},{"last_name":"Gran","full_name":"Gran, Rubén","first_name":"Rubén"},{"last_name":"Suárez","full_name":"Suárez, Darío","first_name":"Darío"},{"first_name":"Jose","full_name":"Nunez-Yanez, Jose","last_name":"Nunez-Yanez"}],"date_created":"2020-04-06T12:09:25Z","year":"2019","citation":{"ama":"Rodríguez A, Navarro A, Asenjo R, et al. Parallel multiprocessing and scheduling on the heterogeneous Xeon+FPGA platform. <i>The Journal of Supercomputing</i>. 2019. doi:<a href=\"https://doi.org/10.1007/s11227-019-02935-1\">10.1007/s11227-019-02935-1</a>","ieee":"A. Rodríguez <i>et al.</i>, “Parallel multiprocessing and scheduling on the heterogeneous Xeon+FPGA platform,” <i>The Journal of Supercomputing</i>, 2019.","chicago":"Rodríguez, Andrés, Angeles Navarro, Rafael Asenjo, Francisco Corbera, Rubén Gran, Darío Suárez, and Jose Nunez-Yanez. “Parallel Multiprocessing and Scheduling on the Heterogeneous Xeon+FPGA Platform.” <i>The Journal of Supercomputing</i>, 2019. <a href=\"https://doi.org/10.1007/s11227-019-02935-1\">https://doi.org/10.1007/s11227-019-02935-1</a>.","mla":"Rodríguez, Andrés, et al. “Parallel Multiprocessing and Scheduling on the Heterogeneous Xeon+FPGA Platform.” <i>The Journal of Supercomputing</i>, 2019, doi:<a href=\"https://doi.org/10.1007/s11227-019-02935-1\">10.1007/s11227-019-02935-1</a>.","short":"A. Rodríguez, A. Navarro, R. Asenjo, F. Corbera, R. Gran, D. Suárez, J. Nunez-Yanez, The Journal of Supercomputing (2019).","bibtex":"@article{Rodríguez_Navarro_Asenjo_Corbera_Gran_Suárez_Nunez-Yanez_2019, title={Parallel multiprocessing and scheduling on the heterogeneous Xeon+FPGA platform}, DOI={<a href=\"https://doi.org/10.1007/s11227-019-02935-1\">10.1007/s11227-019-02935-1</a>}, journal={The Journal of Supercomputing}, author={Rodríguez, Andrés and Navarro, Angeles and Asenjo, Rafael and Corbera, Francisco and Gran, Rubén and Suárez, Darío and Nunez-Yanez, Jose}, year={2019} }","apa":"Rodríguez, A., Navarro, A., Asenjo, R., Corbera, F., Gran, R., Suárez, D., &#38; Nunez-Yanez, J. (2019). Parallel multiprocessing and scheduling on the heterogeneous Xeon+FPGA platform. <i>The Journal of Supercomputing</i>. <a href=\"https://doi.org/10.1007/s11227-019-02935-1\">https://doi.org/10.1007/s11227-019-02935-1</a>"},"publication_status":"published","publication_identifier":{"issn":["0920-8542","1573-0484"]},"keyword":["pc2-harp-ressources"],"language":[{"iso":"eng"}],"_id":"16423","user_id":"61189","abstract":[{"lang":"eng","text":"Heterogeneous computing that exploits simultaneous co-processing with different device types has been shown to be effective at both increasing performance and reducing energy consumption. In this paper, we extend a scheduling framework encapsulated in a high-level C++ template and previously developed for heterogeneous chips comprising CPU and GPU cores, to new high-performance platforms for the data center, which include a cache coherent FPGA fabric and many-core CPU resources. Our goal is to evaluate the suitability of our framework with these new FPGA-based platforms, identifying performance benefits and limitations.We target the state-of-the-art HARP processor that includes 14 high-end Xeon classes tightly coupled to a FPGA device located in the same package. We select eight benchmarks from the high-performance computing domain that have been ported and optimized for this heterogeneous platform. The results show that a dynamic and adaptive scheduler that exploits simultaneous processing among the devices can improve performance up to a factor of 8 × compared to the best alternative solutions that only use the CPU cores or the FPGA fabric. Moreover, our proposal achieves up to 15% and 37% of improvement compared to the best heterogeneous solutions found with a dynamic and static schedulers, respectively."}],"status":"public","type":"journal_article","publication":"The Journal of Supercomputing"},{"date_updated":"2022-01-06T06:52:50Z","date_created":"2020-04-06T12:49:40Z","author":[{"first_name":"Zhaoshi","full_name":"Li, Zhaoshi","last_name":"Li"},{"first_name":"Leibo","last_name":"Liu","full_name":"Liu, Leibo"},{"first_name":"Yangdong","last_name":"Deng","full_name":"Deng, Yangdong"},{"first_name":"Jiawei","full_name":"Wang, Jiawei","last_name":"Wang"},{"first_name":"Zhiwei","full_name":"Liu, Zhiwei","last_name":"Liu"},{"first_name":"Shouyi","full_name":"Yin, Shouyi","last_name":"Yin"},{"first_name":"Shaojun","full_name":"Wei, Shaojun","last_name":"Wei"}],"title":"FPGA-Accelerated Optimistic Concurrency Control for Transactional Memory","doi":"10.1145/3352460.3358270","publication_identifier":{"isbn":["9781450369381"]},"publication_status":"published","year":"2019","citation":{"apa":"Li, Z., Liu, L., Deng, Y., Wang, J., Liu, Z., Yin, S., &#38; Wei, S. (2019). FPGA-Accelerated Optimistic Concurrency Control for Transactional Memory. In <i>Proceedings of the 52nd Annual IEEE/ACM International Symposium on Microarchitecture</i>. <a href=\"https://doi.org/10.1145/3352460.3358270\">https://doi.org/10.1145/3352460.3358270</a>","short":"Z. Li, L. Liu, Y. Deng, J. Wang, Z. Liu, S. Yin, S. Wei, in: Proceedings of the 52nd Annual IEEE/ACM International Symposium on Microarchitecture, 2019.","bibtex":"@inproceedings{Li_Liu_Deng_Wang_Liu_Yin_Wei_2019, title={FPGA-Accelerated Optimistic Concurrency Control for Transactional Memory}, DOI={<a href=\"https://doi.org/10.1145/3352460.3358270\">10.1145/3352460.3358270</a>}, booktitle={Proceedings of the 52nd Annual IEEE/ACM International Symposium on Microarchitecture}, author={Li, Zhaoshi and Liu, Leibo and Deng, Yangdong and Wang, Jiawei and Liu, Zhiwei and Yin, Shouyi and Wei, Shaojun}, year={2019} }","mla":"Li, Zhaoshi, et al. “FPGA-Accelerated Optimistic Concurrency Control for Transactional Memory.” <i>Proceedings of the 52nd Annual IEEE/ACM International Symposium on Microarchitecture</i>, 2019, doi:<a href=\"https://doi.org/10.1145/3352460.3358270\">10.1145/3352460.3358270</a>.","chicago":"Li, Zhaoshi, Leibo Liu, Yangdong Deng, Jiawei Wang, Zhiwei Liu, Shouyi Yin, and Shaojun Wei. “FPGA-Accelerated Optimistic Concurrency Control for Transactional Memory.” In <i>Proceedings of the 52nd Annual IEEE/ACM International Symposium on Microarchitecture</i>, 2019. <a href=\"https://doi.org/10.1145/3352460.3358270\">https://doi.org/10.1145/3352460.3358270</a>.","ieee":"Z. Li <i>et al.</i>, “FPGA-Accelerated Optimistic Concurrency Control for Transactional Memory,” in <i>Proceedings of the 52nd Annual IEEE/ACM International Symposium on Microarchitecture</i>, 2019.","ama":"Li Z, Liu L, Deng Y, et al. FPGA-Accelerated Optimistic Concurrency Control for Transactional Memory. In: <i>Proceedings of the 52nd Annual IEEE/ACM International Symposium on Microarchitecture</i>. ; 2019. doi:<a href=\"https://doi.org/10.1145/3352460.3358270\">10.1145/3352460.3358270</a>"},"_id":"16427","user_id":"61189","keyword":["pc2-harp-ressources"],"language":[{"iso":"eng"}],"publication":"Proceedings of the 52nd Annual IEEE/ACM International Symposium on Microarchitecture","type":"conference","abstract":[{"text":"Transactional Memory (TM) has been considered as a promising alternative to existing synchronization operations, which are often the largest stumbling block to unleashing parallelism of applications. Efficient implementations of TM, however, are challenging due to the tension between lowering performance overhead and avoiding unnecessary aborts.\r\n\r\nIn this paper, we present Reachability-based Optimistic Concurrency Control for Transactional Memory (ROCoCoTM), a novel scheme which offloads concurrency control (CC) algorithms, the central building blocks of TM systems, to reconfigurable hardware. To reduce the abort rate, an innovative formalization of mainstream CC algorithms is developed to reveal a common restriction that leads to unnecessary aborts. This restriction is resolved by the ROCoCo algorithm with a centralized validation phase, which can be efficiently pipelined in hardware. Thanks to a high-performance offloading engine implemented in reconfigurable hardware, ROCoCo algorithm results in decreased abort rates and reduced performance overhead. The whole system is implemented on Intel's HARP2 platform and evaluated with the STAMP benchmark suite. Experiments show 1.55x and 8.05x geomean speedup over TinySTM and an HTM based on Intel TSX, respectively. Given the fast-growing deployment of commodity CPU-FPGA platforms, ROCoCoTM paves the way for software programmers to exploit heterogeneous computing resources with a high-level transactional abstraction to effectively extract the parallelism in modern applications.","lang":"eng"}],"status":"public"},{"status":"public","publication":"The Journal of Chemical Physics","type":"journal_article","language":[{"iso":"eng"}],"keyword":["pc2-ressources"],"article_number":"044704","user_id":"61189","_id":"16955","project":[{"_id":"52","name":"Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"citation":{"ama":"Heinen M, Vrabec J. Evaporation sampled by stationary molecular dynamics simulation. <i>The Journal of Chemical Physics</i>. 2019. doi:<a href=\"https://doi.org/10.1063/1.5111759\">10.1063/1.5111759</a>","ieee":"M. Heinen and J. Vrabec, “Evaporation sampled by stationary molecular dynamics simulation,” <i>The Journal of Chemical Physics</i>, 2019.","chicago":"Heinen, Matthias, and Jadran Vrabec. “Evaporation Sampled by Stationary Molecular Dynamics Simulation.” <i>The Journal of Chemical Physics</i>, 2019. <a href=\"https://doi.org/10.1063/1.5111759\">https://doi.org/10.1063/1.5111759</a>.","bibtex":"@article{Heinen_Vrabec_2019, title={Evaporation sampled by stationary molecular dynamics simulation}, DOI={<a href=\"https://doi.org/10.1063/1.5111759\">10.1063/1.5111759</a>}, number={044704}, journal={The Journal of Chemical Physics}, author={Heinen, Matthias and Vrabec, Jadran}, year={2019} }","short":"M. Heinen, J. Vrabec, The Journal of Chemical Physics (2019).","mla":"Heinen, Matthias, and Jadran Vrabec. “Evaporation Sampled by Stationary Molecular Dynamics Simulation.” <i>The Journal of Chemical Physics</i>, 044704, 2019, doi:<a href=\"https://doi.org/10.1063/1.5111759\">10.1063/1.5111759</a>.","apa":"Heinen, M., &#38; Vrabec, J. (2019). Evaporation sampled by stationary molecular dynamics simulation. <i>The Journal of Chemical Physics</i>. <a href=\"https://doi.org/10.1063/1.5111759\">https://doi.org/10.1063/1.5111759</a>"},"year":"2019","publication_identifier":{"issn":["0021-9606","1089-7690"]},"publication_status":"published","doi":"10.1063/1.5111759","title":"Evaporation sampled by stationary molecular dynamics simulation","author":[{"first_name":"Matthias","full_name":"Heinen, Matthias","last_name":"Heinen"},{"first_name":"Jadran","full_name":"Vrabec, Jadran","last_name":"Vrabec"}],"date_created":"2020-05-15T07:25:04Z","date_updated":"2022-01-06T06:53:00Z"},{"citation":{"short":"R. Fingerhut, G. Herres, J. Vrabec, Molecular Physics (2019).","mla":"Fingerhut, Robin, et al. “Thermodynamic Factor of Quaternary Mixtures from Kirkwood–Buff Integration.” <i>Molecular Physics</i>, e1643046, 2019, doi:<a href=\"https://doi.org/10.1080/00268976.2019.1643046\">10.1080/00268976.2019.1643046</a>.","bibtex":"@article{Fingerhut_Herres_Vrabec_2019, title={Thermodynamic factor of quaternary mixtures from Kirkwood–Buff integration}, DOI={<a href=\"https://doi.org/10.1080/00268976.2019.1643046\">10.1080/00268976.2019.1643046</a>}, number={e1643046}, journal={Molecular Physics}, author={Fingerhut, Robin and Herres, Gerhard and Vrabec, Jadran}, year={2019} }","apa":"Fingerhut, R., Herres, G., &#38; Vrabec, J. (2019). Thermodynamic factor of quaternary mixtures from Kirkwood–Buff integration. <i>Molecular Physics</i>. <a href=\"https://doi.org/10.1080/00268976.2019.1643046\">https://doi.org/10.1080/00268976.2019.1643046</a>","chicago":"Fingerhut, Robin, Gerhard Herres, and Jadran Vrabec. “Thermodynamic Factor of Quaternary Mixtures from Kirkwood–Buff Integration.” <i>Molecular Physics</i>, 2019. <a href=\"https://doi.org/10.1080/00268976.2019.1643046\">https://doi.org/10.1080/00268976.2019.1643046</a>.","ieee":"R. Fingerhut, G. Herres, and J. Vrabec, “Thermodynamic factor of quaternary mixtures from Kirkwood–Buff integration,” <i>Molecular Physics</i>, 2019.","ama":"Fingerhut R, Herres G, Vrabec J. Thermodynamic factor of quaternary mixtures from Kirkwood–Buff integration. <i>Molecular Physics</i>. 2019. doi:<a href=\"https://doi.org/10.1080/00268976.2019.1643046\">10.1080/00268976.2019.1643046</a>"},"year":"2019","publication_identifier":{"issn":["0026-8976","1362-3028"]},"publication_status":"published","doi":"10.1080/00268976.2019.1643046","title":"Thermodynamic factor of quaternary mixtures from Kirkwood–Buff integration","author":[{"first_name":"Robin","full_name":"Fingerhut, Robin","last_name":"Fingerhut"},{"last_name":"Herres","full_name":"Herres, Gerhard","first_name":"Gerhard"},{"full_name":"Vrabec, Jadran","last_name":"Vrabec","first_name":"Jadran"}],"date_created":"2020-05-15T07:25:34Z","date_updated":"2022-01-06T06:53:00Z","status":"public","publication":"Molecular Physics","type":"journal_article","language":[{"iso":"eng"}],"keyword":["pc2-ressources"],"article_number":"e1643046","user_id":"61189","_id":"16958","project":[{"name":"Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}]},{"status":"public","publication":"The Journal of Physical Chemistry C","type":"journal_article","keyword":["pc2-ressources"],"language":[{"iso":"eng"}],"_id":"16960","project":[{"_id":"52","name":"Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"user_id":"61189","year":"2019","page":"21367-21375","citation":{"apa":"Mennicken, M., Peter, S. K., Kaulen, C., Simon, U., &#38; Karthäuser, S. (2019). Controlling the Electronic Contact at the Terpyridine/Metal Interface. <i>The Journal of Physical Chemistry C</i>, 21367–21375. <a href=\"https://doi.org/10.1021/acs.jpcc.9b05865\">https://doi.org/10.1021/acs.jpcc.9b05865</a>","mla":"Mennicken, Max, et al. “Controlling the Electronic Contact at the Terpyridine/Metal Interface.” <i>The Journal of Physical Chemistry C</i>, 2019, pp. 21367–75, doi:<a href=\"https://doi.org/10.1021/acs.jpcc.9b05865\">10.1021/acs.jpcc.9b05865</a>.","bibtex":"@article{Mennicken_Peter_Kaulen_Simon_Karthäuser_2019, title={Controlling the Electronic Contact at the Terpyridine/Metal Interface}, DOI={<a href=\"https://doi.org/10.1021/acs.jpcc.9b05865\">10.1021/acs.jpcc.9b05865</a>}, journal={The Journal of Physical Chemistry C}, author={Mennicken, Max and Peter, Sophia Katharina and Kaulen, Corinna and Simon, Ulrich and Karthäuser, Silvia}, year={2019}, pages={21367–21375} }","short":"M. Mennicken, S.K. Peter, C. Kaulen, U. Simon, S. Karthäuser, The Journal of Physical Chemistry C (2019) 21367–21375.","ama":"Mennicken M, Peter SK, Kaulen C, Simon U, Karthäuser S. Controlling the Electronic Contact at the Terpyridine/Metal Interface. <i>The Journal of Physical Chemistry C</i>. 2019:21367-21375. doi:<a href=\"https://doi.org/10.1021/acs.jpcc.9b05865\">10.1021/acs.jpcc.9b05865</a>","ieee":"M. Mennicken, S. K. Peter, C. Kaulen, U. Simon, and S. Karthäuser, “Controlling the Electronic Contact at the Terpyridine/Metal Interface,” <i>The Journal of Physical Chemistry C</i>, pp. 21367–21375, 2019.","chicago":"Mennicken, Max, Sophia Katharina Peter, Corinna Kaulen, Ulrich Simon, and Silvia Karthäuser. “Controlling the Electronic Contact at the Terpyridine/Metal Interface.” <i>The Journal of Physical Chemistry C</i>, 2019, 21367–75. <a href=\"https://doi.org/10.1021/acs.jpcc.9b05865\">https://doi.org/10.1021/acs.jpcc.9b05865</a>."},"publication_identifier":{"issn":["1932-7447","1932-7455"]},"publication_status":"published","title":"Controlling the Electronic Contact at the Terpyridine/Metal Interface","doi":"10.1021/acs.jpcc.9b05865","date_updated":"2022-01-06T06:53:00Z","date_created":"2020-05-15T09:38:49Z","author":[{"first_name":"Max","last_name":"Mennicken","full_name":"Mennicken, Max"},{"full_name":"Peter, Sophia Katharina","last_name":"Peter","first_name":"Sophia Katharina"},{"last_name":"Kaulen","full_name":"Kaulen, Corinna","first_name":"Corinna"},{"first_name":"Ulrich","full_name":"Simon, Ulrich","last_name":"Simon"},{"first_name":"Silvia","last_name":"Karthäuser","full_name":"Karthäuser, Silvia"}]},{"year":"2019","citation":{"short":"C. Wiebeler, I. Schapiro, Molecules (2019).","bibtex":"@article{Wiebeler_Schapiro_2019, title={QM/MM Benchmarking of Cyanobacteriochrome Slr1393g3 Absorption Spectra}, DOI={<a href=\"https://doi.org/10.3390/molecules24091720\">10.3390/molecules24091720</a>}, number={1720}, journal={Molecules}, author={Wiebeler, Christian and Schapiro, Igor}, year={2019} }","mla":"Wiebeler, Christian, and Igor Schapiro. “QM/MM Benchmarking of Cyanobacteriochrome Slr1393g3 Absorption Spectra.” <i>Molecules</i>, 1720, 2019, doi:<a href=\"https://doi.org/10.3390/molecules24091720\">10.3390/molecules24091720</a>.","apa":"Wiebeler, C., &#38; Schapiro, I. (2019). QM/MM Benchmarking of Cyanobacteriochrome Slr1393g3 Absorption Spectra. <i>Molecules</i>. <a href=\"https://doi.org/10.3390/molecules24091720\">https://doi.org/10.3390/molecules24091720</a>","ieee":"C. Wiebeler and I. Schapiro, “QM/MM Benchmarking of Cyanobacteriochrome Slr1393g3 Absorption Spectra,” <i>Molecules</i>, 2019.","chicago":"Wiebeler, Christian, and Igor Schapiro. “QM/MM Benchmarking of Cyanobacteriochrome Slr1393g3 Absorption Spectra.” <i>Molecules</i>, 2019. <a href=\"https://doi.org/10.3390/molecules24091720\">https://doi.org/10.3390/molecules24091720</a>.","ama":"Wiebeler C, Schapiro I. QM/MM Benchmarking of Cyanobacteriochrome Slr1393g3 Absorption Spectra. <i>Molecules</i>. 2019. doi:<a href=\"https://doi.org/10.3390/molecules24091720\">10.3390/molecules24091720</a>"},"publication_identifier":{"issn":["1420-3049"]},"publication_status":"published","title":"QM/MM Benchmarking of Cyanobacteriochrome Slr1393g3 Absorption Spectra","doi":"10.3390/molecules24091720","date_updated":"2022-01-06T06:53:04Z","author":[{"first_name":"Christian","full_name":"Wiebeler, Christian","last_name":"Wiebeler"},{"last_name":"Schapiro","full_name":"Schapiro, Igor","first_name":"Igor"}],"date_created":"2020-06-04T07:27:03Z","abstract":[{"lang":"eng","text":"<jats:p>Cyanobacteriochromes are compact and spectrally diverse photoreceptor proteins that are promising candidates for biotechnological applications. Computational studies can contribute to an understanding at a molecular level of their wide spectral tuning and diversity. In this contribution, we benchmark methods to model a 110 nm shift in the UV/Vis absorption spectrum from a red- to a green-absorbing form of the cyanobacteriochrome Slr1393g3. Based on an assessment of semiempirical methods to describe the chromophore geometries of both forms in vacuo, we find that DFTB2+D leads to structures that are the closest to the reference method. The benchmark of the excited state calculations is based on snapshots from quantum mechanics/molecular mechanics molecular dynamics simulations. In our case, the methods RI-ADC(2) and sTD-DFT based on CAM-B3LYP ground state calculations perform the best, whereas no functional can be recommended to simulate the absorption spectra of both forms with time-dependent density functional theory. Furthermore, the difference in absorption for the lowest energy absorption maxima of both forms can already be modelled with optimized structures, but sampling is required to improve the shape of the absorption bands of both forms, in particular for the second band. This benchmark study can guide further computational studies, as it assesses essential components of a protocol to model the spectral tuning of both cyanobacteriochromes and the related phytochromes.</jats:p>"}],"status":"public","publication":"Molecules","type":"journal_article","keyword":["pc2-ressources"],"article_number":"1720","language":[{"iso":"eng"}],"_id":"17077","project":[{"_id":"52","name":"Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"user_id":"61189"},{"keyword":["pc2-ressources"],"language":[{"iso":"eng"}],"_id":"16959","project":[{"name":"Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"user_id":"14931","status":"public","publication":"Quantum Information and Measurement (QIM) V: Quantum Technologies","type":"conference","title":"Theoretical description of a multimode SU(1,1) interferometer","doi":"10.1364/qim.2019.t5a.35","date_updated":"2023-02-10T16:01:51Z","author":[{"last_name":"Ferreri","full_name":"Ferreri, A.","first_name":"A."},{"last_name":"Sharapova","full_name":"Sharapova, P.","first_name":"P."},{"id":"36389","full_name":"Luo, Kai Hong","last_name":"Luo","orcid":"0000-0003-1008-4976","first_name":"Kai Hong"},{"full_name":"Herrmann, H.","last_name":"Herrmann","first_name":"H."},{"first_name":"C.","last_name":"Silberhorn","full_name":"Silberhorn, C."}],"date_created":"2020-05-15T09:21:25Z","year":"2019","citation":{"apa":"Ferreri, A., Sharapova, P., Luo, K. H., Herrmann, H., &#38; Silberhorn, C. (2019). Theoretical description of a multimode SU(1,1) interferometer. <i>Quantum Information and Measurement (QIM) V: Quantum Technologies</i>. <a href=\"https://doi.org/10.1364/qim.2019.t5a.35\">https://doi.org/10.1364/qim.2019.t5a.35</a>","short":"A. Ferreri, P. Sharapova, K.H. Luo, H. Herrmann, C. Silberhorn, in: Quantum Information and Measurement (QIM) V: Quantum Technologies, 2019.","bibtex":"@inproceedings{Ferreri_Sharapova_Luo_Herrmann_Silberhorn_2019, title={Theoretical description of a multimode SU(1,1) interferometer}, DOI={<a href=\"https://doi.org/10.1364/qim.2019.t5a.35\">10.1364/qim.2019.t5a.35</a>}, booktitle={Quantum Information and Measurement (QIM) V: Quantum Technologies}, author={Ferreri, A. and Sharapova, P. and Luo, Kai Hong and Herrmann, H. and Silberhorn, C.}, year={2019} }","mla":"Ferreri, A., et al. “Theoretical Description of a Multimode SU(1,1) Interferometer.” <i>Quantum Information and Measurement (QIM) V: Quantum Technologies</i>, 2019, doi:<a href=\"https://doi.org/10.1364/qim.2019.t5a.35\">10.1364/qim.2019.t5a.35</a>.","ama":"Ferreri A, Sharapova P, Luo KH, Herrmann H, Silberhorn C. Theoretical description of a multimode SU(1,1) interferometer. In: <i>Quantum Information and Measurement (QIM) V: Quantum Technologies</i>. ; 2019. doi:<a href=\"https://doi.org/10.1364/qim.2019.t5a.35\">10.1364/qim.2019.t5a.35</a>","chicago":"Ferreri, A., P. Sharapova, Kai Hong Luo, H. Herrmann, and C. Silberhorn. “Theoretical Description of a Multimode SU(1,1) Interferometer.” In <i>Quantum Information and Measurement (QIM) V: Quantum Technologies</i>, 2019. <a href=\"https://doi.org/10.1364/qim.2019.t5a.35\">https://doi.org/10.1364/qim.2019.t5a.35</a>.","ieee":"A. Ferreri, P. Sharapova, K. H. Luo, H. Herrmann, and C. Silberhorn, “Theoretical description of a multimode SU(1,1) interferometer,” 2019, doi: <a href=\"https://doi.org/10.1364/qim.2019.t5a.35\">10.1364/qim.2019.t5a.35</a>."},"publication_identifier":{"isbn":["9781943580569"]},"publication_status":"published"},{"project":[{"name":"Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"_id":"16945","user_id":"14931","keyword":["pc2-ressources"],"language":[{"iso":"eng"}],"type":"preprint","status":"public","date_updated":"2023-02-10T16:05:00Z","author":[{"first_name":"Matvei","full_name":"Riabinin, Matvei","last_name":"Riabinin"},{"first_name":"Polina","last_name":"Sharapova","id":"60286","full_name":"Sharapova, Polina"},{"id":"49683","full_name":"Bartley, Tim","last_name":"Bartley","first_name":"Tim"},{"first_name":"Torsten","orcid":"0000-0001-8864-2072","last_name":"Meier","full_name":"Meier, Torsten","id":"344"}],"date_created":"2020-05-08T09:13:02Z","title":"Generating two-mode squeezing and Schrödinger cat states with multimode measurement-induced nonlinearity","year":"2019","citation":{"ieee":"M. Riabinin, P. Sharapova, T. Bartley, and T. Meier, “Generating two-mode squeezing and Schrödinger cat states with multimode measurement-induced nonlinearity.” 2019.","chicago":"Riabinin, Matvei, Polina Sharapova, Tim Bartley, and Torsten Meier. “Generating Two-Mode Squeezing and Schrödinger Cat States with Multimode Measurement-Induced Nonlinearity,” 2019.","ama":"Riabinin M, Sharapova P, Bartley T, Meier T. Generating two-mode squeezing and Schrödinger cat states with multimode measurement-induced nonlinearity. Published online 2019.","apa":"Riabinin, M., Sharapova, P., Bartley, T., &#38; Meier, T. (2019). <i>Generating two-mode squeezing and Schrödinger cat states with multimode measurement-induced nonlinearity</i>.","bibtex":"@article{Riabinin_Sharapova_Bartley_Meier_2019, title={Generating two-mode squeezing and Schrödinger cat states with multimode measurement-induced nonlinearity}, author={Riabinin, Matvei and Sharapova, Polina and Bartley, Tim and Meier, Torsten}, year={2019} }","mla":"Riabinin, Matvei, et al. <i>Generating Two-Mode Squeezing and Schrödinger Cat States with Multimode Measurement-Induced Nonlinearity</i>. 2019.","short":"M. Riabinin, P. Sharapova, T. Bartley, T. Meier, (2019)."}},{"publication":"IEEE Computer Architecture Letters","type":"journal_article","abstract":[{"text":"The advent of FPGA-based hybrid architecture offers the opportunity of customizing memory subsystems to enhance the overall system performance. However, it is not straightforward to design efficient FPGA circuits for emerging FPGAs applications such as in-memory database and graph analytics, which heavily depend on concurrent data structures (CDS'). Highly dynamic behaviors of CDS' have to be orchestrated by synchronization primitives for correct execution. These primitives induce overwhelming memory traffic for synchronizations on FPGAs. This paper proposes a novel method for systematically exploring and exploiting memory-level parallelism (MLP) of CDS by transactional execution on FPGAs. Inspired by the idea that semantics of transactions can be implemented in a more efficient and scalable manner on FPGAs than on CPUs, we propose a transaction-based reconfigurable runtime system for capturing MLP of CDS'. Experiments on linked-list and skip-list show our approach achieves 5.18x and 1.55x throughput improvement on average than lock-based FPGA implementations and optimized CDS algorithms on a state-of-the-art multi-core CPU respectively.","lang":"eng"}],"status":"public","_id":"16416","user_id":"61189","keyword":["pc2-harp-ressources"],"language":[{"iso":"eng"}],"publication_identifier":{"issn":["1556-6056","1556-6064","2473-2575"]},"publication_status":"published","year":"2018","page":"147-150","citation":{"ieee":"Z. Li, L. Liu, Y. Deng, S. Yin, and S. Wei, “Breaking the Synchronization Bottleneck with Reconfigurable Transactional Execution,” <i>IEEE Computer Architecture Letters</i>, pp. 147–150, 2018.","chicago":"Li, Zhaoshi, Leibo Liu, Yangdong Deng, Shouyi Yin, and Shaojun Wei. “Breaking the Synchronization Bottleneck with Reconfigurable Transactional Execution.” <i>IEEE Computer Architecture Letters</i>, 2018, 147–50. <a href=\"https://doi.org/10.1109/lca.2018.2828402\">https://doi.org/10.1109/lca.2018.2828402</a>.","ama":"Li Z, Liu L, Deng Y, Yin S, Wei S. Breaking the Synchronization Bottleneck with Reconfigurable Transactional Execution. <i>IEEE Computer Architecture Letters</i>. 2018:147-150. doi:<a href=\"https://doi.org/10.1109/lca.2018.2828402\">10.1109/lca.2018.2828402</a>","apa":"Li, Z., Liu, L., Deng, Y., Yin, S., &#38; Wei, S. (2018). Breaking the Synchronization Bottleneck with Reconfigurable Transactional Execution. <i>IEEE Computer Architecture Letters</i>, 147–150. <a href=\"https://doi.org/10.1109/lca.2018.2828402\">https://doi.org/10.1109/lca.2018.2828402</a>","mla":"Li, Zhaoshi, et al. “Breaking the Synchronization Bottleneck with Reconfigurable Transactional Execution.” <i>IEEE Computer Architecture Letters</i>, 2018, pp. 147–50, doi:<a href=\"https://doi.org/10.1109/lca.2018.2828402\">10.1109/lca.2018.2828402</a>.","bibtex":"@article{Li_Liu_Deng_Yin_Wei_2018, title={Breaking the Synchronization Bottleneck with Reconfigurable Transactional Execution}, DOI={<a href=\"https://doi.org/10.1109/lca.2018.2828402\">10.1109/lca.2018.2828402</a>}, journal={IEEE Computer Architecture Letters}, author={Li, Zhaoshi and Liu, Leibo and Deng, Yangdong and Yin, Shouyi and Wei, Shaojun}, year={2018}, pages={147–150} }","short":"Z. Li, L. Liu, Y. Deng, S. Yin, S. Wei, IEEE Computer Architecture Letters (2018) 147–150."},"date_updated":"2022-01-06T06:52:50Z","author":[{"first_name":"Zhaoshi","last_name":"Li","full_name":"Li, Zhaoshi"},{"first_name":"Leibo","full_name":"Liu, Leibo","last_name":"Liu"},{"first_name":"Yangdong","full_name":"Deng, Yangdong","last_name":"Deng"},{"full_name":"Yin, Shouyi","last_name":"Yin","first_name":"Shouyi"},{"first_name":"Shaojun","last_name":"Wei","full_name":"Wei, Shaojun"}],"date_created":"2020-04-06T11:49:12Z","title":"Breaking the Synchronization Bottleneck with Reconfigurable Transactional Execution","doi":"10.1109/lca.2018.2828402"},{"author":[{"last_name":"Ceissler","full_name":"Ceissler, Ciro","first_name":"Ciro"},{"last_name":"Nepomuceno","full_name":"Nepomuceno, Ramon","first_name":"Ramon"},{"full_name":"Pereira, Marcio","last_name":"Pereira","first_name":"Marcio"},{"full_name":"Araujo, Guido","last_name":"Araujo","first_name":"Guido"}],"date_created":"2020-04-06T12:02:37Z","date_updated":"2022-01-06T06:52:50Z","doi":"10.1109/fccm.2018.00058","title":"Automatic Offloading of Cluster Accelerators","publication_identifier":{"isbn":["9781538655221"]},"publication_status":"published","citation":{"chicago":"Ceissler, Ciro, Ramon Nepomuceno, Marcio Pereira, and Guido Araujo. “Automatic Offloading of Cluster Accelerators.” In <i>2018 IEEE 26th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)</i>, 2018. <a href=\"https://doi.org/10.1109/fccm.2018.00058\">https://doi.org/10.1109/fccm.2018.00058</a>.","ieee":"C. Ceissler, R. Nepomuceno, M. Pereira, and G. Araujo, “Automatic Offloading of Cluster Accelerators,” in <i>2018 IEEE 26th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)</i>, 2018.","ama":"Ceissler C, Nepomuceno R, Pereira M, Araujo G. Automatic Offloading of Cluster Accelerators. In: <i>2018 IEEE 26th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)</i>. ; 2018. doi:<a href=\"https://doi.org/10.1109/fccm.2018.00058\">10.1109/fccm.2018.00058</a>","apa":"Ceissler, C., Nepomuceno, R., Pereira, M., &#38; Araujo, G. (2018). Automatic Offloading of Cluster Accelerators. In <i>2018 IEEE 26th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)</i>. <a href=\"https://doi.org/10.1109/fccm.2018.00058\">https://doi.org/10.1109/fccm.2018.00058</a>","mla":"Ceissler, Ciro, et al. “Automatic Offloading of Cluster Accelerators.” <i>2018 IEEE 26th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)</i>, 2018, doi:<a href=\"https://doi.org/10.1109/fccm.2018.00058\">10.1109/fccm.2018.00058</a>.","short":"C. Ceissler, R. Nepomuceno, M. Pereira, G. Araujo, in: 2018 IEEE 26th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM), 2018.","bibtex":"@inproceedings{Ceissler_Nepomuceno_Pereira_Araujo_2018, title={Automatic Offloading of Cluster Accelerators}, DOI={<a href=\"https://doi.org/10.1109/fccm.2018.00058\">10.1109/fccm.2018.00058</a>}, booktitle={2018 IEEE 26th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)}, author={Ceissler, Ciro and Nepomuceno, Ramon and Pereira, Marcio and Araujo, Guido}, year={2018} }"},"year":"2018","user_id":"61189","_id":"16421","language":[{"iso":"eng"}],"keyword":["pc2-harp-ressources"],"publication":"2018 IEEE 26th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)","type":"conference","status":"public","abstract":[{"lang":"eng","text":"The sheer amount of computing resources required to run modern cloud workloads has put a lot of pressure on the design of power efficient cluster nodes. To address this problem, Intel (HARP) and Microsoft (Catapult) have proposed CPU-FPGA integrated architectures that can deliver efficient power-performance executions. Unfortunately, the integration of FPGA acceleration modules to software is a challenging endeavor that does not have a seamless programming model. This paper proposes HardCloud (www.hardcloud.org), an extension of the OpenMP 4.X standard that eases the task of offloading FPGA modules to cluster accelerators."}]},{"year":"2018","citation":{"ama":"Solis-Vasquez L, Koch A. A Case Study in Using OpenCL on FPGAs: Creating an Open-Source Accelerator of the AutoDock Molecular Docking Software. In: <i>FSP Workshop 2018; Fifth International Workshop on FPGAs for Software Programmers</i>. ; 2018:1-10.","ieee":"L. Solis-Vasquez and A. Koch, “A Case Study in Using OpenCL on FPGAs: Creating an Open-Source Accelerator of the AutoDock Molecular Docking Software,” in <i>FSP Workshop 2018; Fifth International Workshop on FPGAs for Software Programmers</i>, 2018, pp. 1–10.","chicago":"Solis-Vasquez, Leonardo, and Andreas Koch. “A Case Study in Using OpenCL on FPGAs: Creating an Open-Source Accelerator of the AutoDock Molecular Docking Software.” In <i>FSP Workshop 2018; Fifth International Workshop on FPGAs for Software Programmers</i>, 1–10, 2018.","bibtex":"@inproceedings{Solis-Vasquez_Koch_2018, title={A Case Study in Using OpenCL on FPGAs: Creating an Open-Source Accelerator of the AutoDock Molecular Docking Software}, booktitle={FSP Workshop 2018; Fifth International Workshop on FPGAs for Software Programmers}, author={Solis-Vasquez, Leonardo and Koch, Andreas}, year={2018}, pages={1–10} }","short":"L. Solis-Vasquez, A. Koch, in: FSP Workshop 2018; Fifth International Workshop on FPGAs for Software Programmers, 2018, pp. 1–10.","mla":"Solis-Vasquez, Leonardo, and Andreas Koch. “A Case Study in Using OpenCL on FPGAs: Creating an Open-Source Accelerator of the AutoDock Molecular Docking Software.” <i>FSP Workshop 2018; Fifth International Workshop on FPGAs for Software Programmers</i>, 2018, pp. 1–10.","apa":"Solis-Vasquez, L., &#38; Koch, A. (2018). A Case Study in Using OpenCL on FPGAs: Creating an Open-Source Accelerator of the AutoDock Molecular Docking Software. In <i>FSP Workshop 2018; Fifth International Workshop on FPGAs for Software Programmers</i> (pp. 1–10)."},"page":"1-10","date_updated":"2022-01-06T06:52:50Z","date_created":"2020-04-06T12:23:38Z","author":[{"last_name":"Solis-Vasquez","full_name":"Solis-Vasquez, Leonardo","first_name":"Leonardo"},{"first_name":"Andreas","last_name":"Koch","full_name":"Koch, Andreas"}],"title":"A Case Study in Using OpenCL on FPGAs: Creating an Open-Source Accelerator of the AutoDock Molecular Docking Software","type":"conference","publication":"FSP Workshop 2018; Fifth International Workshop on FPGAs for Software Programmers","abstract":[{"text":"In recent years, OpenCL has been increasingly adopted as it enables software programmers to harness the performance and power efficiency of FPGAs. Despite simplifying the FPGA programming challenge, achieving high performance and energy efficiency with OpenCL is still a difficult task. In order to further contribute to the advance of the OpenCL usage for FPGAs, we utilize a realistic application scenario as our case study: the AutoDock molecular docking software. While OpenCL has proven its effectiveness in accelerating molecular docking on GPUs, for FPGA-based AutoDock accelerators it struggles with difficult design patterns. Besides complex multiple-producers to single-consumer datapaths, these include time-intensive loops with variable runtimes. Therefore, this work presents the design and optimization steps for implementing AutoDock in OpenCL targeting an Arria-10 FPGA, as well as a corresponding execution runtime and energy-efficiency evaluation. Applying these techniques improved the performance of the initial OpenCL implementation for FPGAs by three orders of magnitude, with the final version of the code now yielding speed-ups of up to ~2.7x, and energy-efficiency gains of up to ~1.8x over the original serial AutoDock version executing on a current-generation CPU.","lang":"eng"}],"status":"public","_id":"16425","user_id":"61189","keyword":["pc2-harp-ressources"],"language":[{"iso":"eng"}]},{"publication":"Angewandte Chemie","type":"journal_article","status":"public","_id":"17079","project":[{"_id":"52","name":"Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"user_id":"61189","keyword":["pc2-ressources"],"language":[{"iso":"eng"}],"publication_identifier":{"issn":["0044-8249"]},"publication_status":"published","year":"2018","page":"1952-1957","citation":{"apa":"Wiebeler, C., Rao, A. G., Gärtner, W., &#38; Schapiro, I. (2018). Die effektive Konjugationslänge ist für die spektrale Verschiebung im rot/grün schaltenden Cyanobakteriochrom Slr1393g3 verantwortlich. <i>Angewandte Chemie</i>, 1952–1957. <a href=\"https://doi.org/10.1002/ange.201810266\">https://doi.org/10.1002/ange.201810266</a>","bibtex":"@article{Wiebeler_Rao_Gärtner_Schapiro_2018, title={Die effektive Konjugationslänge ist für die spektrale Verschiebung im rot/grün schaltenden Cyanobakteriochrom Slr1393g3 verantwortlich}, DOI={<a href=\"https://doi.org/10.1002/ange.201810266\">10.1002/ange.201810266</a>}, journal={Angewandte Chemie}, author={Wiebeler, Christian and Rao, Aditya G. and Gärtner, Wolfgang and Schapiro, Igor}, year={2018}, pages={1952–1957} }","short":"C. Wiebeler, A.G. Rao, W. Gärtner, I. Schapiro, Angewandte Chemie (2018) 1952–1957.","mla":"Wiebeler, Christian, et al. “Die Effektive Konjugationslänge Ist Für Die Spektrale Verschiebung Im Rot/Grün Schaltenden Cyanobakteriochrom Slr1393g3 Verantwortlich.” <i>Angewandte Chemie</i>, 2018, pp. 1952–57, doi:<a href=\"https://doi.org/10.1002/ange.201810266\">10.1002/ange.201810266</a>.","chicago":"Wiebeler, Christian, Aditya G. Rao, Wolfgang Gärtner, and Igor Schapiro. “Die Effektive Konjugationslänge Ist Für Die Spektrale Verschiebung Im Rot/Grün Schaltenden Cyanobakteriochrom Slr1393g3 Verantwortlich.” <i>Angewandte Chemie</i>, 2018, 1952–57. <a href=\"https://doi.org/10.1002/ange.201810266\">https://doi.org/10.1002/ange.201810266</a>.","ieee":"C. Wiebeler, A. G. Rao, W. Gärtner, and I. Schapiro, “Die effektive Konjugationslänge ist für die spektrale Verschiebung im rot/grün schaltenden Cyanobakteriochrom Slr1393g3 verantwortlich,” <i>Angewandte Chemie</i>, pp. 1952–1957, 2018.","ama":"Wiebeler C, Rao AG, Gärtner W, Schapiro I. Die effektive Konjugationslänge ist für die spektrale Verschiebung im rot/grün schaltenden Cyanobakteriochrom Slr1393g3 verantwortlich. <i>Angewandte Chemie</i>. 2018:1952-1957. doi:<a href=\"https://doi.org/10.1002/ange.201810266\">10.1002/ange.201810266</a>"},"date_updated":"2022-01-06T06:53:04Z","date_created":"2020-06-04T07:31:59Z","author":[{"last_name":"Wiebeler","full_name":"Wiebeler, Christian","first_name":"Christian"},{"last_name":"Rao","full_name":"Rao, Aditya G.","first_name":"Aditya G."},{"first_name":"Wolfgang","last_name":"Gärtner","full_name":"Gärtner, Wolfgang"},{"first_name":"Igor","last_name":"Schapiro","full_name":"Schapiro, Igor"}],"title":"Die effektive Konjugationslänge ist für die spektrale Verschiebung im rot/grün schaltenden Cyanobakteriochrom Slr1393g3 verantwortlich","doi":"10.1002/ange.201810266"},{"type":"journal_article","publication":"Angewandte Chemie International Edition","status":"public","project":[{"_id":"52","name":"Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"_id":"17080","user_id":"61189","keyword":["pc2-ressources"],"language":[{"iso":"eng"}],"publication_status":"published","publication_identifier":{"issn":["1433-7851"]},"year":"2018","citation":{"ieee":"C. Wiebeler, A. G. Rao, W. Gärtner, and I. Schapiro, “The Effective Conjugation Length Is Responsible for the Red/Green Spectral Tuning in the Cyanobacteriochrome Slr1393g3,” <i>Angewandte Chemie International Edition</i>, pp. 1934–1938, 2018.","chicago":"Wiebeler, Christian, Aditya G. Rao, Wolfgang Gärtner, and Igor Schapiro. “The Effective Conjugation Length Is Responsible for the Red/Green Spectral Tuning in the Cyanobacteriochrome Slr1393g3.” <i>Angewandte Chemie International Edition</i>, 2018, 1934–38. <a href=\"https://doi.org/10.1002/anie.201810266\">https://doi.org/10.1002/anie.201810266</a>.","ama":"Wiebeler C, Rao AG, Gärtner W, Schapiro I. The Effective Conjugation Length Is Responsible for the Red/Green Spectral Tuning in the Cyanobacteriochrome Slr1393g3. <i>Angewandte Chemie International Edition</i>. 2018:1934-1938. doi:<a href=\"https://doi.org/10.1002/anie.201810266\">10.1002/anie.201810266</a>","mla":"Wiebeler, Christian, et al. “The Effective Conjugation Length Is Responsible for the Red/Green Spectral Tuning in the Cyanobacteriochrome Slr1393g3.” <i>Angewandte Chemie International Edition</i>, 2018, pp. 1934–38, doi:<a href=\"https://doi.org/10.1002/anie.201810266\">10.1002/anie.201810266</a>.","short":"C. Wiebeler, A.G. Rao, W. Gärtner, I. Schapiro, Angewandte Chemie International Edition (2018) 1934–1938.","bibtex":"@article{Wiebeler_Rao_Gärtner_Schapiro_2018, title={The Effective Conjugation Length Is Responsible for the Red/Green Spectral Tuning in the Cyanobacteriochrome Slr1393g3}, DOI={<a href=\"https://doi.org/10.1002/anie.201810266\">10.1002/anie.201810266</a>}, journal={Angewandte Chemie International Edition}, author={Wiebeler, Christian and Rao, Aditya G. and Gärtner, Wolfgang and Schapiro, Igor}, year={2018}, pages={1934–1938} }","apa":"Wiebeler, C., Rao, A. G., Gärtner, W., &#38; Schapiro, I. (2018). The Effective Conjugation Length Is Responsible for the Red/Green Spectral Tuning in the Cyanobacteriochrome Slr1393g3. <i>Angewandte Chemie International Edition</i>, 1934–1938. <a href=\"https://doi.org/10.1002/anie.201810266\">https://doi.org/10.1002/anie.201810266</a>"},"page":"1934-1938","date_updated":"2022-01-06T06:53:04Z","author":[{"first_name":"Christian","last_name":"Wiebeler","full_name":"Wiebeler, Christian"},{"first_name":"Aditya G.","last_name":"Rao","full_name":"Rao, Aditya G."},{"last_name":"Gärtner","full_name":"Gärtner, Wolfgang","first_name":"Wolfgang"},{"first_name":"Igor","last_name":"Schapiro","full_name":"Schapiro, Igor"}],"date_created":"2020-06-04T07:34:14Z","title":"The Effective Conjugation Length Is Responsible for the Red/Green Spectral Tuning in the Cyanobacteriochrome Slr1393g3","doi":"10.1002/anie.201810266"},{"type":"journal_article","publication":"Journal of the American Chemical Society","status":"public","project":[{"_id":"52","name":"Computing Resources Provided by the Paderborn Center for Parallel Computing"}],"_id":"16954","user_id":"61189","keyword":["pc2-ressources"],"language":[{"iso":"eng"}],"publication_status":"published","publication_identifier":{"issn":["0002-7863","1520-5126"]},"year":"2016","citation":{"ama":"Hanske J, Aleksić S, Ballaschk M, et al. Intradomain Allosteric Network Modulates Calcium Affinity of the C-Type Lectin Receptor Langerin. <i>Journal of the American Chemical Society</i>. 2016:12176-12186. doi:<a href=\"https://doi.org/10.1021/jacs.6b05458\">10.1021/jacs.6b05458</a>","ieee":"J. Hanske <i>et al.</i>, “Intradomain Allosteric Network Modulates Calcium Affinity of the C-Type Lectin Receptor Langerin,” <i>Journal of the American Chemical Society</i>, pp. 12176–12186, 2016.","chicago":"Hanske, Jonas, Stevan Aleksić, Martin Ballaschk, Marcel Jurk, Elena Shanina, Monika Beerbaum, Peter Schmieder, Bettina G. Keller, and Christoph Rademacher. “Intradomain Allosteric Network Modulates Calcium Affinity of the C-Type Lectin Receptor Langerin.” <i>Journal of the American Chemical Society</i>, 2016, 12176–86. <a href=\"https://doi.org/10.1021/jacs.6b05458\">https://doi.org/10.1021/jacs.6b05458</a>.","apa":"Hanske, J., Aleksić, S., Ballaschk, M., Jurk, M., Shanina, E., Beerbaum, M., … Rademacher, C. (2016). Intradomain Allosteric Network Modulates Calcium Affinity of the C-Type Lectin Receptor Langerin. <i>Journal of the American Chemical Society</i>, 12176–12186. <a href=\"https://doi.org/10.1021/jacs.6b05458\">https://doi.org/10.1021/jacs.6b05458</a>","bibtex":"@article{Hanske_Aleksić_Ballaschk_Jurk_Shanina_Beerbaum_Schmieder_Keller_Rademacher_2016, title={Intradomain Allosteric Network Modulates Calcium Affinity of the C-Type Lectin Receptor Langerin}, DOI={<a href=\"https://doi.org/10.1021/jacs.6b05458\">10.1021/jacs.6b05458</a>}, journal={Journal of the American Chemical Society}, author={Hanske, Jonas and Aleksić, Stevan and Ballaschk, Martin and Jurk, Marcel and Shanina, Elena and Beerbaum, Monika and Schmieder, Peter and Keller, Bettina G. and Rademacher, Christoph}, year={2016}, pages={12176–12186} }","short":"J. Hanske, S. Aleksić, M. Ballaschk, M. Jurk, E. Shanina, M. Beerbaum, P. Schmieder, B.G. Keller, C. Rademacher, Journal of the American Chemical Society (2016) 12176–12186.","mla":"Hanske, Jonas, et al. “Intradomain Allosteric Network Modulates Calcium Affinity of the C-Type Lectin Receptor Langerin.” <i>Journal of the American Chemical Society</i>, 2016, pp. 12176–86, doi:<a href=\"https://doi.org/10.1021/jacs.6b05458\">10.1021/jacs.6b05458</a>."},"page":"12176-12186","date_updated":"2022-01-06T06:53:00Z","date_created":"2020-05-15T07:11:55Z","author":[{"first_name":"Jonas","full_name":"Hanske, Jonas","last_name":"Hanske"},{"full_name":"Aleksić, Stevan","last_name":"Aleksić","first_name":"Stevan"},{"last_name":"Ballaschk","full_name":"Ballaschk, Martin","first_name":"Martin"},{"full_name":"Jurk, Marcel","last_name":"Jurk","first_name":"Marcel"},{"full_name":"Shanina, Elena","last_name":"Shanina","first_name":"Elena"},{"first_name":"Monika","full_name":"Beerbaum, Monika","last_name":"Beerbaum"},{"last_name":"Schmieder","full_name":"Schmieder, Peter","first_name":"Peter"},{"first_name":"Bettina G.","last_name":"Keller","full_name":"Keller, Bettina G."},{"full_name":"Rademacher, Christoph","last_name":"Rademacher","first_name":"Christoph"}],"title":"Intradomain Allosteric Network Modulates Calcium Affinity of the C-Type Lectin Receptor Langerin","doi":"10.1021/jacs.6b05458"}]
