@article{32240, abstract = {{The effect of traces of ethanol in supercritical carbon dioxide on the mixture's thermodynamic properties is studied by molecular simulations and Taylor dispersion measurements.
}}, author = {{Chatwell, René Spencer and Guevara-Carrion, Gabriela and Gaponenko, Yuri and Shevtsova, Valentina and Vrabec, Jadran}}, issn = {{1463-9076}}, journal = {{Physical Chemistry Chemical Physics}}, keywords = {{Physical and Theoretical Chemistry, General Physics and Astronomy}}, number = {{4}}, pages = {{3106--3115}}, publisher = {{Royal Society of Chemistry (RSC)}}, title = {{{Diffusion of the carbon dioxide–ethanol mixture in the extended critical region}}}, doi = {{10.1039/d0cp04985a}}, volume = {{23}}, year = {{2021}}, } @article{32246, abstract = {{
State-of-the-art methods in materials science such as artificial intelligence and data-driven techniques advance the investigation of photovoltaic materials.
}}, author = {{Mirhosseini, Hossein and Kormath Madam Raghupathy, Ramya and Sahoo, Sudhir K. and Wiebeler, Hendrik and Chugh, Manjusha and Kühne, Thomas D.}}, issn = {{1463-9076}}, journal = {{Physical Chemistry Chemical Physics}}, keywords = {{Physical and Theoretical Chemistry, General Physics and Astronomy}}, number = {{46}}, pages = {{26682--26701}}, publisher = {{Royal Society of Chemistry (RSC)}}, title = {{{In silico investigation of Cu(In,Ga)Se2-based solar cells}}}, doi = {{10.1039/d0cp04712k}}, volume = {{22}}, year = {{2020}}, } @article{16277, abstract = {{CP2K is an open source electronic structure and molecular dynamics software package to perform atomistic simulations of solid-state, liquid, molecular, and biological systems. It is especially aimed at massively parallel and linear-scaling electronic structure methods and state-of-the-art ab initio molecular dynamics simulations. Excellent performance for electronic structure calculations is achieved using novel algorithms implemented for modern high-performance computing systems. This review revisits the main capabilities of CP2K to perform efficient and accurate electronic structure simulations. The emphasis is put on density functional theory and multiple post–Hartree–Fock methods using the Gaussian and plane wave approach and its augmented all-electron extension.}}, author = {{Kühne, Thomas and Iannuzzi, Marcella and Del Ben, Mauro and Rybkin, Vladimir V. and Seewald, Patrick and Stein, Frederick and Laino, Teodoro and Khaliullin, Rustam Z. and Schütt, Ole and Schiffmann, Florian and Golze, Dorothea and Wilhelm, Jan and Chulkov, Sergey and Bani-Hashemian, Mohammad Hossein and Weber, Valéry and Borstnik, Urban and Taillefumier, Mathieu and Jakobovits, Alice Shoshana and Lazzaro, Alfio and Pabst, Hans and Müller, Tiziano and Schade, Robert and Guidon, Manuel and Andermatt, Samuel and Holmberg, Nico and Schenter, Gregory K. and Hehn, Anna and Bussy, Augustin and Belleflamme, Fabian and Tabacchi, Gloria and Glöß, Andreas and Lass, Michael and Bethune, Iain and Mundy, Christopher J. and Plessl, Christian and Watkins, Matt and VandeVondele, Joost and Krack, Matthias and Hutter, Jürg}}, journal = {{The Journal of Chemical Physics}}, number = {{19}}, title = {{{CP2K: An electronic structure and molecular dynamics software package - Quickstep: Efficient and accurate electronic structure calculations}}}, doi = {{10.1063/5.0007045}}, volume = {{152}}, year = {{2020}}, } @article{12878, abstract = {{In scientific computing, the acceleration of atomistic computer simulations by means of custom hardware is finding ever-growing application. A major limitation, however, is that the high efficiency in terms of performance and low power consumption entails the massive usage of low precision computing units.
Here, based on the approximate computing paradigm, we present an algorithmic method to rigorously compensate for numerical inaccuracies due to low-accuracy arithmetic operations, while still obtaining exact expectation values using a properly modified Langevin-type equation.}}, author = {{Rengaraj, Varadarajan and Lass, Michael and Plessl, Christian and Kühne, Thomas}}, journal = {{Computation}}, number = {{2}}, publisher = {{MDPI}}, title = {{{Accurate Sampling with Noisy Forces from Approximate Computing}}}, doi = {{10.3390/computation8020039}}, volume = {{8}}, year = {{2020}}, } @article{7689, author = {{Riebler, Heinrich and Vaz, Gavin Francis and Kenter, Tobias and Plessl, Christian}}, journal = {{ACM Transactions on Architecture and Code Optimization (TACO)}}, keywords = {{htrop}}, number = {{2}}, pages = {{14:1--14:26}}, publisher = {{ACM}}, title = {{{Transparent Acceleration for Heterogeneous Platforms with Compilation to OpenCL}}}, doi = {{10.1145/3319423}}, volume = {{16}}, year = {{2019}}, } @article{21, abstract = {{We address the general mathematical problem of computing the inverse p-th root of a given matrix in an efficient way. A new method to construct iteration functions that allow calculating arbitrary p-th roots and their inverses of symmetric positive definite matrices is presented. We show that the order of convergence is at least quadratic and that adaptively adjusting a parameter q always leads to an even faster convergence. In this way, a better performance than with previously known iteration schemes is achieved. The efficiency of the iterative functions is demonstrated for various matrices with different densities, condition numbers and spectral radii.}}, author = {{Richters, Dorothee and Lass, Michael and Walther, Andrea and Plessl, Christian and Kühne, Thomas}}, journal = {{Communications in Computational Physics}}, number = {{2}}, pages = {{564--585}}, publisher = {{Global Science Press}}, title = {{{A General Algorithm to Calculate the Inverse Principal p-th Root of Symmetric Positive Definite Matrices}}}, doi = {{10.4208/cicp.OA-2018-0053}}, volume = {{25}}, year = {{2019}}, } @article{12871, author = {{Platzner, Marco and Plessl, Christian}}, issn = {{0170-6012}}, journal = {{Informatik Spektrum}}, title = {{{FPGAs im Rechenzentrum}}}, doi = {{10.1007/s00287-019-01187-w}}, year = {{2019}}, } @article{20, abstract = {{Approximate computing has been shown to provide new ways to improve performance and power consumption of error-resilient applications. While many of these applications can be found in image processing, data classification or machine learning, we demonstrate its suitability to a problem from scientific computing. Utilizing the self-correcting behavior of iterative algorithms, we show that approximate computing can be applied to the calculation of inverse matrix p-th roots which are required in many applications in scientific computing. Results show great opportunities to reduce the computational effort and bandwidth required for the execution of the discussed algorithm, especially when targeting special accelerator hardware.}}, author = {{Lass, Michael and Kühne, Thomas and Plessl, Christian}}, issn = {{1943-0671}}, journal = {{Embedded Systems Letters}}, number = {{2}}, pages = {{33--36}}, publisher = {{IEEE}}, title = {{{Using Approximate Computing for the Calculation of Inverse Matrix p-th Roots}}}, doi = {{10.1109/LES.2017.2760923}}, volume = {{10}}, year = {{2018}}, } @article{6516, author = {{Mertens, Jan Cedric and Boschmann, Alexander and Schmidt, M.
and Plessl, Christian}}, issn = {{1369-7072}}, journal = {{Sports Engineering}}, number = {{4}}, pages = {{441--451}}, publisher = {{Springer Nature}}, title = {{{Sprint diagnostic with GPS and inertial sensor fusion}}}, doi = {{10.1007/s12283-018-0291-0}}, volume = {{21}}, year = {{2018}}, } @article{13348, author = {{Luk, Samuel M. H. and Lewandowski, P. and Kwong, N. H. and Baudin, E. and Lafont, O. and Tignon, J. and Leung, P. T. and Chan, Ch. K. P. and Babilon, M. and Schumacher, Stefan and Binder, R.}}, issn = {{0740-3224}}, journal = {{Journal of the Optical Society of America B}}, number = {{1}}, title = {{{Theory of optically controlled anisotropic polariton transport in semiconductor double microcavities}}}, doi = {{10.1364/josab.35.000146}}, volume = {{35}}, year = {{2018}}, } @article{18, abstract = {{Branch and bound (B&B) algorithms structure the search space as a tree and eliminate infeasible solutions early by pruning subtrees that cannot lead to a valid or optimal solution. Custom hardware designs significantly accelerate the execution of these algorithms. In this article, we demonstrate a high-performance B&B implementation on FPGAs. First, we identify general elements of B&B algorithms and describe their implementation as a finite state machine. Then, we introduce workers that autonomously cooperate using work stealing to allow parallel execution and full utilization of the target FPGA. Finally, we explore advantages of instance-specific designs that target a specific problem instance to improve performance. We evaluate our concepts by applying them to a branch and bound problem, the reconstruction of corrupted AES keys obtained from cold-boot attacks. The evaluation shows that our work stealing approach is scalable with the available resources and provides speedups proportional to the number of workers. Instance-specific designs allow us to achieve an overall speedup of 47× compared to the fastest implementation of AES key reconstruction so far. Finally, we demonstrate how instance-specific designs can be generated just-in-time such that the provided speedups outweigh the additional time required for design synthesis.}}, author = {{Riebler, Heinrich and Lass, Michael and Mittendorf, Robert and Löcke, Thomas and Plessl, Christian}}, issn = {{1936-7406}}, journal = {{ACM Transactions on Reconfigurable Technology and Systems (TRETS)}}, keywords = {{coldboot}}, number = {{3}}, pages = {{24:1--24:23}}, publisher = {{Association for Computing Machinery (ACM)}}, title = {{{Efficient Branch and Bound on FPGAs Using Work Stealing and Instance-Specific Designs}}}, doi = {{10.1145/3053687}}, volume = {{10}}, year = {{2017}}, } @article{1589, author = {{Schumacher, Jörn and Plessl, Christian and Vandelli, Wainer}}, journal = {{Journal of Physics: Conference Series}}, publisher = {{IOP Publishing}}, title = {{{High-Throughput and Low-Latency Network Communication with NetIO}}}, doi = {{10.1088/1742-6596/898/8/082003}}, volume = {{898}}, year = {{2017}}, } @article{165, abstract = {{A broad spectrum of applications can be accelerated by offloading computation intensive parts to reconfigurable hardware. However, to achieve speedups, the number of loop iterations (trip count) needs to be sufficiently large to amortize offloading overheads. Trip counts are frequently not known at compile time, but only at runtime just before entering a loop. Therefore, we propose to generate code for both the CPU and the coprocessor, and defer the offloading decision to the application runtime.
We demonstrate how a toolflow, based on the LLVM compiler framework, can automatically embed dynamic offloading decisions into the application code. We perform in-depth static and dynamic analysis of popular benchmarks, which confirm the general potential of such an approach. We also propose to optimize the offloading process by decoupling the runtime decision from the loop execution (decision slack). The feasibility of our approach is demonstrated by a toolflow that automatically identifies suitable data-parallel loops and generates code for the FPGA coprocessor of a Convey HC-1. We evaluate the integrated toolflow with representative loops executed for different input data sizes.}}, author = {{Vaz, Gavin Francis and Riebler, Heinrich and Kenter, Tobias and Plessl, Christian}}, issn = {{0045-7906}}, journal = {{Computers and Electrical Engineering}}, pages = {{91--111}}, publisher = {{Elsevier}}, title = {{{Potential and Methods for Embedding Dynamic Offloading Decisions into Application Code}}}, doi = {{10.1016/j.compeleceng.2016.04.021}}, volume = {{55}}, year = {{2016}}, } @article{1769, abstract = {{Large cylindrical steel test specimens are investigated by simulation using the finite-difference time-domain (FDTD) method, employing pitch-catch measurement configurations. Two imaging approaches are presented: the first is based on Claerbout's imaging principle, the second on gradient-based optimization of an objective functional.}}, author = {{Hegler, Sebastian and Statz, Christoph and Mütze, Marco and Mooshofer, Hubert and Goldammer, Matthias and Fendt, Karl and Schwarzer, Stefan and Feldhoff, Kim and Flehmig, Martin and Markwardt, Ulf and Nagel, Wolfgang E. and Schütte, Maria and Walther, Andrea and Meinel, Michael and Basermann, Achim and Plettemeier, Dirk}}, journal = {{tm - Technisches Messen}}, number = {{9}}, pages = {{440--450}}, publisher = {{Walter de Gruyter}}, title = {{{Simulative Ultraschall-Untersuchung von Pitch-Catch-Messanordnungen für große zylindrische Stahl-Prüflinge und gradientenbasierte Bildgebung}}}, doi = {{10.1515/teme-2015-0031}}, volume = {{82}}, year = {{2015}}, } @article{1772, author = {{Torresen, Jim and Plessl, Christian and Yao, Xin}}, journal = {{IEEE Computer}}, keywords = {{self-awareness, self-expression}}, number = {{7}}, pages = {{18--20}}, publisher = {{IEEE Computer Society}}, title = {{{Self-Aware and Self-Expressive Systems – Guest Editor's Introduction}}}, doi = {{10.1109/MC.2015.205}}, volume = {{48}}, year = {{2015}}, } @article{1774, abstract = {{In this article an efficient numerical method to solve multiobjective optimization problems for fluid flow governed by the Navier-Stokes equations is presented. In order to decrease the computational effort, a reduced order model is introduced using Proper Orthogonal Decomposition and a corresponding Galerkin Projection. A global, derivative free multiobjective optimization algorithm is applied to compute the Pareto set (i.e. the set of optimal compromises) for the concurrent objectives minimization of flow field fluctuations and control cost.
The method is illustrated for a 2D flow around a cylinder at Re = 100.}}, author = {{Peitz, Sebastian and Dellnitz, Michael}}, issn = {{1617-7061}}, journal = {{PAMM}}, number = {{1}}, pages = {{613--614}}, publisher = {{WILEY-VCH Verlag}}, title = {{{Multiobjective Optimization of the Flow Around a Cylinder Using Model Order Reduction}}}, doi = {{10.1002/pamm.201510296}}, volume = {{15}}, year = {{2015}}, } @article{296, abstract = {{FPGAs are known to permit huge gains in performance and efficiency for suitable applications but still require reduced design efforts and shorter development cycles for wider adoption. In this work, we compare the resulting performance of two design concepts that in different ways promise such increased productivity. As common starting point, we employ a kernel-centric design approach, where computational hotspots in an application are identified and individually accelerated on FPGA. By means of a complex stereo matching application, we evaluate two fundamentally different design philosophies and approaches for implementing the required kernels on FPGAs. In the first implementation approach, we designed individually specialized data flow kernels in a spatial programming language for a Maxeler FPGA platform; in the alternative design approach, we target a vector coprocessor with large vector lengths, which is implemented as a form of programmable overlay on the application FPGAs of a Convey HC-1. We assess both approaches in terms of overall system performance, raw kernel performance, and performance relative to invested resources. After compensating for the effects of the underlying hardware platforms, the specialized dataflow kernels on the Maxeler platform are around 3x faster than kernels executing on the Convey vector coprocessor. In our concrete scenario, due to trade-offs between reconfiguration overheads and exposed parallelism, the advantage of specialized dataflow kernels is reduced to around 2.5x.}}, author = {{Kenter, Tobias and Schmitz, Henning and Plessl, Christian}}, journal = {{International Journal of Reconfigurable Computing (IJRC)}}, publisher = {{Hindawi}}, title = {{{Exploring Tradeoffs between Specialized Kernels and a Reusable Overlay in a Stereo-Matching Case Study}}}, doi = {{10.1155/2015/859425}}, volume = {{2015}}, year = {{2015}}, } @article{1768, author = {{Plessl, Christian and Platzner, Marco and Schreier, Peter J.}}, journal = {{Informatik Spektrum}}, keywords = {{approximate computing, survey}}, number = {{5}}, pages = {{396--399}}, publisher = {{Springer}}, title = {{{Aktuelles Schlagwort: Approximate Computing}}}, doi = {{10.1007/s00287-015-0911-z}}, year = {{2015}}, } @article{1775, abstract = {{The ATLAS experiment at CERN is planning full deployment of a new unified optical link technology for connecting detector front end electronics on the timescale of the LHC Run 4 (2025). It is estimated that roughly 8000 GBT (GigaBit Transceiver) links, with transfer rates up to 10.24 Gbps, will replace existing links used for readout, detector control and distribution of timing and trigger information. A new class of devices will be needed to interface many GBT links to the rest of the trigger, data-acquisition and detector control systems. In this paper FELIX (Front End LInk eXchange) is presented, a PC-based device to route data from and to multiple GBT links via a high-performance general purpose network capable of a total throughput up to O(20 Tbps). 
FELIX implies architectural changes to the ATLAS data acquisition system, such as the use of industry standard COTS components early in the DAQ chain. Additionally, the design and implementation of a FELIX demonstration platform are presented, and hardware and software aspects are discussed.}}, author = {{Anderson, J and Borga, A and Boterenbrood, H and Chen, H and Chen, K and Drake, G and Francis, D and Gorini, B and Lanni, F and Lehmann Miotto, G and Levinson, L and Narevicius, J and Plessl, Christian and Roich, A and Ryu, S and Schreuder, F and Schumacher, Jörn and Vandelli, Wainer and Vermeulen, J and Zhang, J}}, journal = {{Journal of Physics: Conference Series}}, publisher = {{IOP Publishing}}, title = {{{FELIX: a High-Throughput Network Approach for Interfacing to Front End Electronics for ATLAS Upgrades}}}, doi = {{10.1088/1742-6596/664/8/082050}}, volume = {{664}}, year = {{2015}}, } @article{363, abstract = {{Due to the continuously shrinking device structures and increasing densities of FPGAs, thermal aspects have become the new focus for many research projects over the last years. Most researchers rely on temperature simulations to evaluate their novel thermal management techniques. However, these temperature simulations require a high computational effort if a detailed thermal model is used and their accuracies are often unclear. In contrast to simulations, the use of synthetic heat sources allows for experimental evaluation of temperature management methods. In this paper we investigate the creation of significant rises in temperature on modern FPGAs to enable future evaluation of thermal management techniques based on experiments. To that end, we have developed seven different heat-generating cores that use different subsets of FPGA resources. Our experimental results show that, according to external temperature probes connected to the FPGA’s heat sink, we can increase the temperature by an average of 81 °C. This corresponds to an average increase of 156.3 °C as measured by the built-in thermal diodes of our Virtex-5 FPGAs in less than 30 min by only utilizing about 21 percent of the slices.}}, author = {{Agne, Andreas and Hangmann, Hendrik and Happe, Markus and Platzner, Marco and Plessl, Christian}}, journal = {{Microprocessors and Microsystems}}, number = {{8, Part B}}, pages = {{911--919}}, publisher = {{Elsevier}}, title = {{{Seven Recipes for Setting Your FPGA on Fire – A Cookbook on Heat Generators}}}, doi = {{10.1016/j.micpro.2013.12.001}}, volume = {{38}}, year = {{2014}}, } @article{365, abstract = {{Self-aware computing is a paradigm for structuring and simplifying the design and operation of computing systems that face unprecedented levels of system dynamics and thus require novel forms of adaptivity. The generality of the paradigm makes it applicable to many types of computing systems and, previously, researchers started to introduce concepts of self-awareness to multicore architectures. In our work we build on a recent reference architectural framework as a model for self-aware computing and instantiate it for an FPGA-based heterogeneous multicore running the ReconOS reconfigurable architecture and operating system. After presenting the model for self-aware computing and ReconOS, we demonstrate with a case study how a multicore application built on the principle of self-awareness autonomously adapts to changes in the workload and system state.
Our work shows that the reference architectural framework as a model for self-aware computing can be practically applied and allows us to structure and simplify the design process, which is essential for designing complex future computing systems.}}, author = {{Agne, Andreas and Happe, Markus and Lösch, Achim and Plessl, Christian and Platzner, Marco}}, journal = {{ACM Transactions on Reconfigurable Technology and Systems (TRETS)}}, number = {{2}}, publisher = {{ACM}}, title = {{{Self-awareness as a Model for Designing and Operating Heterogeneous Multicores}}}, doi = {{10.1145/2617596}}, volume = {{7}}, year = {{2014}}, } @article{328, abstract = {{The ReconOS operating system for reconfigurable computing offers a unified multi-threaded programming model and operating system services for threads executing in software and threads mapped to reconfigurable hardware. The operating system interface allows hardware threads to interact with software threads using well-known mechanisms such as semaphores, mutexes, condition variables, and message queues. By semantically integrating hardware accelerators into a standard operating system environment, ReconOS allows for rapid design space exploration, supports a structured application development process and improves the portability of applications.}}, author = {{Agne, Andreas and Happe, Markus and Keller, Ariane and Lübbers, Enno and Plattner, Bernhard and Platzner, Marco and Plessl, Christian}}, journal = {{IEEE Micro}}, number = {{1}}, pages = {{60--71}}, publisher = {{IEEE}}, title = {{{ReconOS - An Operating System Approach for Reconfigurable Computing}}}, doi = {{10.1109/MM.2013.110}}, volume = {{34}}, year = {{2014}}, } @article{1779, author = {{Giefers, Heiner and Plessl, Christian and Förstner, Jens}}, issn = {{0163-5964}}, journal = {{ACM SIGARCH Computer Architecture News}}, keywords = {{funding-maxup, tet_topic_hpc}}, number = {{5}}, pages = {{65--70}}, publisher = {{ACM}}, title = {{{Accelerating Finite Difference Time Domain Simulations with Reconfigurable Dataflow Computers}}}, doi = {{10.1145/2641361.2641372}}, volume = {{41}}, year = {{2014}}, } @article{1792, author = {{Kasap, Server and Redif, Soydan}}, journal = {{IEEE Transactions on Very Large Scale Integration (VLSI) Systems}}, number = {{3}}, pages = {{522--536}}, publisher = {{IEEE}}, title = {{{Novel Field-Programmable Gate Array Architecture for Computing the Eigenvalue Decomposition of Para-Hermitian Polynomial Matrices}}}, doi = {{10.1109/TVLSI.2013.2248069}}, volume = {{22}}, year = {{2013}}, } @article{1965, abstract = {{Virtualization technology makes data centers more dynamic and easier to administrate. Today, cloud providers offer customers access to complex applications running on virtualized hardware. Nevertheless, big virtualized data centers become stochastic environments and the simplification on the user side leads to many challenges for the provider. The provider has to find cost-efficient configurations and has to deal with dynamic environments to ensure service level objectives (SLOs). We introduce a software solution that reduces the degree of human intervention to manage clouds. It is designed as a multi-agent system (MAS) and placed on top of the Infrastructure as a Service (IaaS) layer. Worker agents allocate resources, configure applications, check the feasibility of requests, and generate cost estimates. They are equipped with application-specific knowledge allowing them to estimate the type and number of necessary resources.
During runtime, a worker agent monitors the job and adapts its resources to ensure the specified quality of service, even in noisy clouds where the job instances are influenced by other jobs. The worker agents interact with a scheduler agent, which takes care of limited resources and performs cost-aware scheduling by assigning jobs to times with low costs. The whole architecture is self-optimizing and able to use public or private clouds. Building a private cloud poses the challenge of finding a mapping of virtual machines (VMs) to hosts. We present a rule-based mapping algorithm for VMs. It offers an interface where policies can be defined and combined in a generic way. The algorithm performs the initial mapping at request time as well as a remapping during runtime. It deals with policy and infrastructure changes. An energy-aware scheduler and the availability of cheap resources provided by a spot market are analyzed. We evaluated our approach by building up an SaaS stack, which assigns resources in consideration of an energy function and ensures the SLOs of two different applications, a brokerage system and a high-performance computing application. Experiments were performed on a real cloud system and in simulations.}}, author = {{Niehörster, Oliver and Simon, Jens and Brinkmann, André and Keller, Axel and Krüger, Jens}}, journal = {{Journal of Grid Computing}}, number = {{3}}, pages = {{553--577}}, title = {{{Cost-aware and SLO Fulfilling Software as a Service}}}, doi = {{10.1007/s10723-012-9230-7}}, volume = {{10}}, year = {{2012}}, } @article{2102, author = {{Gesing, Sandra and Grunzke, Richard and Krüger, Jens and Birkenheuer, Georg and Wewior, Martin and Schäfer, Patrick and Schuller, Bernd and Schuster, Johannes and Herres-Pawlis, Sonja and Breuers, Sebastian and Balaskó, Ákos and Kozlovszky, Miklos and Szikszay Fabri, Anna and Packschies, Lars and Kacsuk, Peter and Blunk, Dirk and Steinke, Thomas and Brinkmann, André and Fels, Gregor and Müller-Pfefferkorn, Ralph and Jäkel, René and Kohlbacher, Oliver}}, journal = {{Journal of Grid Computing}}, number = {{4}}, pages = {{769--790}}, publisher = {{Springer}}, title = {{{A Single Sign-On Infrastructure for Science Gateways on a Use Case for Structural Bioinformatics}}}, doi = {{10.1007/s10723-012-9247-y}}, volume = {{10}}, year = {{2012}}, } @article{2172, author = {{Thielemans, Kris and Tsoumpas, Charalampos and Mustafovic, Sanida and Beisel, Tobias and Aguiar, Pablo and Dikaios, Nikolaos and Jacobson, Matthew W.}}, journal = {{Physics in Medicine and Biology}}, number = {{4}}, pages = {{867--883}}, publisher = {{IOP Publishing}}, title = {{{STIR: Software for Tomographic Image Reconstruction Release 2}}}, doi = {{10.1088/0031-9155/57/4/867}}, volume = {{57}}, year = {{2012}}, } @article{2173, author = {{Redif, Soydan and Kasap, Server}}, journal = {{International
Journal of Electronics}}, number = {{12}}, pages = {{1646--1651}}, publisher = {{Taylor & Francis}}, title = {{{Parallel algorithm for computation of second-order sequential best rotations}}}, doi = {{10.1080/00207217.2012.751343}}, volume = {{100}}, year = {{2012}}, } @article{2174, author = {{Kasap, Server and Benkrid, Khaled}}, journal = {{Journal of Computers}}, number = {{6}}, pages = {{1312--1328}}, publisher = {{Academy Publishers}}, title = {{{Parallel Processor Design and Implementation for Molecular Dynamics Simulations on a FPGA Parallel Computer}}}, volume = {{7}}, year = {{2012}}, } @article{2176, author = {{Herres-Pawlis, Sonja and Birkenheuer, Georg and Brinkmann, André and Gesing, Sandra and Grunzke, Richard and Jäkel, René and Kohlbacher, Oliver and Krüger, Jens and Dos Santos Vieira, Ines}}, journal = {{Studies in Health Technology and Informatics}}, pages = {{142--151}}, publisher = {{IOP Publishing}}, title = {{{Workflow-enhanced conformational analysis of guanidine zinc complexes via a science gateway}}}, doi = {{10.3233/978-1-61499-054-3-142}}, volume = {{175}}, year = {{2012}}, } @article{2108, author = {{Schumacher, Tobias and Plessl, Christian and Platzner, Marco}}, issn = {{0141-9331}}, journal = {{Microprocessors and Microsystems}}, keywords = {{funding-altera}}, number = {{2}}, pages = {{110--126}}, title = {{{IMORC: An Infrastructure and Architecture Template for Implementing High-Performance Reconfigurable FPGA Accelerators}}}, doi = {{10.1016/j.micpro.2011.04.002}}, volume = {{36}}, year = {{2012}}, } @article{2177, author = {{Grad, Mariusz and Plessl, Christian}}, journal = {{Int. Journal of Reconfigurable Computing (IJRC)}}, publisher = {{Hindawi Publishing Corp.}}, title = {{{On the Feasibility and Limitations of Just-In-Time Instruction Set Extension for FPGA-based Reconfigurable Processors}}}, doi = {{10.1155/2012/418315}}, year = {{2012}}, } @article{1971, abstract = {{System virtualization has become the enabling technology to manage the increasing number of different applications inside data centers. The abstraction from the underlying hardware and the provision of multiple virtual machines (VM) on a single physical server have led to a consolidation and more efficient usage of physical servers. The abstraction from the hardware also eases the provision of applications on different data centers, as applied in several cloud computing environments. In this case, the application need not adapt to the environment of the cloud computing provider, but can travel around with its own VM image, including its own operating system and libraries. System virtualization and cloud computing could also be very attractive in the context of high‐performance computing (HPC). Today, HPC centers have to cope with both, the management of the infrastructure and also the applications. Virtualization technology would enable these centers to focus on the infrastructure, while the users, collaborating inside their virtual organizations (VOs), would be able to provide the software. Nevertheless, there seems to be a contradiction between HPC and cloud computing, as there are very few successful approaches to virtualize HPC centers. This work discusses the underlying reasons, including the management and performance, and presents solutions to overcome the contradiction, including a set of new libraries. The viability of the presented approach is shown based on evaluating a selected parallel, scientific application in a virtualized HPC environment. 
}}, author = {{Birkenheuer, Georg and Brinkmann, André and Kaiser, Jürgen and Keller, Axel and Keller, Matthias and Kleineweber, Christoph and Konersmann, Christoph and Niehörster, Oliver and Schäfer, Thorsten and Simon, Jens and Wilhelm, Maximilian}}, journal = {{Software: Practice and Experience}}, publisher = {{John Wiley & Sons}}, title = {{{Virtualized HPC: a contradiction in terms?}}}, doi = {{10.1002/spe.1055}}, year = {{2011}}, } @article{2192, author = {{Birkenheuer, Georg and Brinkmann, André and Högqvist, Mikael and Papaspyrou, Alexander and Schott, Bernhard and Sommerfeld, Dietmar and Ziegler, Wolfgang}}, journal = {{Journal of Grid Computing}}, number = {{3}}, pages = {{355--377}}, publisher = {{Springer}}, title = {{{Infrastructure Federation Through Virtualized Delegation of Resources and Services}}}, doi = {{10.1007/s10723-011-9192-1}}, volume = {{9}}, year = {{2011}}, } @article{2201, author = {{Schumacher, Tobias and Süß, Tim and Plessl, Christian and Platzner, Marco}}, journal = {{International Journal of Reconfigurable Computing (IJRC)}}, keywords = {{funding-altera}}, publisher = {{Hindawi Publishing Corp.}}, title = {{{FPGA Acceleration of Communication-bound Streaming Applications: Architecture Modeling and a 3D Image Compositing Case Study}}}, doi = {{10.1155/2011/760954}}, year = {{2011}}, } @article{2235, author = {{Brinkmann, André and Battré, Dominic and Birkenheuer, Georg and Kao, Odej and Voß, Kerstin}}, journal = {{ForschungsForum Paderborn}}, number = {{13}}, publisher = {{Universität Paderborn}}, title = {{{Risikomanagement für verteilte Umgebungen}}}, volume = {{13}}, year = {{2010}}, } @article{2354, author = {{Brinkmann, André and Eschweiler, Dominic}}, journal = {{Journal of Supercomputing}}, pages = {{35:1--35:10}}, publisher = {{ACM}}, title = {{{A Microdriver Architecture for Error Correcting Codes inside the Linux Kernel}}}, doi = {{10.1145/1654059.1654095}}, year = {{2009}}, } @article{2405, author = {{Groppe, Sven and Böttcher, Stefan and Birkenheuer, Georg and Höing, André}}, journal = {{Data & Knowledge Engineering}}, number = {{1}}, pages = {{64--110}}, publisher = {{Elsevier}}, title = {{{Reformulating XPath queries and XSLT queries on XSLT views}}}, doi = {{10.1016/j.datak.2005.04.002}}, volume = {{57}}, year = {{2006}}, } @article{1999, abstract = {{Workstation clusters are often not only used for high-throughput computing in time-sharing mode but also for running complex parallel jobs in space-sharing mode. This poses several difficulties to the resource management system, which must be able to reserve computing resources for exclusive use and also to determine an optimal process mapping for a given system topology. On the basis of our CCS software, we describe the anatomy of a modern resource management system. Like Codine, Condor, and LSF, CCS provides mechanisms for the user-friendly system access and management of clusters. But unlike them, CCS is targeted at the effective support of space-sharing parallel computers and even metacomputers.
Among other features, CCS provides a versatile resource description facility, topology-based process mapping, pluggable schedulers, and hooks to metacomputer management.}}, author = {{Keller, Axel and Reinefeld, Alexander}}, journal = {{Annual Review of Scalable Computing}}, pages = {{1--31}}, title = {{{Anatomy of a Resource Management System for HPC Clusters}}}, doi = {{10.1142/9789812810182_0001}}, volume = {{3}}, year = {{2001}}, } @article{2007, abstract = {{We present a software system for the management of geographically distributed high-performance computers. It consists of three components: 1. The Computing Center Software (CCS) is a vendor-independent resource management software for local HPC systems. It controls the mapping and scheduling of interactive and batch jobs on massively parallel systems; 2. The Resource and Service Description (RSD) is used by CCS for specifying and mapping hardware and software components of (meta-)computing environments. It has a graphical user interface, a textual representation and an object-oriented API; 3. The Service Coordination Layer (SCL) co-ordinates the co-operative use of resources in autonomous computing sites. It negotiates between the applications' requirements and the available system services.}}, author = {{Brune, Matthias and Gehring, Jörn and Keller, Axel and Reinefeld, Alexander}}, journal = {{Concurrency: Practice and Experience}}, pages = {{887--911}}, title = {{{Managing Clusters of Geographically Distributed High-Performance Computers}}}, doi = {{10.1002/(SICI)1096-9128(19991225)11:15<887::AID-CPE459>3.0.CO;2-J}}, volume = {{11(15)}}, year = {{1999}}, } @article{2012, abstract = {{With a steadily increasing number of services, metacomputing is now gaining importance in science and industry. Virtual organizations, autonomous agents, mobile computing services, and high-performance client–server applications are among the many examples of metacomputing services. For all of them, resource description plays a major role in organizing access, use, and administration of the computing components and software services. We present a generic Resource and Service Description (RSD) for specifying the hardware and software components of (meta-) computing environments. Its graphical interface allows metacomputer users to specify their resource requests. Its textual counterpart gives service providers the necessary flexibility to specify topology and properties of the available system and software resources. Finally, its internal object-oriented representation is used to link different resource management systems and service tools. With these three representations, our generic RSD approach is a key component for building metacomputer environments.}}, author = {{Brune, Matthias and Gehring, Jörn and Keller, Axel and Monien, Burkhard}}, journal = {{Parallel Computing}}, pages = {{1751--1776}}, publisher = {{Elsevier}}, title = {{{Specifying Resources and Services in Metacomputing Environments}}}, doi = {{10.1016/S0167-8191(98)00076-3}}, volume = {{24}}, year = {{1998}}, } @article{2437, author = {{Simon, Jens and Wierum, Jens-Michael}}, issn = {{0020-0190}}, journal = {{Information Processing Letters - Special Issue on Models of Computation}}, number = {{5}}, pages = {{255--261}}, publisher = {{Elsevier}}, title = {{{The Latency-of-Data-Access model for Analyzing Parallel Computation}}}, doi = {{10.1016/S0020-0190(98)00062-3}}, volume = {{66}}, year = {{1998}}, }