@misc{3365, author = {{Schnuer, Jan-Philip}}, publisher = {{Universität Paderborn}}, title = {{{Static Scheduling Algorithms for Heterogeneous Compute Nodes}}}, year = {{2018}}, } @misc{3366, author = {{Croce, Marcel}}, publisher = {{Universität Paderborn}}, title = {{{Evaluation of OpenCL-based Compilation for FPGAs}}}, year = {{2018}}, } @inproceedings{3373, abstract = {{Modern Boolean satisfiability solvers can emit proofs of unsatisfiability. There is substantial interest in being able to verify such proofs and also in using them for further computations. In this paper, we present an FPGA accelerator for checking resolution proofs, a popular proof format. Our accelerator exploits parallelism at the low level by implementing the basic resolution step in hardware, and at the high level by instantiating a number of parallel modules for proof checking. Since proof checking involves highly irregular memory accesses, we employ Hybrid Memory Cube technology for accelerator memory. The results show that while the accelerator is scalable and achieves speedups for all benchmark proofs, performance improvements are currently limited by the overhead of transitioning the proof into the accelerator memory.}}, author = {{Hansmeier, Tim and Platzner, Marco and Andrews, David}}, booktitle = {{ARC 2018: Applied Reconfigurable Computing. Architectures, Tools, and Applications}}, isbn = {{9783319788890}}, issn = {{0302-9743}}, location = {{Santorini, Greece}}, pages = {{153--165}}, publisher = {{Springer International Publishing}}, title = {{{An FPGA/HMC-Based Accelerator for Resolution Proof Checking}}}, doi = {{10.1007/978-3-319-78890-6_13}}, volume = {{10824}}, year = {{2018}}, } @unpublished{3586, abstract = {{Existing approaches and tools for the generation of approximate circuits often lack generality and are restricted to certain circuit types, approximation techniques, and quality assurance methods. Moreover, only few tools are publicly available. This hinders the development and evaluation of new techniques for approximating circuits and their comparison to previous approaches. In this paper, we first analyze and classify related approaches and then present CIRCA, our flexible framework for search-based approximate circuit generation. CIRCA is developed with a focus on modularity and extensibility. We present the architecture of CIRCA with its clear separation into stages and functional blocks, report on the current prototype, and show initial experiments.}}, author = {{Witschen, Linus Matthias and Wiersema, Tobias and Ghasemzadeh Mohammadi, Hassan and Awais, Muhammad and Platzner, Marco}}, booktitle = {{Third Workshop on Approximate Computing (AxC 2018)}}, keywords = {{Approximate Computing, Framework, Pareto Front, Accuracy}}, pages = {{6}}, title = {{{CIRCA: Towards a Modular and Extensible Framework for Approximate Circuit Generation}}}, year = {{2018}}, } @phdthesis{3720, abstract = {{Traditional cache design uses a consolidated block of memory address bits to index a cache set, equivalent to the use of modulo functions. While this module-based mapping scheme is widely used in contemporary cache structures due to the simplicity of its hardware design and its good performance for sequences of consecutive addresses, its use may not be satisfactory for a variety of application domains having different characteristics.This thesis presents a new type of cache mapping scheme, motivated by programmable capabilities combined with Nature-inspired optimization of reconfigurable hardware. This research has focussed on an FPGA-based evolvable cache structure of the first level cache in a multi-core processor architecture, able to dynamically change cache indexing. To solve the challenge of reconfigurable cache mappings, a programmable Boolean circuit based on a combination of Look-up Table (LUT) memory elements is proposed. Focusing on optimization aspects at the system level, a Performance Measurement Infrastructure is introduced that is able to monitor the underlying microarchitectural metrics, and an adaptive evaluation strategy is presented that leverages on Evolutionary Algorithms, that is not only capable of evolving application-specific address-to-cache-index mappings for level one split caches but also of reducing optimization times. Putting this all together and prototyping in an FPGA for a LEON3/Linux-based multi-core processor, the creation of a system architecture reduces cache misses and improves performance over the use of conventional caches.}}, author = {{Ho, Nam}}, pages = {{139}}, publisher = {{Universität Paderborn}}, title = {{{FPGA-based Reconfigurable Cache Mapping Schemes: Design and Optimization}}}, doi = {{10.17619/UNIPB/1-376}}, year = {{2018}}, } @unpublished{1165, author = {{Witschen, Linus Matthias and Wiersema, Tobias and Platzner, Marco}}, booktitle = {{4th Workshop On Approximate Computing (WAPCO 2018)}}, title = {{{Making the Case for Proof-carrying Approximate Circuits}}}, year = {{2018}}, } @inproceedings{5547, author = {{Lösch, Achim and Platzner, Marco}}, booktitle = {{2018 IEEE 29th International Conference on Application-specific Systems, Architectures and Processors (ASAP)}}, isbn = {{9781538674796}}, location = {{Milan, Italy}}, publisher = {{IEEE}}, title = {{{A Highly Accurate Energy Model for Task Execution on Heterogeneous Compute Nodes}}}, doi = {{10.1109/asap.2018.8445098}}, year = {{2018}}, } @inproceedings{10598, abstract = {{Approximate computing has become a very popular design strategy that exploits error resilient computations to achieve higher performance and energy efficiency. Automated synthesis of approximate circuits is performed via functional approximation, in which various parts of the target circuit are extensively examined with a library of approximate components/transformations to trade off the functional accuracy and computational budget (i.e., power). However, as the number of possible approximate transformations increases, traditional search techniques suffer from a combinatorial explosion due to the large branching factor. In this work, we present a comprehensive framework for automated synthesis of approximate circuits from either structural or behavioral descriptions. We adapt the Monte Carlo Tree Search (MCTS), as a stochastic search technique, to deal with the large design space exploration, which enables a broader range of potential possible approximations through lightweight random simulations. The proposed framework is able to recognize the design Pareto set even with low computational budgets. Experimental results highlight the capabilities of the proposed synthesis framework by resulting in up to 61.69% energy saving while maintaining the predefined quality constraints.}}, author = {{Awais, Muhammad and Ghasemzadeh Mohammadi, Hassan and Platzner, Marco}}, booktitle = {{26th IFIP/IEEE International Conference on Very Large Scale Integration (VLSI-SoC)}}, keywords = {{Approximate computing, High-level synthesis, Accuracy, Monte-Carlo tree search, Circuit simulation}}, pages = {{219--224}}, title = {{{An MCTS-based Framework for Synthesis of Approximate Circuits}}}, doi = {{10.1109/VLSI-SoC.2018.8645026}}, year = {{2018}}, } @misc{10782, author = {{Clausing, Lennart}}, publisher = {{Ruhr-University Bochum}}, title = {{{Development of a Hardware / Software Codesign for sonification of LIDAR-based sensor data}}}, year = {{2018}}, } @misc{1097, author = {{Jentzsch, Felix Paul}}, keywords = {{Approximate Computing, Proof-Carrying Hardware, Formal Verification}}, publisher = {{Universität Paderborn}}, title = {{{Enforcing IP Core Connection Properties with Verifiable Security Monitors}}}, year = {{2018}}, } @article{12965, author = {{Ghribi, Ines and Abdallah, Riadh Ben and Khalgui, Mohamed and Li, Zhiwu and Alnowibet, Khalid and Platzner, Marco}}, issn = {{2169-3536}}, journal = {{IEEE Access}}, pages = {{14078--14092}}, title = {{{R-Codesign: Codesign Methodology for Real-Time Reconfigurable Embedded Systems Under Energy Constraints}}}, doi = {{10.1109/access.2018.2799852}}, year = {{2018}}, } @misc{3580, author = {{Hansmeier, Tim}}, publisher = {{Universität Paderborn}}, title = {{{An FPGA Accelerator for Checking Resolution Proofs}}}, year = {{2017}}, } @misc{1157, author = {{Witschen, Linus Matthias}}, publisher = {{Universität Paderborn}}, title = {{{A Framework for the Synthesis of Approximate Circuits}}}, year = {{2017}}, } @misc{74, author = {{Knorr, Christoph}}, publisher = {{Universität Paderborn}}, title = {{{OpenCL-basierte Videoverarbeitung auf heterogenen Rechenknoten}}}, year = {{2017}}, } @article{9919, abstract = {{This is a study of a combined load restoration and generator start-up procedure. The procedure is structured into three stages according to the power system status and the goal of load restoration. Moreover, for each load restoration stage, the proposed algorithm determines a load restoration sequence by considering renewable energy such as solar and wind park to achieve objective functions. The validity and performance of the proposed algorithm is demonstrated through simulations using IEEE-39 network.}}, author = {{Shen, Cong and Kaufmann, Paul and Braun, Martin}}, journal = {{Elsevier International Journal of Electrical Power and Energy Systems (IJEPES)}}, keywords = {{Load restorationRestoration stageRenewable energyVoltage/frequency fluctuations}}, pages = {{287--299}}, title = {{{Three-Stage Power System Restoration Methodology Considering Renewable Energies}}}, doi = {{10.1016/j.ijepes.2017.07.007}}, volume = {{94}}, year = {{2017}}, } @inproceedings{65, abstract = {{Heterogeneous compute nodes in form of CPUs with attached GPU and FPGA accelerators have strongly gained interested in the last years. Applications differ in their execution characteristics and can therefore benefit from such heterogeneous resources in terms of performance or energy consumption. While performance optimization has been the only goal for a long time, nowadays research is more and more focusing on techniques to minimize energy consumption due to rising electricity costs.This paper presents reMinMin, a novel static list scheduling approach for optimizing the total energy consumption for a set of tasks executed on a heterogeneous compute node. reMinMin bases on a new energy model that differentiates between static and dynamic energy components and covers effects of accelerator tasks on the host CPU. The required energy values are retrieved by measurements on the real computing system. In order to evaluate reMinMin, we compare it with two reference implementations on three task sets with different degrees of heterogeneity. In our experiments, MinMin is consistently better than a scheduler optimizing for dynamic energy only, which requires up to 19.43% more energy, and very close to optimal schedules.}}, author = {{Lösch, Achim and Platzner, Marco}}, booktitle = {{Proceedings of the 28th Annual IEEE International Conference on Application-specific Systems, Architectures and Processors (ASAP)}}, title = {{{reMinMin: A Novel Static Energy-Centric List Scheduling Approach Based on Real Measurements}}}, doi = {{10.1109/ASAP.2017.7995272}}, year = {{2017}}, } @article{68, abstract = {{Proof-carrying hardware (PCH) is a principle for achieving safety for dynamically reconfigurable hardware systems. The producer of a hardware module spends huge effort when creating a proof for a safety policy. The proof is then transferred as a certificate together with the configuration bitstream to the consumer of the hardware module, who can quickly verify the given proof. Previous work utilized SAT solvers and resolution traces to set up a PCH technology and corresponding tool flows. In this article, we present a novel technology for PCH based on inductive invariants. For sequential circuits, our approach is fundamentally stronger than the previous SAT-based one since we avoid the limitations of bounded unrolling. We contrast our technology to existing ones and show that it fits into previously proposed tool flows. We conduct experiments with four categories of benchmark circuits and report consumer and producer runtime and peak memory consumption, as well as the size of the certificates and the distribution of the workload between producer and consumer. Experiments clearly show that our new induction-based technology is superior for sequential circuits, whereas the previous SAT-based technology is the better choice for combinational circuits.}}, author = {{Isenberg, Tobias and Platzner, Marco and Wehrheim, Heike and Wiersema, Tobias}}, journal = {{ACM Transactions on Design Automation of Electronic Systems}}, number = {{4}}, pages = {{61:1----61:23}}, publisher = {{ACM}}, title = {{{Proof-Carrying Hardware via Inductive Invariants}}}, doi = {{10.1145/3054743}}, year = {{2017}}, } @article{10600, author = {{H.W. Leong, Philip and Amano, Hideharu and Anderson, Jason and Bertels, Koen and M.P. Cardoso, Jo\~{a}o and Diessel, Oliver and Gogniat, Guy and Hutton, Mike and Lee, JunKyu and Luk, Wayne and Lysaght, Patrick and Platzner, Marco and K. Prasanna, Viktor and Rissa, Tero and Silvano, Cristina and So, Hayden and Wang, Yu}}, journal = {{ACM Transactions on Reconfigurable Technology and Systems}}, title = {{{The First 25 Years of the FPL Conference – Significant Papers}}}, doi = {{10.1145/2996468}}, year = {{2017}}, } @article{10601, author = {{F. DeMara, Ronald and Platzner, Marco and Ottavi, Marco}}, journal = {{IEEE Transactions on Computers and IEEE Transactions on Emerging Topics in Computing}}, title = {{{Innovation in Reconfigurable Computing Fabrics: from Devices to Architectures (guest editorial)}}}, doi = {{10.1109/TETC.2016.2641599}}, year = {{2017}}, } @article{10611, author = {{Anwer, Jahanzeb and Platzner, Marco}}, journal = {{Microprocessors and Microsystems}}, pages = {{160--172}}, publisher = {{Elsevier}}, title = {{{Evaluating fault-tolerance of redundant FPGA structures using Boolean difference calculus}}}, doi = {{10.1016/j.micpro.2017.06.002}}, year = {{2017}}, }