@inproceedings{656, abstract = {{In the next decades, hybrid multi-cores will be the predominant architecture for reconfigurable FPGA-based systems. Temperature-aware thread mapping strategies are key for providing dependability in such systems. These strategies rely on measuring the temperature distribution and redicting the thermal behavior of the system when there are changes to the hardware and software running on the FPGA. While there are a number of tools that use thermal models to predict temperature distributions at design time, these tools lack the flexibility to autonomously adjust to changing FPGA configurations. To address this problem we propose a temperature-aware system that empowers FPGA-based reconfigurable multi-cores to autonomously predict the on-chip temperature distribution for pro-active thread remapping. Our system obtains temperature measurements through a self-calibrating grid of sensors and uses area constrained heat-generating circuits in order to generate spatial and temporal temperature gradients. The generated temperature variations are then used to learn the free parameters of the system's thermal model. The system thus acquires an understanding of its own thermal characteristics. We implemented an FPGA system containing a net of 144 temperature sensors on a Xilinx Virtex-6 LX240T FPGA that is aware of its thermal model. Finally, we show that the temperature predictions vary less than 0.72 degree C on average compared to the measured temperature distributions at run-time.}}, author = {{Happe, Markus and Agne, Andreas and Plessl, Christian}}, booktitle = {{Proceedings of the 2011 International Conference on Reconfigurable Computing and FPGAs (ReConFig)}}, pages = {{55--60}}, publisher = {{IEEE}}, title = {{{Measuring and Predicting Temperature Distributions on FPGAs at Run-Time}}}, doi = {{10.1109/ReConFig.2011.59}}, year = {{2011}}, } @inproceedings{2200, author = {{Kenter, Tobias and Platzner, Marco and Plessl, Christian and Kauschke, Michael}}, booktitle = {{Proc. Int. Symp. on Field-Programmable Gate Arrays (FPGA)}}, isbn = {{978-1-4503-0554-9}}, keywords = {{design space exploration, LLVM, partitioning, performance, estimation, funding-intel}}, pages = {{177--180}}, publisher = {{ACM}}, title = {{{Performance Estimation Framework for Automated Exploration of CPU-Accelerator Architectures}}}, doi = {{10.1145/1950413.1950448}}, year = {{2011}}, } @article{2201, author = {{Schumacher, Tobias and Süß, Tim and Plessl, Christian and Platzner, Marco}}, journal = {{Int. Journal of Recon- figurable Computing (IJRC)}}, keywords = {{funding-altera}}, publisher = {{Hindawi Publishing Corp.}}, title = {{{FPGA Acceleration of Communication-bound Streaming Applications: Architecture Modeling and a 3D Image Compositing Case Study}}}, doi = {{10.1155/2011/760954}}, year = {{2011}}, } @inproceedings{2198, author = {{Grad, Mariusz and Plessl, Christian}}, booktitle = {{Proc. Reconfigurable Architectures Workshop (RAW)}}, pages = {{278--285}}, publisher = {{IEEE Computer Society}}, title = {{{Just-in-time Instruction Set Extension – Feasibility and Limitations for an FPGA-based Reconfigurable ASIP Architecture}}}, doi = {{10.1109/IPDPS.2011.153}}, year = {{2011}}, } @inproceedings{2223, author = {{Lübbers, Enno and Platzner, Marco and Plessl, Christian and Keller, Ariane and Plattner, Bernhard}}, booktitle = {{Proc. Int. Conf. on Engineering of Reconfigurable Systems and Algorithms (ERSA)}}, isbn = {{1-60132-140-6}}, pages = {{225--231}}, publisher = {{CSREA Press}}, title = {{{Towards Adaptive Networking for Embedded Devices based on Reconfigurable Hardware}}}, year = {{2010}}, } @inproceedings{2216, author = {{Grad, Mariusz and Plessl, Christian}}, booktitle = {{Proc. Int. Conf. on ReConFigurable Computing and FPGAs (ReConFig)}}, pages = {{67--72}}, publisher = {{IEEE Computer Society}}, title = {{{Pruning the Design Space for Just-In-Time Processor Customization}}}, doi = {{10.1109/ReConFig.2010.19}}, year = {{2010}}, } @inproceedings{2224, author = {{Grad, Mariusz and Plessl, Christian}}, booktitle = {{Proc. Int. Conf. on Engineering of Reconfigurable Systems and Algorithms (ERSA)}}, isbn = {{1-60132-140-6}}, pages = {{144--150}}, publisher = {{CSREA Press}}, title = {{{An Open Source Circuit Library with Benchmarking Facilities}}}, year = {{2010}}, } @inproceedings{2220, author = {{Andrews, David and Plessl, Christian}}, booktitle = {{Proc. Int. Conf. on Engineering of Reconfigurable Systems and Algorithms (ERSA)}}, isbn = {{1-60132-140-6}}, pages = {{165}}, publisher = {{CSREA Press}}, title = {{{Configurable Processor Architectures: History and Trends}}}, year = {{2010}}, } @proceedings{2222, editor = {{Plaks, Toomas P. and Andrews, David and DeMara, Ronald and Lam, Herman and Lee, Jooheung and Plessl, Christian and Stitt, Greg}}, isbn = {{1-60132-140-6}}, publisher = {{CSREA Press}}, title = {{{Proc. Int. Conf. on Engineering of Reconfigurable Systems and Algorithms (ERSA)}}}, year = {{2010}}, } @inproceedings{2226, author = {{Beisel, Tobias and Niekamp, Manuel and Plessl, Christian}}, booktitle = {{Proc. Int. Conf. on Application-Specific Systems, Architectures, and Processors (ASAP)}}, isbn = {{978-1-4244-6965-9}}, pages = {{65--72}}, publisher = {{IEEE Computer Society}}, title = {{{Using Shared Library Interposing for Transparent Acceleration in Systems with Heterogeneous Hardware Accelerators}}}, doi = {{10.1109/ASAP.2010.5540798}}, year = {{2010}}, } @inproceedings{2206, author = {{Keller, Ariane and Plattner, Bernhard and Lübbers, Enno and Platzner, Marco and Plessl, Christian}}, booktitle = {{Proc. IEEE Globecom Workshop on Network of the Future (FutureNet)}}, isbn = {{978-1-4244-8864-3}}, pages = {{372--376}}, publisher = {{IEEE}}, title = {{{Reconfigurable Nodes for Future Networks}}}, doi = {{10.1109/GLOCOMW.2010.5700341}}, year = {{2010}}, } @inproceedings{2227, author = {{Woehrle, Matthias and Plessl, Christian and Thiele, Lothar}}, booktitle = {{Proc. Int. Conf. Networked Sensing Systems (INSS)}}, isbn = {{978-1-4244-7911-5}}, pages = {{245--248}}, publisher = {{IEEE}}, title = {{{Rupeas: Ruby Powered Event Analysis DSL}}}, doi = {{10.1109/INSS.2010.5572211}}, year = {{2010}}, } @inproceedings{2228, author = {{Kenter, Tobias and Platzner, Marco and Plessl, Christian and Kauschke, Michael}}, booktitle = {{Proc. Workshop on Architectural Research Prototyping (WARP), International Symposium on Computer Architecture (ISCA)}}, editor = {{Hammami, Omar and Larrabee, Sandra}}, title = {{{Performance Estimation for the Exploration of CPU-Accelerator Architectures}}}, year = {{2010}}, } @techreport{2353, abstract = {{Wireless Sensor Networks (WSNs) are unique embedded computation systems for distributed sensing of a dispersed phenomenon. While being a strongly concurrent distributed system, its embedded aspects with severe resource limitations and the wireless communication requires a fusion of technologies and methodologies from very different fields. As WSNs are deployed in remote locations for long-term unattended operation, assurance of correct functioning of the system is of prime concern. Thus, the design and development of WSNs requires specialized tools to allow for testing and debugging the system. To this end, we present a framework for analyzing and checking WSNs based on collected events during system operation. It allows for abstracting from the event trace by means of behavioral queries and uses assertions for checking the accordance of an execution to its specification. The framework is independent from WSN test platforms, applications and logging semantics and thus generally applicable for analyzing event logs of WSN test executions. }}, author = {{Woehrle, Matthias and Plessl, Christian and Thiele, Lothar}}, keywords = {{Rupeas, DSL, WSN, testing}}, title = {{{Rupeas: Ruby Powered Event Analysis DSL}}}, year = {{2009}}, } @inproceedings{2350, abstract = {{Mapping applications that consist of a collection of cores to FPGA accelerators and optimizing their performance is a challenging task in high performance reconfigurable computing. We present IMORC, an architectural template and highly versatile on-chip interconnect. IMORC links provide asynchronous FIFOs and bitwidth conversion which allows for flexibly composing accelerators from cores running at full speed within their own clock domains, thus facilitating the re-use of cores and portability. Further, IMORC inserts performance counters for monitoring runtime data. In this paper, we first introduce the IMORC architectural template and the on-chip interconnect, and then demonstrate IMORC on the example of accelerating the k-th nearest neighbor thinning problem on an XD1000 reconfigurable computing system. Using IMORC's monitoring infrastructure, we gain insights into the data-dependent behavior of the application which, in turn, allow for optimizing the accelerator. }}, author = {{Schumacher, Tobias and Plessl, Christian and Platzner, Marco}}, booktitle = {{Proc. Int. Symp. on Field-Programmable Custom Computing Machines (FCCM)}}, isbn = {{978-1-4244-4450-2}}, keywords = {{IMORC, interconnect, performance}}, pages = {{275--278}}, publisher = {{IEEE Computer Society}}, title = {{{IMORC: Application Mapping, Monitoring and Optimization for High-Performance Reconfigurable Computing}}}, doi = {{10.1109/FCCM.2009.25}}, year = {{2009}}, } @inproceedings{2262, abstract = {{In this work we present EvoCache, a novel approach for implementing application-specific caches. The key innovation of EvoCache is to make the function that maps memory addresses from the CPU address space to cache indices programmable. We support arbitrary Boolean mapping functions that are implemented within a small reconfigurable logic fabric. For finding suitable cache mapping functions we rely on techniques from the evolvable hardware domain and utilize an evolutionary optimization procedure. We evaluate the use of EvoCache in an embedded processor for two specific applications (JPEG and BZIP2 compression) with respect to execution time, cache miss rate and energy consumption. We show that the evolvable hardware approach for optimizing the cache functions not only significantly improves the cache performance for the training data used during optimization, but that the evolved mapping functions generalize very well. Compared to a conventional cache architecture, EvoCache applied to test data achieves a reduction in execution time of up to 14.31% for JPEG (10.98% for BZIP2), and in energy consumption by 16.43% for JPEG (10.70% for BZIP2). We also discuss the integration of EvoCache into the operating system and show that the area and delay overheads introduced by EvoCache are acceptable. }}, author = {{Kaufmann, Paul and Plessl, Christian and Platzner, Marco}}, booktitle = {{Proc. NASA/ESA Conference on Adaptive Hardware and Systems (AHS)}}, keywords = {{EvoCache, evolvable hardware, computer architecture}}, pages = {{11--18}}, publisher = {{IEEE Computer Society}}, title = {{{EvoCaches: Application-specific Adaptation of Cache Mapping}}}, year = {{2009}}, } @inproceedings{2352, author = {{Beutel, Jan and Gruber, Stephan and Hasler, Andi and Lim, Roman and Meier, Andreas and Plessl, Christian and Talzi, Igor and Thiele, Lothar and Tschudin, Christian and Woehrle, Matthias and Yuecel, Mustafa}}, booktitle = {{Proc. Int. Conf. on Information Processing in Sensor Networks (IPSN)}}, isbn = {{978-1-4244-5108-1}}, keywords = {{WSN, PermaSense}}, pages = {{265--276}}, publisher = {{IEEE Computer Society}}, title = {{{PermaDAQ: A Scientific Instrument for Precision Sensing and Data Recovery in Environmental Extremes}}}, year = {{2009}}, } @inproceedings{2238, author = {{Schumacher, Tobias and Süß, Tim and Plessl, Christian and Platzner, Marco}}, booktitle = {{Proc. Int. Conf. on ReConFigurable Computing and FPGAs (ReConFig)}}, isbn = {{978-0-7695-3917-1}}, keywords = {{IMORC, graphics}}, pages = {{119--124}}, publisher = {{IEEE Computer Society}}, title = {{{Communication Performance Characterization for Reconfigurable Accelerator Design on the XD1000}}}, doi = {{10.1109/ReConFig.2009.32}}, year = {{2009}}, } @inproceedings{2261, author = {{Schumacher, Tobias and Plessl, Christian and Platzner, Marco}}, booktitle = {{Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)}}, isbn = {{978-1-4244-3892-1}}, issn = {{1946-1488}}, keywords = {{IMORC, NOC, KNN, accelerator}}, pages = {{338--344}}, publisher = {{IEEE}}, title = {{{An Accelerator for k-th Nearest Neighbor Thinning Based on the IMORC Infrastructure}}}, year = {{2009}}, } @inproceedings{2263, abstract = {{In this paper, we introduce the Woolcano reconfigurable processor architecture. The architecture is based on the Xilinx Virtex-4 FX FPGA and leverages the Auxiliary Processing Unit (APU) as well as the partial reconfiguration capabilities to provide dynamically reconfigurable custom instructions. We also present a hardware tool flow that automatically translates software functions into custom instructions and a software tool flow that creates binaries using these instructions. While previous research on processors with reconfigurable functional units has been performed predominantly with simulation, the Woolcano architecture allows for exploring dynamic instruction set extension with commercially available hardware. Finally, we present a case study demonstrating a custom floating-point instruction generated with our approach, which achieves a 40x speedup over software-emulated floating-point operations and a 21% speedup over the Xilinx hardware floating-point unit. }}, author = {{Grad, Mariusz and Plessl, Christian}}, booktitle = {{Proc. Int. Conf. on Engineering of Reconfigurable Systems and Algorithms (ERSA)}}, isbn = {{1-60132-101-5}}, pages = {{319--322}}, publisher = {{CSREA Press}}, title = {{{Woolcano: An Architecture and Tool Flow for Dynamic Instruction Set Extension on Xilinx Virtex-4 FX}}}, year = {{2009}}, }