@inproceedings{2206, author = {{Keller, Ariane and Plattner, Bernhard and Lübbers, Enno and Platzner, Marco and Plessl, Christian}}, booktitle = {{Proc. IEEE Globecom Workshop on Network of the Future (FutureNet)}}, isbn = {{978-1-4244-8864-3}}, pages = {{372--376}}, publisher = {{IEEE}}, title = {{{Reconfigurable Nodes for Future Networks}}}, doi = {{10.1109/GLOCOMW.2010.5700341}}, year = {{2010}}, } @inproceedings{2227, author = {{Woehrle, Matthias and Plessl, Christian and Thiele, Lothar}}, booktitle = {{Proc. Int. Conf. Networked Sensing Systems (INSS)}}, isbn = {{978-1-4244-7911-5}}, pages = {{245--248}}, publisher = {{IEEE}}, title = {{{Rupeas: Ruby Powered Event Analysis DSL}}}, doi = {{10.1109/INSS.2010.5572211}}, year = {{2010}}, } @inproceedings{2228, author = {{Kenter, Tobias and Platzner, Marco and Plessl, Christian and Kauschke, Michael}}, booktitle = {{Proc. Workshop on Architectural Research Prototyping (WARP), International Symposium on Computer Architecture (ISCA)}}, editor = {{Hammami, Omar and Larrabee, Sandra}}, title = {{{Performance Estimation for the Exploration of CPU-Accelerator Architectures}}}, year = {{2010}}, } @techreport{2353, abstract = {{Wireless Sensor Networks (WSNs) are unique embedded computation systems for distributed sensing of a dispersed phenomenon. While being a strongly concurrent distributed system, its embedded aspects with severe resource limitations and the wireless communication requires a fusion of technologies and methodologies from very different fields. As WSNs are deployed in remote locations for long-term unattended operation, assurance of correct functioning of the system is of prime concern. Thus, the design and development of WSNs requires specialized tools to allow for testing and debugging the system. To this end, we present a framework for analyzing and checking WSNs based on collected events during system operation. It allows for abstracting from the event trace by means of behavioral queries and uses assertions for checking the accordance of an execution to its specification. The framework is independent from WSN test platforms, applications and logging semantics and thus generally applicable for analyzing event logs of WSN test executions. }}, author = {{Woehrle, Matthias and Plessl, Christian and Thiele, Lothar}}, keywords = {{Rupeas, DSL, WSN, testing}}, title = {{{Rupeas: Ruby Powered Event Analysis DSL}}}, year = {{2009}}, } @inproceedings{2350, abstract = {{Mapping applications that consist of a collection of cores to FPGA accelerators and optimizing their performance is a challenging task in high performance reconfigurable computing. We present IMORC, an architectural template and highly versatile on-chip interconnect. IMORC links provide asynchronous FIFOs and bitwidth conversion which allows for flexibly composing accelerators from cores running at full speed within their own clock domains, thus facilitating the re-use of cores and portability. Further, IMORC inserts performance counters for monitoring runtime data. In this paper, we first introduce the IMORC architectural template and the on-chip interconnect, and then demonstrate IMORC on the example of accelerating the k-th nearest neighbor thinning problem on an XD1000 reconfigurable computing system. Using IMORC's monitoring infrastructure, we gain insights into the data-dependent behavior of the application which, in turn, allow for optimizing the accelerator. }}, author = {{Schumacher, Tobias and Plessl, Christian and Platzner, Marco}}, booktitle = {{Proc. Int. Symp. on Field-Programmable Custom Computing Machines (FCCM)}}, isbn = {{978-1-4244-4450-2}}, keywords = {{IMORC, interconnect, performance}}, pages = {{275--278}}, publisher = {{IEEE Computer Society}}, title = {{{IMORC: Application Mapping, Monitoring and Optimization for High-Performance Reconfigurable Computing}}}, doi = {{10.1109/FCCM.2009.25}}, year = {{2009}}, } @inproceedings{2262, abstract = {{In this work we present EvoCache, a novel approach for implementing application-specific caches. The key innovation of EvoCache is to make the function that maps memory addresses from the CPU address space to cache indices programmable. We support arbitrary Boolean mapping functions that are implemented within a small reconfigurable logic fabric. For finding suitable cache mapping functions we rely on techniques from the evolvable hardware domain and utilize an evolutionary optimization procedure. We evaluate the use of EvoCache in an embedded processor for two specific applications (JPEG and BZIP2 compression) with respect to execution time, cache miss rate and energy consumption. We show that the evolvable hardware approach for optimizing the cache functions not only significantly improves the cache performance for the training data used during optimization, but that the evolved mapping functions generalize very well. Compared to a conventional cache architecture, EvoCache applied to test data achieves a reduction in execution time of up to 14.31% for JPEG (10.98% for BZIP2), and in energy consumption by 16.43% for JPEG (10.70% for BZIP2). We also discuss the integration of EvoCache into the operating system and show that the area and delay overheads introduced by EvoCache are acceptable. }}, author = {{Kaufmann, Paul and Plessl, Christian and Platzner, Marco}}, booktitle = {{Proc. NASA/ESA Conference on Adaptive Hardware and Systems (AHS)}}, keywords = {{EvoCache, evolvable hardware, computer architecture}}, pages = {{11--18}}, publisher = {{IEEE Computer Society}}, title = {{{EvoCaches: Application-specific Adaptation of Cache Mapping}}}, year = {{2009}}, } @inproceedings{2352, author = {{Beutel, Jan and Gruber, Stephan and Hasler, Andi and Lim, Roman and Meier, Andreas and Plessl, Christian and Talzi, Igor and Thiele, Lothar and Tschudin, Christian and Woehrle, Matthias and Yuecel, Mustafa}}, booktitle = {{Proc. Int. Conf. on Information Processing in Sensor Networks (IPSN)}}, isbn = {{978-1-4244-5108-1}}, keywords = {{WSN, PermaSense}}, pages = {{265--276}}, publisher = {{IEEE Computer Society}}, title = {{{PermaDAQ: A Scientific Instrument for Precision Sensing and Data Recovery in Environmental Extremes}}}, year = {{2009}}, } @inproceedings{2238, author = {{Schumacher, Tobias and Süß, Tim and Plessl, Christian and Platzner, Marco}}, booktitle = {{Proc. Int. Conf. on ReConFigurable Computing and FPGAs (ReConFig)}}, isbn = {{978-0-7695-3917-1}}, keywords = {{IMORC, graphics}}, pages = {{119--124}}, publisher = {{IEEE Computer Society}}, title = {{{Communication Performance Characterization for Reconfigurable Accelerator Design on the XD1000}}}, doi = {{10.1109/ReConFig.2009.32}}, year = {{2009}}, } @inproceedings{2261, author = {{Schumacher, Tobias and Plessl, Christian and Platzner, Marco}}, booktitle = {{Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)}}, isbn = {{978-1-4244-3892-1}}, issn = {{1946-1488}}, keywords = {{IMORC, NOC, KNN, accelerator}}, pages = {{338--344}}, publisher = {{IEEE}}, title = {{{An Accelerator for k-th Nearest Neighbor Thinning Based on the IMORC Infrastructure}}}, year = {{2009}}, } @inproceedings{2263, abstract = {{In this paper, we introduce the Woolcano reconfigurable processor architecture. The architecture is based on the Xilinx Virtex-4 FX FPGA and leverages the Auxiliary Processing Unit (APU) as well as the partial reconfiguration capabilities to provide dynamically reconfigurable custom instructions. We also present a hardware tool flow that automatically translates software functions into custom instructions and a software tool flow that creates binaries using these instructions. While previous research on processors with reconfigurable functional units has been performed predominantly with simulation, the Woolcano architecture allows for exploring dynamic instruction set extension with commercially available hardware. Finally, we present a case study demonstrating a custom floating-point instruction generated with our approach, which achieves a 40x speedup over software-emulated floating-point operations and a 21% speedup over the Xilinx hardware floating-point unit. }}, author = {{Grad, Mariusz and Plessl, Christian}}, booktitle = {{Proc. Int. Conf. on Engineering of Reconfigurable Systems and Algorithms (ERSA)}}, isbn = {{1-60132-101-5}}, pages = {{319--322}}, publisher = {{CSREA Press}}, title = {{{Woolcano: An Architecture and Tool Flow for Dynamic Instruction Set Extension on Xilinx Virtex-4 FX}}}, year = {{2009}}, } @inproceedings{2370, author = {{Woehrle, Matthias and Plessl, Christian and Lim, Roman and Beutel, Jan and Thiele, Lothar}}, booktitle = {{IEEE Int. Conf. on Sensor Networks, Ubiquitous, and Trustworthy Computing (SUTC)}}, isbn = {{978-0-7695-3158-8}}, keywords = {{WSN, testing, verification}}, pages = {{201--208}}, publisher = {{IEEE Computer Society}}, title = {{{EvAnT: Analysis and Checking of event traces for Wireless Sensor Networks}}}, doi = {{10.1109/SUTC.2008.24}}, year = {{2008}}, } @inproceedings{2364, author = {{Schumacher, Tobias and Meiche, Robert and Kaufmann, Paul and Lübbers, Enno and Plessl, Christian and Platzner, Marco}}, booktitle = {{Proc. Int. Conf. on Engineering of Reconfigurable Systems and Algorithms (ERSA)}}, isbn = {{1-60132-064-7}}, pages = {{245--251}}, publisher = {{CSREA Press}}, title = {{{A Hardware Accelerator for k-th Nearest Neighbor Thinning}}}, year = {{2008}}, } @inproceedings{2372, author = {{Schumacher, Tobias and Plessl, Christian and Platzner, Marco}}, booktitle = {{Many-core and Reconfigurable Supercomputing Conference (MRSC)}}, keywords = {{IMORC, IP core, interconnect}}, title = {{{IMORC: An infrastructure for performance monitoring and optimization of reconfigurable computers}}}, year = {{2008}}, } @techreport{2394, author = {{Beutel, Jan and Plessl, Christian and Woehrle, Matthias}}, title = {{{Increasing the Reliability of Wireless Sensor Networks with a Unit Testing Framework}}}, year = {{2007}}, } @inproceedings{2392, author = {{Woehrle, Matthias and Plessl, Christian and Beutel, Jan and Thiele, Lothar}}, booktitle = {{Proc. Workshop on Embedded Networked Sensors (EmNets)}}, isbn = {{978-1-59593-694-3}}, keywords = {{WSN, testing, distributed, embedded}}, pages = {{93--97}}, publisher = {{ACM}}, title = {{{Increasing the Reliability of Wireless Sensor Networks with a Distributed Testing Framework}}}, doi = {{10.1145/1278972.1278996}}, year = {{2007}}, } @inproceedings{2393, author = {{Beutel, Jan and Dyer, Matthias and Lim, Roman and Plessl, Christian and Woehrle, Matthias and Yuecel, Mustafa and Thiele, Lothar}}, booktitle = {{Proc. Int. Conf. Networked Sensing Systems (INSS)}}, isbn = {{1-4244-1231-5}}, keywords = {{WSN, testing, verification}}, pages = {{303--303}}, publisher = {{IEEE}}, title = {{{Automated Wireless Sensor Network Testing}}}, doi = {{10.1109/INSS.2007.4297445}}, year = {{2007}}, } @phdthesis{2404, abstract = {{ In this thesis, we propose to use a reconfigurable processor as main computation element in embedded systems for applications from the multi-media and communications domain. A reconfigurable processor integrates an embedded CPU core with a Reconfigurable Processing Unit (RPU). Many of our target applications require real-time signal-processing of data streams and expose a high computational demand. The key challenge in designing embedded systems for these applications is to find an implementation that satisfies the performance goals and is adaptable to new applications, while the system cost is minimized. Implementations that solely use an embedded CPU are likely to miss the performance goals. Application-Specific Integrated Circuit (ASIC)-based coprocessors can be used for some high-volume products with fixed functions, but fall short for systems with varying applications. We argue that a reconfigurable processor with a coarse-grained, dynamically reconfigurable array of modest size provides an attractive implementation platform for our application domain. The computational intensive application kernels are executed on the RPU, while the remaining parts of the application are executed on the CPU. Reconfigurable hardware allows for implementing application specific coprocessors with a high performance, while the function of the coprocessor can still be adapted due to the programmability. So far, reconfigurable technology is used in embedded systems primarily with static configurations, e.g., for implementing glue-logic, replacing ASICs, and for implementing fixed-function coprocessors. Changing the configuration at runtime enables a number of interesting application modes, e.g., on-demand loading of coprocessors and time-multiplexed execution of coprocessors, which is commonly denoted as hardware virtualization. While the use of static configurations is well understood and supported by design-tools, the role of dynamic reconfiguration is not well investigated yet. Current application specification methods and design-tools do not provide an end-to-end tool-flow that considers dynamic reconfiguration. A key idea of our approach is to reduce system cost by keeping the size of the reconfigurable array small and to use hardware virtualization techniques to compensate for the limited hardware resources. The main contribution of this thesis is the codesign of a reconfigurable processor architecture named ZIPPY, the corresponding hardware and software implementation tools, and an application specification model which explicitly considers hardware virtualization. The ZIPPY architecture is widely parametrized and allows for specifying a whole family of processor architectures. The implementation tools are also parametrized and can target any architectural variant. We evaluate the performance of the architecture with a system-level, cycle-accurate cosimulation framework. This framework enables us to perform design-space exploration for a variety of reconfigurable processor architectures. With two case studies, we demonstrate, that hardware virtualization on the Zippy architecture is feasible and enables us to trade-off performance for area in embedded systems. Finally, we present a novel method for optimal temporal partitioning of sequential circuits, which is an important form of hardware virtualization. The method based on Slowdown and Retiming allows us to decompose any sequential circuit into a number of smaller, communicating subcircuits that can be executed on a dynamically reconfigurable architecture. }}, author = {{Plessl, Christian}}, isbn = {{978-3-8322-5561-3}}, keywords = {{Zippy}}, publisher = {{Shaker Verlag}}, title = {{{Hardware virtualization on a coarse-grained reconfigurable processor}}}, doi = {{10.2370/9783832255619}}, year = {{2006}}, } @inproceedings{2401, abstract = {{ This paper presents a novel method for optimal temporal partitioning of sequential circuits for time-multiplexed reconfigurable architectures. The method bases on slowdown and retiming and maximizes the circuit's performance during execution while restricting the size of the partitions to respect the resource constraints of the reconfigurable architecture. We provide a mixed integer linear program (MILP) formulation of the problem, which can be solved exactly. In contrast to related work, our approach optimizes performance directly, takes structural modifications of the circuit into account, and is extensible. We present the application of the new method to temporal partitioning for a coarse-grained reconfigurable architecture. }}, author = {{Plessl, Christian and Platzner, Marco and Thiele, Lothar}}, booktitle = {{Proc. Int. Conf. on Field Programmable Technology (ICFPT)}}, keywords = {{temporal partitioning, retiming, ILP}}, pages = {{345--348}}, publisher = {{IEEE Computer Society}}, title = {{{Optimal Temporal Partitioning based on Slowdown and Retiming}}}, doi = {{10.1109/FPT.2006.270344}}, year = {{2006}}, } @inproceedings{2411, abstract = {{ This paper motivates the use of hardware virtualization on coarse-grained reconfigurable architectures. We introduce Zippy, a coarse-grained multi-context hybrid CPU with architectural support for efficient hardware virtualization. The architectural details and the corresponding tool flow are outlined. As a case study, we compare the non-virtualized and the virtualized execution of an ADPCM decoder. }}, author = {{Plessl, Christian and Platzner, Marco}}, booktitle = {{Proc. Int. Conf. on Application-Specific Systems, Architectures, and Processors (ASAP)}}, keywords = {{Zippy}}, pages = {{213--218}}, publisher = {{IEEE Computer Society}}, title = {{{Zippy – A coarse-grained reconfigurable array with support for hardware virtualization}}}, doi = {{10.1109/ASAP.2005.69}}, year = {{2005}}, } @article{2412, abstract = {{ Reconfigurable architectures that tightly integrate a standard CPU core with a field-programmable hardware structure have recently been receiving impact of these design decisions on the overall system performance is a challenging task. In this paper, we first present a framework for the cycle-accurate performance evaluation of hybrid reconfigurable processors on the system level. Then, we discuss a reconfigurable processor for data-streaming applications, which attaches a coarse-grained reconfigurable unit to the coprocessor interface of a standard embedded CPU core. By means of a case study we evaluate the system-level impact of certain design features for the reconfigurable unit, such as multiple contexts, register replication, and hardware context scheduling. The results illustrate that a system-level evaluation framework is of paramount importance for studying the architectural trade-offs and optimizing design parameters for reconfigurable processors.}}, author = {{Enzler, Rolf and Plessl, Christian and Platzner, Marco}}, journal = {{Microprocessors and Microsystems}}, keywords = {{FPGA, reconfigurable computing, co-simulation, Zippy}}, number = {{2-3}}, pages = {{63--73}}, publisher = {{Elsevier}}, title = {{{System-level performance evaluation of reconfigurable processors}}}, doi = {{10.1016/j.micpro.2004.06.004}}, volume = {{29}}, year = {{2005}}, } @inproceedings{2415, abstract = {{In this paper we introduce to virtualization of hardware on reconfigurable devices. We identify three main approaches denoted with temporal partitioning, virtualized execution, and virtual machine. For each virtualization approach, we discuss the application models, the required execution architectures, the design tools and the run-time systems. Then, we survey a selection of important projects in the field. }}, author = {{Plessl, Christian and Platzner, Marco}}, booktitle = {{Proc. Int. Conf. on Engineering of Reconfigurable Systems and Algorithms (ERSA)}}, keywords = {{hardware virtualization}}, pages = {{63--69}}, publisher = {{CSREA Press}}, title = {{{Virtualization of Hardware – Introduction and Survey}}}, year = {{2004}}, } @inproceedings{2418, abstract = {{ This paper presents TKDM, a PC-based high-performance reconfigurable computing environment. The TKDM hardware consists of an FPGA module that uses the DIMM (dual inline memory module) bus for high-bandwidth and low-latency communication with the host CPU. The system's firmware is integrated with the Linux host operating system and offers functions for data communication and FPGA reconfiguration. The intended use of TKDM is that of a dynamically reconfigurable co-processor for data streaming applications. The system's firmware can be customized for specific application domains to facilitate simple and easy-to-use programming interfaces. }}, author = {{Plessl, Christian and Platzner, Marco}}, booktitle = {{Proc. Int. Conf. on Field Programmable Technology (ICFPT)}}, keywords = {{coprocessor, DIMM, memory bus, FPGA, high performance computing}}, pages = {{252--259}}, publisher = {{IEEE Computer Society}}, title = {{{TKDM – A Reconfigurable Co-processor in a PC's Memory Slot}}}, doi = {{10.1109/FPT.2003.1275755}}, year = {{2003}}, } @article{2419, abstract = {{Wearable computers are embedded into the mobile environment of their users. A design challenge for wearable systems is to combine the high performance required for tasks such as video decoding with the low energy consumption required to maximise battery runtimes and the flexibility demanded by the dynamics of the environment and the applications. In this paper, we demonstrate that reconfigurable hardware technology is able to answer this challenge. We present the concept and the prototype implementation of an autonomous wearable unit with reconfigurable modules (WURM). We discuss experiments that show the uses of reconfigurable hardware in WURM: ASICs-on-demand and adaptive interfaces. Finally, we present an experiment with an operating system layer for WURM.}}, author = {{Plessl, Christian and Enzler, Rolf and Walder, Herbert and Beutel, Jan and Platzner, Marco and Thiele, Lothar and Tröster, Gerhard}}, journal = {{Personal and Ubiquitous Computing}}, number = {{5}}, pages = {{299--308}}, publisher = {{Springer}}, title = {{{The Case for Reconfigurable Hardware in Wearable Computing}}}, doi = {{10.1007/s00779-003-0243-x}}, volume = {{7}}, year = {{2003}}, } @article{2420, abstract = {{ This paper presents the acceleration of minimum-cost covering problems by instance-specific hardware. First, we formulate the minimum-cost covering problem and discuss a branch \& bound algorithm to solve it. Then we describe instance-specific hardware architectures that implement branch \& bound in 3-valued logic and use reduction techniques similar to those found in software solvers. We further present prototypical accelerator implementations and a corresponding design tool flow. Our experiments reveal significant raw speedups up to five orders of magnitude for a set of smaller unate covering problems. Provided that hardware compilation times can be reduced, we conclude that instance-specific acceleration of hard minimum-cost covering problems will lead to substantial overall speedups. }}, author = {{Plessl, Christian and Platzner, Marco}}, issn = {{0920-8542}}, journal = {{Journal of Supercomputing}}, keywords = {{reconfigurable computing, instance-specific acceleration, minimum covering}}, number = {{2}}, pages = {{109--129}}, publisher = {{Kluwer Academic Publishers}}, title = {{{Instance-Specific Accelerators for Minimum Covering}}}, doi = {{10.1023/a:1024443416592}}, volume = {{26}}, year = {{2003}}, } @inproceedings{2421, abstract = {{In contrast to processors, current reconfigurable devices totally lack programming models that would allow for device independent compilation and forward compatibility. The key to overcome this limitation is hardware virtualization. In this paper, we resort to a macro-pipelined execution model to achieve hardware virtualization for data streaming applications. As a hardware implementation we present a hybrid multi-context architecture that attaches a coarse-grained reconfigurable array to a host CPU. A co-simulation framework enables cycle-accurate simulation of the complete architecture. As a case study we map an FIR filter to our virtualized hardware model and evaluate different designs. We discuss the impact of the number of contexts and the feature of context state on the speedup and the CPU load.}}, author = {{Enzler, Rolf and Plessl, Christian and Platzner, Marco}}, booktitle = {{Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)}}, keywords = {{Zippy, multi-context, FPGA}}, pages = {{151--160}}, publisher = {{Springer}}, title = {{{Virtualizing Hardware with Multi-Context Reconfigurable Arrays}}}, doi = {{10.1007/b12007}}, volume = {{2778}}, year = {{2003}}, } @inproceedings{2422, abstract = {{Reconfigurable computing architectures aim to dynamically adapt their hardware to the application at hand. As research shows, the time it takes to reconfigure the hardware forms an overhead that can significantly impair the benefits of hardware customization. Multi-context devices are one promising approach to overcome the limitations posed by long reconfiguration times. In contrast to more traditional reconfigurable architectures, multi-context devices hold several configurations on-chip. On demand, the device can quickly switch to another context. In this paper we present a co-simulation environment to investigate design trade-offs for hybrid multi-context architectures. Our architectural model comprises a reconfigurable unit closely coupled to a CPU core. As a case study, we discuss the implementation of a FIR filter partitioned into several contexts. We outline the mapping process and present simulation results for single- and multi-context reconfigurable units coupled with both embedded and high-end CPUs.}}, author = {{Enzler, Rolf and Plessl, Christian and Platzner, Marco}}, booktitle = {{Proc. Int. Conf. on Engineering of Reconfigurable Systems and Algorithms (ERSA)}}, isbn = {{1-932415-05-X}}, keywords = {{Zippy, co-simulation}}, pages = {{174--180}}, publisher = {{CSREA Press}}, title = {{{Co-simulation of a Hybrid Multi-Context Architecture}}}, year = {{2003}}, } @inproceedings{2423, abstract = {{Wearable computers are embedded into the mobile environment of the human body. A design challenge for wearable systems is to combine the high performance required for tasks such as video decoding with low energy consumption required to maximize battery runtimes and the flexibility demanded by the dynamics of the environment and the applications. In this paper, we demonstrate that reconfigurable hardware technology is able to answer this challenge. We present the concept and the prototype implementation of an autonomous wearable unit with reconfigurable modules (WURM). We discuss two experiments that show the uses of reconfigurable hardware in WURM: ASICs-on-demand and adaptive interfaces. Finally, we develop and evaluate task placement techniques used in the operating system layer of WURM.}}, author = {{Plessl, Christian and Enzler, Rolf and Walder, Herbert and Beutel, Jan and Platzner, Marco and Thiele, Lothar}}, booktitle = {{Proc. Int. Symp. on Wearable Computers (ISWC)}}, isbn = {{0-7695-1816-8}}, keywords = {{wearable computing}}, pages = {{215--222}}, publisher = {{IEEE Computer Society}}, title = {{{Reconfigurable Hardware in Wearable Computing Nodes}}}, doi = {{10.1109/ISWC.2002.1167250}}, year = {{2002}}, } @inproceedings{2424, abstract = {{ Recent generations of high-density and high-speed FPGAs provide a sufficient capacity for implementing complete configurable systems on a chip (CSoCs). Hybrid CPUs that combine standard CPU cores with reconfigurable coprocessors are an important subclass of CSoCs. With partially reconfigurable FPGAs, coprocessors can be loaded on demand while the CPU remains running. However, the lack of high-level design tools for partial reconfiguration makes practical implementations a challenging task. In this paper, we introduce a design flow to implement hybrid processors on Xilinx Virtex. The design flow is based on two techniques, virtual sockets and feed-through components, and can efficiently generate partial configurations from industry-quality cores. We discuss the design flow and present a fully operational audio streaming prototype to demonstrate its feasibility. }}, author = {{Dyer, Matthias and Plessl, Christian and Platzner, Marco}}, booktitle = {{Proc. Int. Conf. on Field Programmable Logic and Applications (FPL)}}, keywords = {{partial reconfiguration}}, pages = {{292--301}}, publisher = {{Springer}}, title = {{{Partially Reconfigurable Cores for Xilinx Virtex}}}, doi = {{10.1007/3-540-46117-5}}, volume = {{2438}}, year = {{2002}}, } @inproceedings{2425, abstract = {{ We present instance-specific custom computing machines for the set covering problem. Four accelerator architectures are developed that implement branch \& bound in 3-valued logic and many of the deduction techniques found in software solvers. We use set covering benchmarks from two-level logic minimization and Steiner triple systems to derive and discuss experimental results. The resulting raw speedups are in the order of four magnitudes on average. Finally, we propose a hybrid solver architecture that combines the raw speed of instance-specific reconfigurable hardware with flexible bounding schemes implemented in software. }}, author = {{Plessl, Christian and Platzner, Marco}}, booktitle = {{Proc. Int. Symp. on Field-Programmable Custom Computing Machines (FCCM)}}, pages = {{163--172}}, publisher = {{IEEE Computer Society}}, title = {{{Custom Computing Machines for the Set Covering Problem}}}, doi = {{10.1109/FPGA.2002.1106671}}, year = {{2002}}, } @inproceedings{2428, abstract = {{ In this paper we present instance-specific accelerators for minimum-cost covering problems. We first define the covering problem and discuss a branch&bound algorithm to solve it. Then we describe an instance-specific hardware architecture that implements branch&bound in 3-valued logic and uses reduction techniques usually found in software solvers. Results for small unate covering problems reveal significant raw speedups. }}, author = {{Plessl, Christian and Platzner, Marco}}, booktitle = {{Proc. Int. Conf. on Engineering of Reconfigurable Systems and Algorithms (ERSA)}}, keywords = {{minimum covering, accelerator, funding-sundance}}, pages = {{85--91}}, publisher = {{CSREA Press}}, title = {{{Instance-Specific Accelerators for Minimum Covering}}}, year = {{2001}}, } @article{2429, author = {{Plessl, Christian and Wilde, Erik}}, journal = {{iX}}, pages = {{88--93}}, publisher = {{Heise Verlag}}, title = {{{Server-Side-Techniken im Web – ein Überblick}}}, year = {{2001}}, } @misc{2430, abstract = {{In this report the design and implementation of an instance-specific accelerator for solving minimum covering problems will be presented. After an introduction to configurable computing in general, the minimum covering problem is defined and a branch and bound algorithm to solve it in software is presented. The remainder of the report shows how this branch and bound algorithm can be adopted to hardware. Specifically it is stressed how the various sophisticated strategies for deducing conditions for variables used by software solvers can be adopted to hardware and how a system which uses 3-valued logic to solve this problem can be designed. In addition to these considerations focusing on the architecture of the system, some important details of the actual implementation are given. A prototype has been implemented for showing the feasibility of the concept and for gaining information about speed and size of the hardware implementation. Cycle-accurate simulations for a set of benchmark problems have been done for determining the performance of the accelerator. The speed of the resulting accelerators has been compared to the time a reference software solver (espresso) needs and the resulting speedups have been calculated. I have shown that a raw speedup of several orders of maginitude can be achieved for many problems; for some problems no speedup is achieved yet. After a discussion of the results, ideas for future work are presented.}}, author = {{Plessl, Christian}}, publisher = {{Computer Engineering and Networks Lab, ETH Zurich, Switzerland}}, title = {{{Reconfigurable Accelerators for Minimum Covering}}}, year = {{2001}}, } @inproceedings{2432, abstract = {{In this paper, we present the analysis of applications from the domain of handheld and wearable computing. This analysis is the first step to derive and evaluate design parameters for dynamically reconfigurable processors. We discuss the selection of representative benchmarks for handhelds and wearables and group the applications into multimedia, communications, and cryptography programs. We simulate the applications on a cycle-accurate processor simulator and gather statistical data such as instruction mix, cache hit rates and memory requirements for an embedded processor model. A breakdown of the executed cycles into different functions identifies the most compute-intensive code sections - the kernels. Then, we analyze the applications and discuss parameters that strongly influence the design of dynamically reconfigurable processors. Finally, we outline the construction of a parameterizable simulation model for a reconfigurable unit that is attached to a processor core.}}, author = {{Enzler, Rolf and Platzner, Marco and Plessl, Christian and Thiele, Lothar and Tröster, Gerhard}}, booktitle = {{Reconfigurable Technology: FPGAs and Reconfigurable Processors for Computing and Communications III}}, keywords = {{benchmark}}, pages = {{135--146}}, title = {{{Reconfigurable Processors for Handhelds and Wearables: Application Analysis}}}, doi = {{10.1117/12.434376}}, volume = {{4525}}, year = {{2001}}, } @misc{2433, author = {{Plessl, Christian and Maurer, Simon}}, keywords = {{co-design, speech processing}}, publisher = {{Computer Engineering and Networks Lab, ETH Zurich, Switzerland}}, title = {{{Hardware/Software Codesign in Speech Compression Applications}}}, year = {{2000}}, }