% Publication records, newest first (2008 down to 2001).
%
% Conventions used in this file:
%   - one field per line, values delimited by a single pair of braces;
%   - author lists use "Last, First" joined by " and " so that every name is
%     parsed individually (an extra outer brace pair would turn the whole
%     list into one indivisible name);
%   - titles are braced only around acronyms / proper nouns whose case must
%     survive the style's sentence-casing;
%   - non-ASCII letters are written as LaTeX escapes so the file works with
%     both classic BibTeX and Biber.
% Citation keys are the numeric record ids and must not be changed.

@inproceedings{2370,
  author    = {Woehrle, Matthias and Plessl, Christian and Lim, Roman and Beutel, Jan and Thiele, Lothar},
  title     = {{EvAnT}: Analysis and Checking of event traces for Wireless Sensor Networks},
  booktitle = {{IEEE} Int. Conf. on Sensor Networks, Ubiquitous, and Trustworthy Computing ({SUTC})},
  pages     = {201--208},
  publisher = {IEEE Computer Society},
  year      = {2008},
  isbn      = {978-0-7695-3158-8},
  doi       = {10.1109/SUTC.2008.24},
  keywords  = {WSN, testing, verification},
}

@inproceedings{2364,
  author    = {Schumacher, Tobias and Meiche, Robert and Kaufmann, Paul and L{\"u}bbers, Enno and Plessl, Christian and Platzner, Marco},
  title     = {A Hardware Accelerator for {k-th} Nearest Neighbor Thinning},
  booktitle = {Proc. Int. Conf. on Engineering of Reconfigurable Systems and Algorithms ({ERSA})},
  pages     = {245--251},
  publisher = {CSREA Press},
  year      = {2008},
  isbn      = {1-60132-064-7},
}

@inproceedings{2372,
  author    = {Schumacher, Tobias and Plessl, Christian and Platzner, Marco},
  title     = {{IMORC}: An infrastructure for performance monitoring and optimization of reconfigurable computers},
  booktitle = {Many-core and Reconfigurable Supercomputing Conference ({MRSC})},
  year      = {2008},
  keywords  = {IMORC, IP core, interconnect},
}

% NOTE(review): techreport entries require an `institution` field (and
% usually a report `number`); neither is present in this record -- add them
% once confirmed.
@techreport{2394,
  author = {Beutel, Jan and Plessl, Christian and Woehrle, Matthias},
  title  = {Increasing the Reliability of Wireless Sensor Networks with a Unit Testing Framework},
  year   = {2007},
}

@inproceedings{2392,
  author    = {Woehrle, Matthias and Plessl, Christian and Beutel, Jan and Thiele, Lothar},
  title     = {Increasing the Reliability of Wireless Sensor Networks with a Distributed Testing Framework},
  booktitle = {Proc. Workshop on Embedded Networked Sensors ({EmNets})},
  pages     = {93--97},
  publisher = {ACM},
  year      = {2007},
  isbn      = {978-1-59593-694-3},
  doi       = {10.1145/1278972.1278996},
  keywords  = {WSN, testing, distributed, embedded},
}

@inproceedings{2393,
  author    = {Beutel, Jan and Dyer, Matthias and Lim, Roman and Plessl, Christian and Woehrle, Matthias and Yuecel, Mustafa and Thiele, Lothar},
  title     = {Automated Wireless Sensor Network Testing},
  booktitle = {Proc. Int. Conf. Networked Sensing Systems ({INSS})},
  pages     = {303},
  publisher = {IEEE},
  year      = {2007},
  isbn      = {1-4244-1231-5},
  doi       = {10.1109/INSS.2007.4297445},
  keywords  = {WSN, testing, verification},
}

% NOTE(review): phdthesis entries require a `school` field, which this
% record lacks (presumably ETH Zurich -- confirm before adding).
@phdthesis{2404,
  author    = {Plessl, Christian},
  title     = {Hardware virtualization on a coarse-grained reconfigurable processor},
  publisher = {Shaker Verlag},
  year      = {2006},
  isbn      = {978-3-8322-5561-3},
  doi       = {10.2370/9783832255619},
  keywords  = {Zippy},
  abstract  = {In this thesis, we propose to use a reconfigurable processor as
    main computation element in embedded systems for applications from the
    multi-media and communications domain. A reconfigurable processor
    integrates an embedded CPU core with a Reconfigurable Processing Unit
    (RPU). Many of our target applications require real-time
    signal-processing of data streams and expose a high computational demand.
    The key challenge in designing embedded systems for these applications is
    to find an implementation that satisfies the performance goals and is
    adaptable to new applications, while the system cost is minimized.
    Implementations that solely use an embedded CPU are likely to miss the
    performance goals. Application-Specific Integrated Circuit (ASIC)-based
    coprocessors can be used for some high-volume products with fixed
    functions, but fall short for systems with varying applications. We argue
    that a reconfigurable processor with a coarse-grained, dynamically
    reconfigurable array of modest size provides an attractive implementation
    platform for our application domain. The computational intensive
    application kernels are executed on the RPU, while the remaining parts of
    the application are executed on the CPU. Reconfigurable hardware allows
    for implementing application specific coprocessors with a high
    performance, while the function of the coprocessor can still be adapted
    due to the programmability. So far, reconfigurable technology is used in
    embedded systems primarily with static configurations, e.g., for
    implementing glue-logic, replacing ASICs, and for implementing
    fixed-function coprocessors. Changing the configuration at runtime
    enables a number of interesting application modes, e.g., on-demand
    loading of coprocessors and time-multiplexed execution of coprocessors,
    which is commonly denoted as hardware virtualization. While the use of
    static configurations is well understood and supported by design-tools,
    the role of dynamic reconfiguration is not well investigated yet. Current
    application specification methods and design-tools do not provide an
    end-to-end tool-flow that considers dynamic reconfiguration. A key idea
    of our approach is to reduce system cost by keeping the size of the
    reconfigurable array small and to use hardware virtualization techniques
    to compensate for the limited hardware resources. The main contribution
    of this thesis is the codesign of a reconfigurable processor architecture
    named ZIPPY, the corresponding hardware and software implementation
    tools, and an application specification model which explicitly considers
    hardware virtualization. The ZIPPY architecture is widely parametrized
    and allows for specifying a whole family of processor architectures. The
    implementation tools are also parametrized and can target any
    architectural variant. We evaluate the performance of the architecture
    with a system-level, cycle-accurate cosimulation framework. This
    framework enables us to perform design-space exploration for a variety of
    reconfigurable processor architectures. With two case studies, we
    demonstrate, that hardware virtualization on the Zippy architecture is
    feasible and enables us to trade-off performance for area in embedded
    systems. Finally, we present a novel method for optimal temporal
    partitioning of sequential circuits, which is an important form of
    hardware virtualization. The method based on Slowdown and Retiming allows
    us to decompose any sequential circuit into a number of smaller,
    communicating subcircuits that can be executed on a dynamically
    reconfigurable architecture.},
}

@inproceedings{2401,
  author    = {Plessl, Christian and Platzner, Marco and Thiele, Lothar},
  title     = {Optimal Temporal Partitioning based on Slowdown and Retiming},
  booktitle = {Proc. Int. Conf. on Field Programmable Technology ({ICFPT})},
  pages     = {345--348},
  publisher = {IEEE Computer Society},
  year      = {2006},
  doi       = {10.1109/FPT.2006.270344},
  keywords  = {temporal partitioning, retiming, ILP},
  abstract  = {This paper presents a novel method for optimal temporal
    partitioning of sequential circuits for time-multiplexed reconfigurable
    architectures. The method bases on slowdown and retiming and maximizes
    the circuit's performance during execution while restricting the size of
    the partitions to respect the resource constraints of the reconfigurable
    architecture. We provide a mixed integer linear program (MILP)
    formulation of the problem, which can be solved exactly. In contrast to
    related work, our approach optimizes performance directly, takes
    structural modifications of the circuit into account, and is extensible.
    We present the application of the new method to temporal partitioning for
    a coarse-grained reconfigurable architecture.},
}

@inproceedings{2411,
  author    = {Plessl, Christian and Platzner, Marco},
  title     = {{Zippy} -- {A} coarse-grained reconfigurable array with support for hardware virtualization},
  booktitle = {Proc. Int. Conf. on Application-Specific Systems, Architectures, and Processors ({ASAP})},
  pages     = {213--218},
  publisher = {IEEE Computer Society},
  year      = {2005},
  doi       = {10.1109/ASAP.2005.69},
  keywords  = {Zippy},
  abstract  = {This paper motivates the use of hardware virtualization on
    coarse-grained reconfigurable architectures. We introduce Zippy, a
    coarse-grained multi-context hybrid CPU with architectural support for
    efficient hardware virtualization. The architectural details and the
    corresponding tool flow are outlined. As a case study, we compare the
    non-virtualized and the virtualized execution of an ADPCM decoder.},
}

% NOTE(review): the first sentence of this abstract appears to be truncated
% in the source record ("... have recently been receiving impact of these
% design decisions ..."). The text is kept verbatim; restore the missing
% passage from the publisher's version.
@article{2412,
  author    = {Enzler, Rolf and Plessl, Christian and Platzner, Marco},
  title     = {System-level performance evaluation of reconfigurable processors},
  journal   = {Microprocessors and Microsystems},
  volume    = {29},
  number    = {2--3},
  pages     = {63--73},
  publisher = {Elsevier},
  year      = {2005},
  doi       = {10.1016/j.micpro.2004.06.004},
  keywords  = {FPGA, reconfigurable computing, co-simulation, Zippy},
  abstract  = {Reconfigurable architectures that tightly integrate a standard
    CPU core with a field-programmable hardware structure have recently been
    receiving impact of these design decisions on the overall system
    performance is a challenging task. In this paper, we first present a
    framework for the cycle-accurate performance evaluation of hybrid
    reconfigurable processors on the system level. Then, we discuss a
    reconfigurable processor for data-streaming applications, which attaches
    a coarse-grained reconfigurable unit to the coprocessor interface of a
    standard embedded CPU core. By means of a case study we evaluate the
    system-level impact of certain design features for the reconfigurable
    unit, such as multiple contexts, register replication, and hardware
    context scheduling. The results illustrate that a system-level evaluation
    framework is of paramount importance for studying the architectural
    trade-offs and optimizing design parameters for reconfigurable
    processors.},
}

@inproceedings{2415,
  author    = {Plessl, Christian and Platzner, Marco},
  title     = {Virtualization of Hardware -- {Introduction} and Survey},
  booktitle = {Proc. Int. Conf. on Engineering of Reconfigurable Systems and Algorithms ({ERSA})},
  pages     = {63--69},
  publisher = {CSREA Press},
  year      = {2004},
  keywords  = {hardware virtualization},
  abstract  = {In this paper we introduce to virtualization of hardware on
    reconfigurable devices. We identify three main approaches denoted with
    temporal partitioning, virtualized execution, and virtual machine. For
    each virtualization approach, we discuss the application models, the
    required execution architectures, the design tools and the run-time
    systems. Then, we survey a selection of important projects in the field.},
}

@inproceedings{2418,
  author    = {Plessl, Christian and Platzner, Marco},
  title     = {{TKDM} -- {A} Reconfigurable Co-processor in a {PC's} Memory Slot},
  booktitle = {Proc. Int. Conf. on Field Programmable Technology ({ICFPT})},
  pages     = {252--259},
  publisher = {IEEE Computer Society},
  year      = {2003},
  doi       = {10.1109/FPT.2003.1275755},
  keywords  = {coprocessor, DIMM, memory bus, FPGA, high performance computing},
  abstract  = {This paper presents TKDM, a PC-based high-performance
    reconfigurable computing environment. The TKDM hardware consists of an
    FPGA module that uses the DIMM (dual inline memory module) bus for
    high-bandwidth and low-latency communication with the host CPU. The
    system's firmware is integrated with the Linux host operating system and
    offers functions for data communication and FPGA reconfiguration. The
    intended use of TKDM is that of a dynamically reconfigurable co-processor
    for data streaming applications. The system's firmware can be customized
    for specific application domains to facilitate simple and easy-to-use
    programming interfaces.},
}

@article{2419,
  author    = {Plessl, Christian and Enzler, Rolf and Walder, Herbert and Beutel, Jan and Platzner, Marco and Thiele, Lothar and Tr{\"o}ster, Gerhard},
  title     = {The Case for Reconfigurable Hardware in Wearable Computing},
  journal   = {Personal and Ubiquitous Computing},
  volume    = {7},
  number    = {5},
  pages     = {299--308},
  publisher = {Springer},
  year      = {2003},
  doi       = {10.1007/s00779-003-0243-x},
  abstract  = {Wearable computers are embedded into the mobile environment of
    their users. A design challenge for wearable systems is to combine the
    high performance required for tasks such as video decoding with the low
    energy consumption required to maximise battery runtimes and the
    flexibility demanded by the dynamics of the environment and the
    applications. In this paper, we demonstrate that reconfigurable hardware
    technology is able to answer this challenge. We present the concept and
    the prototype implementation of an autonomous wearable unit with
    reconfigurable modules (WURM). We discuss experiments that show the uses
    of reconfigurable hardware in WURM: ASICs-on-demand and adaptive
    interfaces. Finally, we present an experiment with an operating system
    layer for WURM.},
}

@article{2420,
  author    = {Plessl, Christian and Platzner, Marco},
  title     = {Instance-Specific Accelerators for Minimum Covering},
  journal   = {Journal of Supercomputing},
  volume    = {26},
  number    = {2},
  pages     = {109--129},
  publisher = {Kluwer Academic Publishers},
  year      = {2003},
  issn      = {0920-8542},
  doi       = {10.1023/a:1024443416592},
  keywords  = {reconfigurable computing, instance-specific acceleration, minimum covering},
  abstract  = {This paper presents the acceleration of minimum-cost covering
    problems by instance-specific hardware. First, we formulate the
    minimum-cost covering problem and discuss a branch \& bound algorithm to
    solve it. Then we describe instance-specific hardware architectures that
    implement branch \& bound in 3-valued logic and use reduction techniques
    similar to those found in software solvers. We further present
    prototypical accelerator implementations and a corresponding design tool
    flow. Our experiments reveal significant raw speedups up to five orders
    of magnitude for a set of smaller unate covering problems. Provided that
    hardware compilation times can be reduced, we conclude that
    instance-specific acceleration of hard minimum-cost covering problems
    will lead to substantial overall speedups.},
}

% NOTE(review): the doi below looks like the DOI of the whole proceedings
% volume rather than of this chapter -- verify against the publisher page.
@inproceedings{2421,
  author    = {Enzler, Rolf and Plessl, Christian and Platzner, Marco},
  title     = {Virtualizing Hardware with Multi-Context Reconfigurable Arrays},
  booktitle = {Proc. Int. Conf. on Field Programmable Logic and Applications ({FPL})},
  series    = {Lecture Notes in Computer Science},
  volume    = {2778},
  pages     = {151--160},
  publisher = {Springer},
  year      = {2003},
  doi       = {10.1007/b12007},
  keywords  = {Zippy, multi-context, FPGA},
  abstract  = {In contrast to processors, current reconfigurable devices
    totally lack programming models that would allow for device independent
    compilation and forward compatibility. The key to overcome this
    limitation is hardware virtualization. In this paper, we resort to a
    macro-pipelined execution model to achieve hardware virtualization for
    data streaming applications. As a hardware implementation we present a
    hybrid multi-context architecture that attaches a coarse-grained
    reconfigurable array to a host CPU. A co-simulation framework enables
    cycle-accurate simulation of the complete architecture. As a case study
    we map an FIR filter to our virtualized hardware model and evaluate
    different designs. We discuss the impact of the number of contexts and
    the feature of context state on the speedup and the CPU load.},
}

@inproceedings{2422,
  author    = {Enzler, Rolf and Plessl, Christian and Platzner, Marco},
  title     = {Co-simulation of a Hybrid Multi-Context Architecture},
  booktitle = {Proc. Int. Conf. on Engineering of Reconfigurable Systems and Algorithms ({ERSA})},
  pages     = {174--180},
  publisher = {CSREA Press},
  year      = {2003},
  isbn      = {1-932415-05-X},
  keywords  = {Zippy, co-simulation},
  abstract  = {Reconfigurable computing architectures aim to dynamically adapt
    their hardware to the application at hand. As research shows, the time it
    takes to reconfigure the hardware forms an overhead that can
    significantly impair the benefits of hardware customization.
    Multi-context devices are one promising approach to overcome the
    limitations posed by long reconfiguration times. In contrast to more
    traditional reconfigurable architectures, multi-context devices hold
    several configurations on-chip. On demand, the device can quickly switch
    to another context. In this paper we present a co-simulation environment
    to investigate design trade-offs for hybrid multi-context architectures.
    Our architectural model comprises a reconfigurable unit closely coupled
    to a CPU core. As a case study, we discuss the implementation of a FIR
    filter partitioned into several contexts. We outline the mapping process
    and present simulation results for single- and multi-context
    reconfigurable units coupled with both embedded and high-end CPUs.},
}

@inproceedings{2423,
  author    = {Plessl, Christian and Enzler, Rolf and Walder, Herbert and Beutel, Jan and Platzner, Marco and Thiele, Lothar},
  title     = {Reconfigurable Hardware in Wearable Computing Nodes},
  booktitle = {Proc. Int. Symp. on Wearable Computers ({ISWC})},
  pages     = {215--222},
  publisher = {IEEE Computer Society},
  year      = {2002},
  isbn      = {0-7695-1816-8},
  doi       = {10.1109/ISWC.2002.1167250},
  keywords  = {wearable computing},
  abstract  = {Wearable computers are embedded into the mobile environment of
    the human body. A design challenge for wearable systems is to combine the
    high performance required for tasks such as video decoding with low
    energy consumption required to maximize battery runtimes and the
    flexibility demanded by the dynamics of the environment and the
    applications. In this paper, we demonstrate that reconfigurable hardware
    technology is able to answer this challenge. We present the concept and
    the prototype implementation of an autonomous wearable unit with
    reconfigurable modules (WURM). We discuss two experiments that show the
    uses of reconfigurable hardware in WURM: ASICs-on-demand and adaptive
    interfaces. Finally, we develop and evaluate task placement techniques
    used in the operating system layer of WURM.},
}

% NOTE(review): the doi below looks like the DOI of the whole proceedings
% volume rather than of this chapter -- verify against the publisher page.
@inproceedings{2424,
  author    = {Dyer, Matthias and Plessl, Christian and Platzner, Marco},
  title     = {Partially Reconfigurable Cores for {Xilinx} {Virtex}},
  booktitle = {Proc. Int. Conf. on Field Programmable Logic and Applications ({FPL})},
  series    = {Lecture Notes in Computer Science},
  volume    = {2438},
  pages     = {292--301},
  publisher = {Springer},
  year      = {2002},
  doi       = {10.1007/3-540-46117-5},
  keywords  = {partial reconfiguration},
  abstract  = {Recent generations of high-density and high-speed FPGAs provide
    a sufficient capacity for implementing complete configurable systems on a
    chip (CSoCs). Hybrid CPUs that combine standard CPU cores with
    reconfigurable coprocessors are an important subclass of CSoCs. With
    partially reconfigurable FPGAs, coprocessors can be loaded on demand
    while the CPU remains running. However, the lack of high-level design
    tools for partial reconfiguration makes practical implementations a
    challenging task. In this paper, we introduce a design flow to implement
    hybrid processors on Xilinx Virtex. The design flow is based on two
    techniques, virtual sockets and feed-through components, and can
    efficiently generate partial configurations from industry-quality cores.
    We discuss the design flow and present a fully operational audio
    streaming prototype to demonstrate its feasibility.},
}

@inproceedings{2425,
  author    = {Plessl, Christian and Platzner, Marco},
  title     = {Custom Computing Machines for the Set Covering Problem},
  booktitle = {Proc. Int. Symp. on Field-Programmable Custom Computing Machines ({FCCM})},
  pages     = {163--172},
  publisher = {IEEE Computer Society},
  year      = {2002},
  doi       = {10.1109/FPGA.2002.1106671},
  abstract  = {We present instance-specific custom computing machines for the
    set covering problem. Four accelerator architectures are developed that
    implement branch \& bound in 3-valued logic and many of the deduction
    techniques found in software solvers. We use set covering benchmarks from
    two-level logic minimization and Steiner triple systems to derive and
    discuss experimental results. The resulting raw speedups are in the order
    of four magnitudes on average. Finally, we propose a hybrid solver
    architecture that combines the raw speed of instance-specific
    reconfigurable hardware with flexible bounding schemes implemented in
    software.},
}

@inproceedings{2428,
  author    = {Plessl, Christian and Platzner, Marco},
  title     = {Instance-Specific Accelerators for Minimum Covering},
  booktitle = {Proc. Int. Conf. on Engineering of Reconfigurable Systems and Algorithms ({ERSA})},
  pages     = {85--91},
  publisher = {CSREA Press},
  year      = {2001},
  keywords  = {minimum covering, accelerator, funding-sundance},
  abstract  = {In this paper we present instance-specific accelerators for
    minimum-cost covering problems. We first define the covering problem and
    discuss a branch\&bound algorithm to solve it. Then we describe an
    instance-specific hardware architecture that implements branch\&bound in
    3-valued logic and uses reduction techniques usually found in software
    solvers. Results for small unate covering problems reveal significant raw
    speedups.},
}