% Plessl (2006): PhD thesis presenting the ZIPPY reconfigurable processor architecture and its tool flow.
% NOTE(review): `school` is a required field for thesis entries and was absent from this record;
% ETH Zurich was filled in by the reviewer -- confirm against the catalogue record.
@phdthesis{2404,
  author       = {Plessl, Christian},
  title        = {Hardware virtualization on a coarse-grained reconfigurable processor},
  school       = {ETH Zurich},
  publisher    = {Shaker Verlag},
  year         = {2006},
  isbn         = {978-3-8322-5561-3},
  doi          = {10.2370/9783832255619},
  keywords     = {Zippy},
  abstract     = {In this thesis, we propose to use a reconfigurable processor as main computation element in embedded systems for applications from the multi-media and communications domain. A reconfigurable processor integrates an embedded CPU core with a Reconfigurable Processing Unit (RPU). Many of our target applications require real-time signal-processing of data streams and expose a high computational demand. The key challenge in designing embedded systems for these applications is to find an implementation that satisfies the performance goals and is adaptable to new applications, while the system cost is minimized. Implementations that solely use an embedded CPU are likely to miss the performance goals. Application-Specific Integrated Circuit (ASIC)-based coprocessors can be used for some high-volume products with fixed functions, but fall short for systems with varying applications. We argue that a reconfigurable processor with a coarse-grained, dynamically reconfigurable array of modest size provides an attractive implementation platform for our application domain. The computational intensive application kernels are executed on the RPU, while the remaining parts of the application are executed on the CPU. Reconfigurable hardware allows for implementing application specific coprocessors with a high performance, while the function of the coprocessor can still be adapted due to the programmability. So far, reconfigurable technology is used in embedded systems primarily with static configurations, e.g., for implementing glue-logic, replacing ASICs, and for implementing fixed-function coprocessors. Changing the configuration at runtime enables a number of interesting application modes, e.g., on-demand loading of coprocessors and time-multiplexed execution of coprocessors, which is commonly denoted as hardware virtualization.
While the use of static configurations is well understood and supported by design-tools, the role of dynamic reconfiguration is not well investigated yet. Current application specification methods and design-tools do not provide an end-to-end tool-flow that considers dynamic reconfiguration. A key idea of our approach is to reduce system cost by keeping the size of the reconfigurable array small and to use hardware virtualization techniques to compensate for the limited hardware resources. The main contribution of this thesis is the codesign of a reconfigurable processor architecture named ZIPPY, the corresponding hardware and software implementation tools, and an application specification model which explicitly considers hardware virtualization. The ZIPPY architecture is widely parametrized and allows for specifying a whole family of processor architectures. The implementation tools are also parametrized and can target any architectural variant. We evaluate the performance of the architecture with a system-level, cycle-accurate cosimulation framework. This framework enables us to perform design-space exploration for a variety of reconfigurable processor architectures. With two case studies, we demonstrate, that hardware virtualization on the Zippy architecture is feasible and enables us to trade-off performance for area in embedded systems. Finally, we present a novel method for optimal temporal partitioning of sequential circuits, which is an important form of hardware virtualization. The method based on Slowdown and Retiming allows us to decompose any sequential circuit into a number of smaller, communicating subcircuits that can be executed on a dynamically reconfigurable architecture.},
}

% Plessl and Platzner (2005): ASAP conference paper introducing Zippy, with an ADPCM decoder case study.
% Only words that must keep their capitalisation are brace-protected in the title.
@inproceedings{2411,
  author       = {Plessl, Christian and Platzner, Marco},
  title        = {{Zippy} -- {A} coarse-grained reconfigurable array with support for hardware virtualization},
  booktitle    = {Proc. Int. Conf. on Application-Specific Systems, Architectures, and Processors ({ASAP})},
  pages        = {213--218},
  publisher    = {IEEE Computer Society},
  year         = {2005},
  doi          = {10.1109/ASAP.2005.69},
  keywords     = {Zippy},
  abstract     = {This paper motivates the use of hardware virtualization on coarse-grained reconfigurable architectures. We introduce Zippy, a coarse-grained multi-context hybrid CPU with architectural support for efficient hardware virtualization. The architectural details and the corresponding tool flow are outlined. As a case study, we compare the non-virtualized and the virtualized execution of an ADPCM decoder.},
}

% Enzler, Plessl and Platzner (2005): journal article on cycle-accurate, system-level evaluation of
% hybrid reconfigurable processors.
% NOTE(review): the exported abstract was truncated mid-sentence ("...have recently been receiving
% impact of these design decisions..."); the first three sentences below were restored by the
% reviewer -- confirm the wording against the publisher's record.
@article{2412,
  author       = {Enzler, Rolf and Plessl, Christian and Platzner, Marco},
  title        = {System-level performance evaluation of reconfigurable processors},
  journal      = {Microprocessors and Microsystems},
  volume       = {29},
  number       = {2--3},
  pages        = {63--73},
  publisher    = {Elsevier},
  year         = {2005},
  doi          = {10.1016/j.micpro.2004.06.004},
  keywords     = {FPGA, reconfigurable computing, co-simulation, Zippy},
  abstract     = {Reconfigurable architectures that tightly integrate a standard CPU core with a field-programmable hardware structure have recently been receiving increased attention. The design of such a hybrid reconfigurable processor involves a multitude of design decisions regarding the field-programmable structure as well as its system integration with the CPU core. Determining the impact of these design decisions on the overall system performance is a challenging task. In this paper, we first present a framework for the cycle-accurate performance evaluation of hybrid reconfigurable processors on the system level. Then, we discuss a reconfigurable processor for data-streaming applications, which attaches a coarse-grained reconfigurable unit to the coprocessor interface of a standard embedded CPU core. By means of a case study we evaluate the system-level impact of certain design features for the reconfigurable unit, such as multiple contexts, register replication, and hardware context scheduling. The results illustrate that a system-level evaluation framework is of paramount importance for studying the architectural trade-offs and optimizing design parameters for reconfigurable processors.},
}

% Enzler, Plessl and Platzner (2003): FPL paper on hardware virtualization with a multi-context
% reconfigurable array, using an FIR filter case study.
% NOTE(review): `volume` 2778 from Springer is taken to be the Lecture Notes in Computer Science
% number, hence the added `series` -- confirm. The DOI 10.1007/b12007 looks like a volume-level
% DOI for the proceedings rather than this paper's own DOI -- verify and replace if so.
@inproceedings{2421,
  author       = {Enzler, Rolf and Plessl, Christian and Platzner, Marco},
  title        = {Virtualizing Hardware with Multi-Context Reconfigurable Arrays},
  booktitle    = {Proc. Int. Conf. on Field Programmable Logic and Applications ({FPL})},
  series       = {Lecture Notes in Computer Science},
  volume       = {2778},
  pages        = {151--160},
  publisher    = {Springer},
  year         = {2003},
  doi          = {10.1007/b12007},
  keywords     = {Zippy, multi-context, FPGA},
  abstract     = {In contrast to processors, current reconfigurable devices totally lack programming models that would allow for device independent compilation and forward compatibility. The key to overcome this limitation is hardware virtualization. In this paper, we resort to a macro-pipelined execution model to achieve hardware virtualization for data streaming applications. As a hardware implementation we present a hybrid multi-context architecture that attaches a coarse-grained reconfigurable array to a host CPU. A co-simulation framework enables cycle-accurate simulation of the complete architecture. As a case study we map an FIR filter to our virtualized hardware model and evaluate different designs. We discuss the impact of the number of contexts and the feature of context state on the speedup and the CPU load.},
}

% Enzler, Plessl and Platzner (2003): ERSA paper presenting a co-simulation environment for hybrid
% multi-context architectures, with an FIR filter case study.
@inproceedings{2422,
  author       = {Enzler, Rolf and Plessl, Christian and Platzner, Marco},
  title        = {Co-simulation of a Hybrid Multi-Context Architecture},
  booktitle    = {Proc. Int. Conf. on Engineering of Reconfigurable Systems and Algorithms ({ERSA})},
  pages        = {174--180},
  publisher    = {CSREA Press},
  year         = {2003},
  isbn         = {1-932415-05-X},
  keywords     = {Zippy, co-simulation},
  abstract     = {Reconfigurable computing architectures aim to dynamically adapt their hardware to the application at hand. As research shows, the time it takes to reconfigure the hardware forms an overhead that can significantly impair the benefits of hardware customization. Multi-context devices are one promising approach to overcome the limitations posed by long reconfiguration times. In contrast to more traditional reconfigurable architectures, multi-context devices hold several configurations on-chip. On demand, the device can quickly switch to another context. In this paper we present a co-simulation environment to investigate design trade-offs for hybrid multi-context architectures. Our architectural model comprises a reconfigurable unit closely coupled to a CPU core. As a case study, we discuss the implementation of a FIR filter partitioned into several contexts. We outline the mapping process and present simulation results for single- and multi-context reconfigurable units coupled with both embedded and high-end CPUs.},
}

