@misc{22216,
  author       = {Rehnen, Jakob Werner},
  title        = {Decomposition of Arithmetic Components for the Approximate Circuit Synthesis with {EvoApproxLib}},
  year         = {2021},
}

% Bachelor's thesis according to its own abstract. Classic BibTeX has no
% dedicated bachelor type, so the mastersthesis type is used with a type override.
@mastersthesis{22483,
  author       = {Brede, Mathis},
  title        = {Implementation and Profiling of {XCS} in the Context of Embedded Systems},
  type         = {Bachelor's thesis},
  school       = {Paderborn University},
  year         = {2021},
  abstract     = {This bachelor thesis presents a C/C++ implementation of the XCS algorithm for an embedded system and profiling results concerning the execution time of the functions. These are then analyzed in relation to the input characteristics of the examined learning environments and compared with related work. Three main conclusions can be drawn from the measured results. First, the maximum size of the population of the classifiers influences the runtime of the genetic algorithm; second, the size of the input space has a direct effect on the execution time of the matching function; and last, a larger action space results in a longer runtime generating the prediction for the possible actions. The dependencies identified here can serve to optimize the computational efficiency and make XCS more suitable for embedded systems.},
}

@misc{29540,
  author       = {Sheikh, Muhammad Aamir},
  title        = {Design and Implementation of a {ReconROS}-based Obstacle Avoidance System},
  publisher    = {Paderborn University},
  year         = {2021},
  abstract     = {Autonomous mobile robots are becoming increasingly more capable and widespread. Reliable Obstacle avoidance is an integral part of autonomous navigation. This involves real time interpretation and processing of a complex environment. Strict time and energy constraints of a mobile autonomous system make efficient computation extremely desirable. The benefits of employing Hardware/Software co-designed applications are obvious and significant. Hardware accelerators are used for efficient processing of the algorithms by exploiting parallelism. FPGAs are a class of hardware accelerators, which can contain hundreds of small execution units, and can be used for Hardware/Software co-designed application. However, there is a reluctance when it comes to adoption of these devices in well established application domains, such as Robotics, due to a steep learning curve needed for FPGA application design. ReconROS has successfully bridged the gap between robotic and FPGA application development, by providing an intuitive, common development platform for robotic application development for FPGA. It does so by integrating Robotics Operating System (ROS) which is an industry and academia standard for robotics application development, with ReconOS, an operating system for re-configurable hardware. In this thesis an obstacle avoidance system is designed and implemented for an autonomous vehicle using ReconROS. The objectives of the thesis is to demonstrate and explore ReconROS integration within the ROS ecosystem and explore the design process within ReconROS framework, and to demonstrate the effectiveness of Hardware Acceleration in Robotics, by analysing the resulting architectures for Latency and Power Consumption.},
}

@misc{21324,
  author       = {Chandrakar, Khushboo},
  title        = {Comparison of Feature Selection Techniques to Improve Approximate Circuit Synthesis},
  year         = {2020},
}

@misc{21432,
  author       = {Henke, Luca-Sebastian},
  title        = {Evaluation of a {ReconOS-ROS} Combination based on a Video Processing Application},
  year         = {2020},
  abstract     = {Robots are becoming increasingly autonomous and more capable. Because of a limited portable energy budget by e.g. batteries, and more demanding algorithms, an efficient computation is of interest. Field Programmable Gate Arrays (FPGAs) for example can provide fast and efficient processing and the Robot Operating System (ROS) is a popular middleware used for robotic applications. The novel ReconROS combines version 2 of the Robot Operating System with ReconOS, a framework for integrating reconfigurable hardware. It provides a unified interface between software and hardware. ReconROS is evaluated in this thesis by implementing a Sobel filter as the video processing application, running on a Zynq-7000 series System on Chip. Timing measurements were taken of execution and transfer times and were compared to theoretical values. Designing the hardware implementation is done by C code using High Level Synthesis and with the interface and functionality provided by ReconROS. An important aspect is the publish/subscribe mechanism of ROS. The Operating System interface functions for publishing and subscribing are reasonably fast at below 10 ms for a 1 MB color VGA image. The main memory interface performs well at higher data sizes, crossing 100 MB/s at 20 kB and increasing to a maximum of around 150 MB/s. Furthermore, the hardware implementation introduces consistency to the execution times and performs twice as fast as the software implementation.},
}

@misc{20820,
  author       = {Thiele, Simon},
  title        = {Implementing Machine Learning Functions as {PYNQ} {FPGA} Overlays},
  year         = {2020},
}

@misc{20821,
  author       = {Jaganath, Vivek},
  title        = {Extension and Evaluation of {Python}-based High-Level Synthesis Tool Flows},
  year         = {2020},
}

@misc{21433,
  author       = {Jentzsch, Felix P.},
  title        = {Design and Implementation of a {ReconOS}-based {TensorFlow} {Lite} Delegate Architecture},
  year         = {2020},
  abstract     = {Modern machine learning (ML) techniques continue to move into the embedded system space because traditional centralized compute resources do not suit certain application domains, for example in mobile or real-time environments. Google’s TensorFlow Lite (TFLite) framework supports this shift from cloud to edge computing and makes ML inference accessible on resource-constrained devices. While it offers the possibility to partially delegate computation to hardware accelerators, there is no such “delegate” available to utilize the promising characteristics of reconfigurable hardware.
This thesis incorporates modern platform FPGAs into TFLite by implementing a modular delegate framework, which allows accelerators within the programmable logic to take over the execution of neural network layers. To facilitate the necessary hardware/software codesign, the FPGA delegate is based on the operating system for reconfigurable computing (ReconOS), whose partial reconfiguration support enables the instantiation of model-tailored accelerator architectures. In the hardware back-end, a streaming-based prototype accelerator for the MobileNet model family showcases the working order of the platform, but falls short of the desired performance. Thus, it indicates the need for further exploration of alternative accelerator designs, which the delegate could automatically synthesize to meet a model’s demands.},
}

% Master's thesis according to its own abstract, so the mastersthesis type
% with a school field is used.
@mastersthesis{15920,
  author       = {Keerthipati, Monica},
  title        = {A Bitstream-Level Proof-Carrying Hardware Technique for Information Flow Tracking},
  school       = {Universität Paderborn},
  year         = {2019},
  abstract     = {Secure hardware design is the most important aspect to be considered in addition to functional correctness. Achieving hardware security in today’s globalized Integrated Circuit (IC) supply chain is a challenging task. One solution that is widely considered to help achieve secure hardware designs is Information Flow Tracking (IFT). It provides an approach to verify that the systems adhere to security properties either by static verification during design phase or dynamic checking during runtime.
Proof-Carrying Hardware (PCH) is an approach to verify a functional design prior to using it in hardware. It is a two-party verification approach, where the target party, the consumer requests new functionalities with pre-defined properties to the producer. In response, the producer designs the IP (Intellectual Property) cores with the requested functionalities that adhere to the consumer-defined properties. The producer provides the IP cores and a proof certificate combined into a proof-carrying bitstream to the consumer to verify it. If the verification is successful, the consumer can use the IP cores in his hardware. In essence, the consumer can only run verified IP cores. Correctly applied, PCH techniques can help consumers to defend against many unintentional modifications and malicious alterations of the modules they receive. There are numerous published examples of how to use PCH to detect any change in the functionality of a circuit, i.e., pairing a PCH approach with functional equivalence checking for combinational or sequential circuits. For non-functional properties, since opening new covert channels to leak secret information from secure circuits is a viable attack vector for hardware trojans, i.e., intentionally added malicious circuitry, IFT technique is employed to make sure that secret/untrusted information never reaches any unclassified/trusted outputs.
This master thesis aims to explore the possibility of adapting Information Flow Tracking into a Proof-Carrying Hardware scenario. It aims to create a method that combines Information Flow Tracking (IFT) with a PCH approach at bitstream level enabling consumers to validate the trustworthiness of a module’s information flow without the computational costs of a complete flow analysis.},
}

@misc{14831,
  author       = {Sabu, Nithin S.},
  title        = {{FPGA} Acceleration of String Search Techniques in Huge Data Sets},
  publisher    = {Paderborn University},
  year         = {2019},
}

@misc{14546,
  author       = {Hansmeier, Tim},
  title        = {Autonomous Operation of High-Performance Compute Nodes through Self-Awareness and Learning Classifiers},
  publisher    = {Universität Paderborn},
  year         = {2019},
}

@misc{15874,
  author       = {Lienen, Christian},
  title        = {Implementing a Real-time System on a Platform {FPGA} operated with {ReconOS}},
  publisher    = {Universität Paderborn},
  year         = {2019},
}

@misc{3365,
  author       = {Schnuer, Jan-Philip},
  title        = {Static Scheduling Algorithms for Heterogeneous Compute Nodes},
  publisher    = {Universität Paderborn},
  year         = {2018},
}

@misc{3366,
  author       = {Croce, Marcel},
  title        = {Evaluation of {OpenCL}-based Compilation for {FPGAs}},
  publisher    = {Universität Paderborn},
  year         = {2018},
}

% The phdthesis type requires a school field, so the university is stored
% there instead of in publisher.
% NOTE(review): pages = 139 looks like a total page count rather than a page
% range - confirm; under biblatex the matching field would be pagetotal.
@phdthesis{3720,
  author       = {Ho, Nam},
  title        = {{FPGA}-based Reconfigurable Cache Mapping Schemes: Design and Optimization},
  school       = {Universität Paderborn},
  year         = {2018},
  pages        = {139},
  doi          = {10.17619/UNIPB/1-376},
  abstract     = {Traditional cache design uses a consolidated block of memory address bits to index a cache set, equivalent to the use of modulo functions. While this module-based mapping scheme is widely used in contemporary cache structures due to the simplicity of its hardware design and its good performance for sequences of consecutive addresses, its use may not be satisfactory for a variety of application domains having different characteristics. This thesis presents a new type of cache mapping scheme, motivated by programmable capabilities combined with Nature-inspired optimization of reconfigurable hardware. This research has focussed on an FPGA-based evolvable cache structure of the first level cache in a multi-core processor architecture, able to dynamically change cache indexing. To solve the challenge of reconfigurable cache mappings, a programmable Boolean circuit based on a combination of Look-up Table (LUT) memory elements is proposed. Focusing on optimization aspects at the system level, a Performance Measurement Infrastructure is introduced that is able to monitor the underlying microarchitectural metrics, and an adaptive evaluation strategy is presented that leverages on Evolutionary Algorithms, that is not only capable of evolving application-specific address-to-cache-index mappings for level one split caches but also of reducing optimization times. Putting this all together and prototyping in an FPGA for a LEON3/Linux-based multi-core processor, the creation of a system architecture reduces cache misses and improves performance over the use of conventional caches.},
}

@misc{3580,
  author       = {Hansmeier, Tim},
  title        = {An {FPGA} Accelerator for Checking Resolution Proofs},
  publisher    = {Universität Paderborn},
  year         = {2017},
}

@misc{1157,
  author       = {Witschen, Linus Matthias},
  title        = {A Framework for the Synthesis of Approximate Circuits},
  publisher    = {Universität Paderborn},
  year         = {2017},
}

% German title: the whole title stays brace-protected because sentence-casing
% styles would otherwise lowercase the capitalised German nouns.
@misc{74,
  author       = {Knorr, Christoph},
  title        = {{OpenCL-basierte Videoverarbeitung auf heterogenen Rechenknoten}},
  publisher    = {Universität Paderborn},
  year         = {2017},
}

% German title: the whole title stays brace-protected because sentence-casing
% styles would otherwise lowercase the capitalised German nouns.
@misc{3364,
  author       = {Knorr, Christoph},
  title        = {{Evaluation von Bildverarbeitungsalgorithmen in heterogenen Rechenknoten}},
  publisher    = {Universität Paderborn},
  year         = {2015},
}

@misc{10701,
  author       = {Koch, Benjamin},
  title        = {Hardware Acceleration of Mechatronic Controllers on a {Zynq} Platform {FPGA}},
  publisher    = {Paderborn University},
  year         = {2014},
}

