@inproceedings{6856,
  author       = {Müller, Michelle and Gutt, Dominik},
  booktitle    = {Wirtschaftsinformatik Proceedings 2019},
  location     = {Siegen, Germany},
  title        = {Heart over Heels? {An} Empirical Analysis of the Relationship between Emotions and Review Helpfulness for Experience and Credence Goods},
  year         = {2019},
}

@inproceedings{6857,
  author       = {Poniatowski, Martin and Neumann, Jürgen and Görzen, Thomas and Kundisch, Dennis},
  booktitle    = {Wirtschaftsinformatik Proceedings 2019},
  location     = {Siegen, Germany},
  title        = {A Semi-Automated Approach for Generating Online Review Templates},
  year         = {2019},
}

@inproceedings{6860,
  author       = {Afifi, Haitham and Karl, Holger},
  booktitle    = {2019 16th {IEEE} Annual Consumer Communications \& Networking Conference ({CCNC2019})},
  publisher    = {IEEE},
  title        = {Power Allocation with a Wireless Multi-cast Aware Routing for Virtual Network Embedding},
  year         = {2019},
}

@inproceedings{6976,
  abstract     = {We investigate the maintenance of overlay networks under massive churn, i.e.
nodes joining and leaving the network. We assume an adversary that may churn a
constant fraction $\alpha n$ of nodes over the course of $\mathcal{O}(\log n)$
rounds. In particular, the adversary has an almost up-to-date information of
the network topology as it can observe an only slightly outdated topology that
is at least $2$ rounds old. Other than that, we only have the provably minimal
restriction that new nodes can only join the network via nodes that have taken
part in the network for at least one round.
  Our contributions are as follows: First, we show that it is impossible to
maintain a connected topology if adversary has up-to-date information about the
nodes' connections. Further, we show that our restriction concerning the join
is also necessary. As our main result present an algorithm that constructs a
new overlay- completely independent of all previous overlays - every $2$
rounds. Furthermore, each node sends and receives only $\mathcal{O}(\log^3 n)$
messages each round. As part of our solution we propose the Linearized DeBruijn
Swarm (LDS), a highly churn resistant overlay, which will be maintained by the
algorithm. However, our approaches can be transferred to a variety of classical
P2P Topologies where nodes are mapped into the $[0,1)$-interval.},
  author       = {Götte, Thorsten and Vijayalakshmi, Vipin Ravindran and Scheideler, Christian},
  booktitle    = {Proceedings of the 2019 {IEEE} 33rd International Parallel and Distributed Processing Symposium ({IPDPS} '19)},
  location     = {Rio de Janeiro, Brazil},
  publisher    = {IEEE},
  title        = {Always be Two Steps Ahead of Your Enemy -- {Maintaining} a Routable Overlay under Massive Churn with an Almost Up-to-date Adversary},
  year         = {2019},
}

@inproceedings{16411,
  abstract     = {FPGA devices have been proving to be good candidates to accelerate applications from different research topics. For instance, machine learning applications such as K-Means clustering usually relies on large amount of data to be processed, and, despite the performance offered by other architectures, FPGAs can offer better energy efficiency. With that in mind, Intel has launched a platform that integrates a multicore and an FPGA in the same package, enabling low latency and coherent fine-grained data offload. In this paper, we present a parallel implementation of the K-Means clustering algorithm, for this novel platform, using OpenCL language, and compared it against other platforms. We found that the CPU+FPGA platform was more energy efficient than the CPU-only approach from 70.71\% to 85.92\%, with Standard and Tiny input sizes respectively, and up to 68.21\% of performance improvement was obtained with Tiny input size. Furthermore, it was up to 7.2× more energy efficient than an Intel® Xeon Phi™, 21.5× than a cluster of Raspberry Pi boards, and 3.8× than the low-power MPPA-256 architecture, when the Standard input size was used.},
  author       = {Souza, Matheus A. and Maciel, Lucas A. and Penna, Pedro Henrique and Freitas, Henrique C.},
  booktitle    = {2018 30th International Symposium on Computer Architecture and High Performance Computing ({SBAC-PAD})},
  isbn         = {9781538677698},
  keywords     = {pc2-harp-ressources},
  title        = {Energy Efficient Parallel {K-Means} Clustering for an {Intel®} Hybrid Multi-Chip Package},
  doi          = {10.1109/cahpc.2018.8645850},
  year         = {2019},
}

@inproceedings{16413,
  abstract     = {In recent years, FPGAs have been successfully employed for the implementation of efficient, application-specific accelerators for a wide range of machine learning tasks. In this work, we consider probabilistic models, namely, (Mixed) Sum-Product Networks (SPN), a deep architecture that can provide tractable inference for multivariate distributions over mixed data-sources. We develop a fully pipelined FPGA accelerator architecture, including a pipelined interface to external memory, for the inference in (mixed) SPNs. To meet the precision constraints of SPNs, all computations are conducted using double-precision floating point arithmetic. Starting from an input description, the custom FPGA-accelerator is synthesized fully automatically by our tool flow. To the best of our knowledge, this work is the first approach to offload the SPN inference problem to FPGA-based accelerators. Our evaluation shows that the SPN inference problem benefits from offloading to our pipelined FPGA accelerator architecture.},
  author       = {Sommer, Lukas and Oppermann, Julian and Molina, Alejandro and Binnig, Carsten and Kersting, Kristian and Koch, Andreas},
  booktitle    = {2018 {IEEE} 36th International Conference on Computer Design ({ICCD})},
  isbn         = {9781538684771},
  keywords     = {pc2-harp-ressources},
  title        = {Automatic Mapping of the Sum-Product Network Inference Problem to {FPGA}-Based Accelerators},
  doi          = {10.1109/iccd.2018.00060},
  year         = {2019},
}

@misc{16415,
  author       = {Lienen, Julian},
  title        = {Automated Feature Engineering on Time Series Data},
  year         = {2019},
}

@inproceedings{16417,
  abstract     = {The performance of High-Level Synthesis (HLS) applications with irregular data structures is limited by its imperative programming paradigm like C/C++. In this paper, we show that constructing concurrent data structures with channels, a programming construct derived from CSP (communicating sequential processes) paradigm, is an effective approach to improve the performance of these applications. We evaluate concurrent data structure for FPGA by synthesizing a K-means clustering algorithm on the Intel HARP2 platform. A fully pipelined KMC processing element can be synthesized from OpenCL with the help of a SPSC (single-producer-single-consumer) queue and stack built from channels, achieving 15.2x speedup over a sequential baseline. The number of processing element can be scaled up by leveraging a MPMC (multiple-producer-multiple-consumer) stack with work distribution for dynamic load balance. Evaluation shows that an additional 3.5x speedup can be achieved when 4 processing element is instantiated. These results show that the concurrent data structure built with channels has great potential for improving the parallelism of HLS applications. We hope that our study will stimulate further research into the potential of channel-based HLS.},
  author       = {Yan, Hui and Li, Zhaoshi and Liu, Leibo and Yin, Shouyi and Wei, Shaojun},
  booktitle    = {Proceedings of the 2019 {ACM/SIGDA} International Symposium on Field-Programmable Gate Arrays},
  isbn         = {9781450361378},
  keywords     = {pc2-harp-ressources},
  title        = {Constructing Concurrent Data Structures on {FPGA} with Channels},
  doi          = {10.1145/3289602.3293921},
  year         = {2019},
}

@article{16420,
  abstract     = {Field-Programmable Gate Arrays (FPGAs) are widely used in the central signal processing design of the Square Kilometer Array (SKA) as hardware accelerators. The frequency domain acceleration search (FDAS) module is an important part of the SKA1-MID pulsar search engine. To develop for a yet to be finalized hardware, for cross-discipline interoperability and to achieve fast prototyping, OpenCL as a high-level FPGA synthesis approaches employed to create the sub-modules of FDAS. The FT convolution and the harmonic-summing plus some other minor sub-modules are elements in the FDAS module that have been well-optimized separately before. In this paper, we explore the design space of combining well-optimized designs, dealing with the ensuing need to trade-off and compromise. Pipeline computing is employed to handle multiple input arrays at high speed. The hardware target is to employ multiple high-end FPGAs to process the combined FDAS module. The results show interesting consequences, where the best individual solutions are not necessarily the best solutions for the speed of a pipeline where FPGA resources and memory bandwidth need to be shared. By proposing multiple buffering techniques to the pipeline, the combined FDAS module can achieve up to 2$\times$ speedup over implementations without pipeline computing. We perform an extensive experimental evaluation on multiple high-end FPGA cards hosted in a workstation and compare to a technology comparable mid-range GPU.},
  author       = {Wang, Haomiao and Thiagaraj, Prabu and Sinnen, Oliver},
  issn         = {2251-1717},
  journal      = {Journal of Astronomical Instrumentation},
  keywords     = {pc2-harp-ressources},
  title        = {Combining Multiple Optimized {FPGA}-based Pulsar Search Modules Using {OpenCL}},
  doi          = {10.1142/s2251171719500089},
  year         = {2019},
}

@article{16422,
  abstract     = {Intel recently introduced the Heterogeneous Architecture Research Platform, HARP. In this platform, the Central Processing Unit and a Field-Programmable Gate Array are connected through a high-bandwidth, low-latency interconnect and both share DRAM memory. For this platform, Open Computing Language (OpenCL), a High-Level Synthesis (HLS) language, is made available. By making use of HLS, a faster design cycle can be achieved compared to programming in a traditional hardware description language. This, however, comes at the cost of having less control over the hardware implementation. We will investigate how OpenCL can be applied to implement a real-time guided image filter on the HARP platform. In the first phase, the performance-critical parameters of the OpenCL programming model are defined using several specialized benchmarks. In a second phase, the guided image filter algorithm is implemented using the insights gained in the first phase. Both a floating-point and a fixed-point implementation were developed for this algorithm, based on a sliding window implementation. This resulted in a maximum floating-point performance of 135 GFLOPS, a maximum fixed-point performance of 430 GOPS and a throughput of HD color images at 74 frames per second.},
  author       = {Faict, Thomas and D’Hollander, Erik H. and Goossens, Bart},
  issn         = {1999-4893},
  journal      = {Algorithms},
  keywords     = {pc2-harp-ressources},
  title        = {Mapping a Guided Image Filter on the {HARP} Reconfigurable Architecture Using {OpenCL}},
  doi          = {10.3390/a12080149},
  year         = {2019},
}

@article{16423,
  abstract     = {Heterogeneous computing that exploits simultaneous co-processing with different device types has been shown to be effective at both increasing performance and reducing energy consumption. In this paper, we extend a scheduling framework encapsulated in a high-level C++ template and previously developed for heterogeneous chips comprising CPU and GPU cores, to new high-performance platforms for the data center, which include a cache coherent FPGA fabric and many-core CPU resources. Our goal is to evaluate the suitability of our framework with these new FPGA-based platforms, identifying performance benefits and limitations. We target the state-of-the-art HARP processor that includes 14 high-end Xeon classes tightly coupled to a FPGA device located in the same package. We select eight benchmarks from the high-performance computing domain that have been ported and optimized for this heterogeneous platform. The results show that a dynamic and adaptive scheduler that exploits simultaneous processing among the devices can improve performance up to a factor of 8 × compared to the best alternative solutions that only use the CPU cores or the FPGA fabric. Moreover, our proposal achieves up to 15\% and 37\% of improvement compared to the best heterogeneous solutions found with a dynamic and static schedulers, respectively.},
  author       = {Rodríguez, Andrés and Navarro, Angeles and Asenjo, Rafael and Corbera, Francisco and Gran, Rubén and Suárez, Darío and Nunez-Yanez, Jose},
  issn         = {0920-8542},
  journal      = {The Journal of Supercomputing},
  keywords     = {pc2-harp-ressources},
  title        = {Parallel multiprocessing and scheduling on the heterogeneous {Xeon+FPGA} platform},
  doi          = {10.1007/s11227-019-02935-1},
  year         = {2019},
}

@inproceedings{16427,
  abstract     = {Transactional Memory (TM) has been considered as a promising alternative to existing synchronization operations, which are often the largest stumbling block to unleashing parallelism of applications. Efficient implementations of TM, however, are challenging due to the tension between lowering performance overhead and avoiding unnecessary aborts.

In this paper, we present Reachability-based Optimistic Concurrency Control for Transactional Memory (ROCoCoTM), a novel scheme which offloads concurrency control (CC) algorithms, the central building blocks of TM systems, to reconfigurable hardware. To reduce the abort rate, an innovative formalization of mainstream CC algorithms is developed to reveal a common restriction that leads to unnecessary aborts. This restriction is resolved by the ROCoCo algorithm with a centralized validation phase, which can be efficiently pipelined in hardware. Thanks to a high-performance offloading engine implemented in reconfigurable hardware, ROCoCo algorithm results in decreased abort rates and reduced performance overhead. The whole system is implemented on Intel's HARP2 platform and evaluated with the STAMP benchmark suite. Experiments show 1.55x and 8.05x geomean speedup over TinySTM and an HTM based on Intel TSX, respectively. Given the fast-growing deployment of commodity CPU-FPGA platforms, ROCoCoTM paves the way for software programmers to exploit heterogeneous computing resources with a high-level transactional abstraction to effectively extract the parallelism in modern applications.},
  author       = {Li, Zhaoshi and Liu, Leibo and Deng, Yangdong and Wang, Jiawei and Liu, Zhiwei and Yin, Shouyi and Wei, Shaojun},
  booktitle    = {Proceedings of the 52nd Annual {IEEE/ACM} International Symposium on Microarchitecture},
  isbn         = {9781450369381},
  keywords     = {pc2-harp-ressources},
  title        = {{FPGA}-Accelerated Optimistic Concurrency Control for Transactional Memory},
  doi          = {10.1145/3352460.3358270},
  year         = {2019},
}

@inproceedings{16433,
  author       = {Rehlaender, Philipp and Grote, Tobias and Tikhonov, Sergey and Niejende, Hugues and Schafmeister, Frank and Bocker, Joachim and Thiemann, Peter},
  booktitle    = {2019 21st European Conference on Power Electronics and Applications ({EPE} '19 {ECCE} Europe)},
  isbn         = {9789075815313},
  title        = {A {PCB} Integrated Winding Using a {Litz} Structure for a Wireless Charging Coil},
  doi          = {10.23919/epe.2019.8914900},
  year         = {2019},
}

@inproceedings{16438,
  author       = {Rehlaender, Philipp and Schafmeister, Frank and Bocker, Joachim and Grote, Tobias},
  booktitle    = {2019 {IEEE} 28th International Symposium on Industrial Electronics ({ISIE})},
  isbn         = {9781728136660},
  title        = {Analytical Topology Comparison for a Single Stage On-Board {EV}-Battery Converter},
  doi          = {10.1109/isie.2019.8781222},
  year         = {2019},
}

@inbook{16443,
  author       = {Rehlaender, Philipp and Schroeer, Maik and Chadha, Gavneet and Schwung, Andreas},
  booktitle    = {Proceedings of the International Neural Networks Society},
  isbn         = {9783030168407},
  issn         = {2661-8141},
  title        = {Traffic Sign Detection Using {R-CNN}},
  doi          = {10.1007/978-3-030-16841-4_24},
  year         = {2019},
}

@article{16709,
  author       = {Sahai, Tuhin and Ziessler, Adrian and Klus, Stefan and Dellnitz, Michael},
  issn         = {0924-090X},
  journal      = {Nonlinear Dynamics},
  title        = {Continuous relaxations for the traveling salesman problem},
  doi          = {10.1007/s11071-019-05092-5},
  year         = {2019},
}

@inproceedings{16793,
  author       = {Pfeifer, Florian and Dietrich, André and Marten, Thorsten and Tröster, Thomas and Nacke, Bernard},
  booktitle    = {Proceedings of 7th International Conference on Hot Sheet Metal Forming of High-Performance Steel},
  isbn         = {978-3-95735-104-3},
  location     = {Luleå},
  pages        = {585--593},
  title        = {Investigation on Inductive Heating of Sheet Metal for an Industrial Hot Stamping Process},
  year         = {2019},
}

@inproceedings{16794,
  author       = {Striewe, Jan André and Thomas, Robert and Fischer, Fabian and Wiens, Timo and Tröster, Thomas},
  location     = {Neu-Ulm},
  publisher    = {DGM-Inventum GmbH},
  title        = {Energieabsorptions- und {Versagensverhalten} eines automobilen {Seitenschwellers} mit lokaler {Verstärkung} aus kohlenstofffaserverstärktem {Kunststoff} nach {Alterung}},
  year         = {2019},
}

@misc{16825,
  author       = {Ahlers, Dominik and Tröster, Thomas},
  publisher    = {EuroPM},
  title        = {Performance Parameters and {HIP} Routes for additively manufactured titanium alloy {Ti6Al4V}},
  year         = {2019},
}

@inproceedings{16826,
  author       = {Camberg, Alan Adam and Hielscher, Christian},
  booktitle    = {Aachen Body Engineering Days 2019},
  location     = {Aachen},
  title        = {A holistic approach to the lightweight design of tailored structural components using the example of a hybrid {A-pillar}},
  year         = {2019},
}