@article{165,
  abstract     = {{A broad spectrum of applications can be accelerated by offloading computation intensive parts to reconfigurable hardware. However, to achieve speedups, the number of loop it- erations (trip count) needs to be sufficiently large to amortize offloading overheads. Trip counts are frequently not known at compile time, but only at runtime just before entering a loop. Therefore, we propose to generate code for both the CPU and the coprocessor, and defer the offloading decision to the application runtime. We demonstrate how a toolflow, based on the LLVM compiler framework, can automatically embed dynamic offloading de- cisions into the application code. We perform in-depth static and dynamic analysis of pop- ular benchmarks, which confirm the general potential of such an approach. We also pro- pose to optimize the offloading process by decoupling the runtime decision from the loop execution (decision slack). The feasibility of our approach is demonstrated by a toolflow that automatically identifies suitable data-parallel loops and generates code for the FPGA coprocessor of a Convey HC-1. We evaluate the integrated toolflow with representative loops executed for different input data sizes.}},
  author       = {{Vaz, Gavin Francis and Riebler, Heinrich and Kenter, Tobias and Plessl, Christian}},
  issn         = {{0045-7906}},
  journal      = {{Computers and Electrical Engineering}},
  pages        = {{91--111}},
  publisher    = {{Elsevier}},
  title        = {{{Potential and Methods for Embedding Dynamic Offloading Decisions into Application Code}}},
  doi          = {{10.1016/j.compeleceng.2016.04.021}},
  volume       = {{55}},
  year         = {{2016}},
}

