@inproceedings{614317ad1cc8486f8eb1d14e5f1a0c63,
  title     = {Data Transfer Matters for {GPU} Computing},
  author    = {Fujii, Yusuke and Azumi, Takuya and Nishio, Nobuhiko and Kato, Shinpei and Edahiro, Masato},
  booktitle = {Proceedings - 2013 19th {IEEE} International Conference on Parallel and Distributed Systems, {ICPADS} 2013},
  series    = {Proceedings of the International Conference on Parallel and Distributed Systems - {ICPADS}},
  publisher = {IEEE Computer Society},
  pages     = {275--282},
  year      = {2013},
  month     = dec,
  doi       = {10.1109/ICPADS.2013.47},
  isbn      = {9781479920815},
  language  = {English},
  keywords  = {Data Transfer, GPGPU, Latency, OS, Performance},
  abstract  = {Graphics processing units (GPUs) embrace many-core compute devices where massively parallel compute threads are offloaded from CPUs. This heterogeneous nature of GPU computing raises non-trivial data transfer problems especially against latency-critical real-time systems. However even the basic characteristics of data transfers associated with GPU computing are not well studied in the literature. In this paper, we investigate and characterize currently-achievable data transfer methods of cutting-edge GPU technology. We implement these methods using open-source software to compare their performance and latency for real-world systems. Our experimental results show that the hardware-assisted direct memory access (DMA) and the I/O read-and-write access methods are usually the most effective, while on-chip micro controllers inside the GPU are useful in terms of reducing the data transfer latency for concurrent multiple data streams. We also disclose that CPU priorities can protect the performance of GPU data transfers.},
  note      = {2013 19th IEEE International Conference on Parallel and Distributed Systems, ICPADS 2013 ; Conference date: 15-12-2013 Through 18-12-2013},
}