[{"doi":"10.1145/3218176.3218231","place":"New York, NY, USA","external_id":{"arxiv":["1710.10899"]},"date_created":"2018-03-22T10:53:01Z","citation":{"mla":"Lass, Michael, et al. “A Massively Parallel Algorithm for the Approximate Calculation of Inverse P-Th Roots of Large Sparse Matrices.” *Proc. Platform for Advanced Scientific Computing (PASC) Conference*, ACM, 2018, doi:10.1145/3218176.3218231.","ieee":"M. Lass, S. Mohr, H. Wiebeler, T. Kühne, and C. Plessl, “A Massively Parallel Algorithm for the Approximate Calculation of Inverse p-th Roots of Large Sparse Matrices,” in *Proc. Platform for Advanced Scientific Computing (PASC) Conference*, Basel, Switzerland, 2018.","bibtex":"@inproceedings{Lass_Mohr_Wiebeler_Kühne_Plessl_2018, place={New York, NY, USA}, title={A Massively Parallel Algorithm for the Approximate Calculation of Inverse p-th Roots of Large Sparse Matrices}, DOI={10.1145/3218176.3218231}, booktitle={Proc. Platform for Advanced Scientific Computing (PASC) Conference}, publisher={ACM}, author={Lass, Michael and Mohr, Stephan and Wiebeler, Hendrik and Kühne, Thomas and Plessl, Christian}, year={2018} }","short":"M. Lass, S. Mohr, H. Wiebeler, T. Kühne, C. Plessl, in: Proc. Platform for Advanced Scientific Computing (PASC) Conference, ACM, New York, NY, USA, 2018.","apa":"Lass, M., Mohr, S., Wiebeler, H., Kühne, T., & Plessl, C. (2018). A Massively Parallel Algorithm for the Approximate Calculation of Inverse p-th Roots of Large Sparse Matrices. In *Proc. Platform for Advanced Scientific Computing (PASC) Conference*. New York, NY, USA: ACM. https://doi.org/10.1145/3218176.3218231","chicago":"Lass, Michael, Stephan Mohr, Hendrik Wiebeler, Thomas Kühne, and Christian Plessl. “A Massively Parallel Algorithm for the Approximate Calculation of Inverse P-Th Roots of Large Sparse Matrices.” In *Proc. Platform for Advanced Scientific Computing (PASC) Conference*. New York, NY, USA: ACM, 2018. https://doi.org/10.1145/3218176.3218231.","ama":"Lass M, Mohr S, Wiebeler H, Kühne T, Plessl C. A Massively Parallel Algorithm for the Approximate Calculation of Inverse p-th Roots of Large Sparse Matrices. In: *Proc. Platform for Advanced Scientific Computing (PASC) Conference*. New York, NY, USA: ACM; 2018. doi:10.1145/3218176.3218231"},"status":"public","author":[{"full_name":"Lass, Michael","last_name":"Lass","first_name":"Michael","id":"24135","orcid":"0000-0002-5708-7632"},{"last_name":"Mohr","full_name":"Mohr, Stephan","first_name":"Stephan"},{"full_name":"Wiebeler, Hendrik","last_name":"Wiebeler","first_name":"Hendrik"},{"full_name":"Kühne, Thomas","last_name":"Kühne","first_name":"Thomas","id":"49079"},{"first_name":"Christian","last_name":"Plessl","full_name":"Plessl, Christian","orcid":"0000-0001-5728-9982","id":"16153"}],"year":"2018","publisher":"ACM","conference":{"end_date":"2018-07-04","location":"Basel, Switzerland","name":"Platform for Advanced Scientific Computing Conference (PASC)","start_date":"2018-07-02"},"date_updated":"2019-07-23T12:13:45Z","project":[{"grant_number":"PL 595/2-1","name":"Performance and Efficiency in HPC with Custom Computing","_id":"32"},{"name":"Computing Resources Provided by the Paderborn Center for Parallel Computing","_id":"52"}],"user_id":"24135","type":"conference","publication_identifier":{"isbn":["978-1-4503-5891-0/18/07"]},"publication":"Proc. Platform for Advanced Scientific Computing (PASC) Conference","keyword":["approximate computing","linear algebra","matrix inversion","matrix p-th roots","numeric algorithm","parallel computing"],"_id":"1590","title":"A Massively Parallel Algorithm for the Approximate Calculation of Inverse p-th Roots of Large Sparse Matrices","department":[{"_id":"27"},{"_id":"518"},{"_id":"304"}],"language":[{"iso":"eng"}],"abstract":[{"text":"We present the submatrix method, a highly parallelizable method for the approximate calculation of inverse p-th roots of large sparse symmetric matrices which are required in different scientific applications. Following the idea of Approximate Computing, we allow imprecision in the final result in order to utilize the sparsity of the input matrix and to allow massively parallel execution. For an n x n matrix, the proposed algorithm allows to distribute the calculations over n nodes with only little communication overhead. The result matrix exhibits the same sparsity pattern as the input matrix, allowing for efficient reuse of allocated data structures.\r\n\r\nWe evaluate the algorithm with respect to the error that it introduces into calculated results, as well as its performance and scalability. We demonstrate that the error is relatively limited for well-conditioned matrices and that results are still valuable for error-resilient applications like preconditioning even for ill-conditioned matrices. We discuss the execution time and scaling of the algorithm on a theoretical level and present a distributed implementation of the algorithm using MPI and OpenMP. We demonstrate the scalability of this implementation by running it on a high-performance compute cluster comprised of 1024 CPU cores, showing a speedup of 665x compared to single-threaded execution.","lang":"eng"}]}]