-
Notifications
You must be signed in to change notification settings - Fork 0
/
mybibfile.bib
364 lines (325 loc) · 12.8 KB
/
mybibfile.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
% Journal article: Dirac, Physica vol. 19 (1953), pp. 888--896.
% The issue range is written with an ASCII double hyphen so classic BibTeX handles it.
@article{Dirac1953888,
  author  = {Dirac, P. A. M.},
  title   = {The {Lorentz} transformation and absolute time},
  journal = {Physica},
  volume  = {19},
  number  = {1--12},
  pages   = {888--896},
  year    = {1953},
  doi     = {10.1016/S0031-8914(53)80099-6},
}
% Journal article: Feynman and Vernon, Annals of Physics vol. 24 (1963).
% The suffix uses the "Last, Jr, First" form so it is parsed as a Jr part, not as part of the surname.
@article{Feynman1963118,
  author  = {Feynman, R. P. and Vernon, Jr., F. L.},
  title   = {The theory of a general quantum system interacting with a linear dissipative system},
  journal = {Annals of Physics},
  volume  = {24},
  pages   = {118--173},
  year    = {1963},
  doi     = {10.1016/0003-4916(63)90068-X},
}
% Journal article: Landau collision integral solver, SIAM J. Sci. Comput. 39(6), 2017.
@article{AdamsHirvijokiKnepleyBrownIsaacMills2017,
  author  = {Mark F. Adams and Eero Hirvijoki and Matthew G. Knepley and Jed Brown and Tobin Isaac and Richard Mills},
  title   = {Landau Collision Integral Solver with Adaptive Mesh Refinement on Emerging Architectures},
  journal = {SIAM Journal on Scientific Computing},
  volume  = {39},
  number  = {6},
  pages   = {C452--C465},
  year    = {2017},
  doi     = {10.1137/17M1118828},
}
% Software reference: OSU Microbenchmarks v5.6.2, cited by URL.
% Typed as misc with the URL in `note`, matching the other web references in this file.
@misc{OSUMicro,
  author = {Panda, D. K. and others},
  title  = {{OSU} Microbenchmarks v5.6.2},
  year   = {2019},
  note   = {http://mvapich.cse.ohio-state.edu/benchmarks/},
}
% Book chapter: preliminary PETSc GPU implementation, in a Springer edited volume (2013).
@incollection{petsc-msk2013,
  author    = {Victor Minden and Barry F. Smith and Matthew G. Knepley},
  title     = {Preliminary Implementation of {PETSc} Using {GPUs}},
  booktitle = {{GPU} Solutions to Multi-scale Problems in Science and Engineering},
  editor    = {David A. Yuen and Long Wang and Xuebin Chi and Lennart Johnsson and Wei Ge and Yaolin Shi},
  series    = {Lecture Notes in Earth System Sciences},
  publisher = {Springer Berlin Heidelberg},
  pages     = {131--140},
  year      = {2013},
  isbn      = {978-3-642-16404-0},
  doi       = {10.1007/978-3-642-16405-7_7},
}
% Technical report ANL-19/41: PETSc on Summit, Part I (vector node performance).
% `abstractNote` and `place` are not standard BibTeX fields; they are carried along unchanged.
@techreport{osti_1614879,
  author       = {Morgan, Hannah Mairs and Mills, Richard Tran and Smith, Barry},
  title        = {Evaluation of {PETSc} on a Heterogeneous Architecture, the {OLCF} {S}ummit System: Part {I}: Vector Node Performance},
  institution  = {Argonne National Laboratory},
  number       = {ANL-19/41},
  year         = {2020},
  doi          = {10.2172/1614879},
  place        = {United States},
  abstractNote = {Our goal is to report on the basic performance of {PETSc} vector operations on a single node of the Oakridge Leadership Facility System Summit. We describe the Summit system and present data collected from several vector operations. Limited analysis is also presented.},
}
% Conference paper: the Aluminum GPU-aware communication library (MLHPC 2018).
% Placeholder export fields (empty volume/number, ISSN "null") are omitted.
@inproceedings{SNIR,
  author    = {Dryden, N. and Maruyama, N. and Moon, T. and Benson, T. and Yoo, A. and Snir, M. and Van Essen, B.},
  title     = {Aluminum: An Asynchronous, {GPU}-Aware Communication Library Optimized for Large-Scale Training of Deep Neural Networks on {HPC} Systems},
  booktitle = {2018 IEEE/ACM Machine Learning in HPC Environments (MLHPC)},
  pages     = {1--13},
  year      = {2018},
  month     = nov,
  doi       = {10.1109/MLHPC.2018.8638639},
}
% Technical report: PETSc Users Manual, ANL-95/11 revision 3.14.
% NOTE(review): `Hong Zhang` appears twice in the author list; presumably two distinct
% people -- confirm before de-duplicating.
@techreport{petsc-user-ref,
  author      = {Satish Balay and Shrirang Abhyankar and Mark~F. Adams and Jed Brown and Peter Brune
                 and Kris Buschelman and Lisandro Dalcin and Alp Dener and Victor Eijkhout and William~D. Gropp
                 and Dmitry Karpeyev and Dinesh Kaushik and Matthew~G. Knepley and Dave~A. May and Lois Curfman McInnes
                 and Richard Tran Mills and Todd Munson and Karl Rupp and Patrick Sanan
                 and Barry~F. Smith and Stefano Zampini and Hong Zhang and Hong Zhang},
  title       = {{PETSc} Users Manual},
  institution = {Argonne National Laboratory},
  number      = {ANL-95/11 - Revision 3.14},
  year        = {2020},
  note        = {https://www.mcs.anl.gov/petsc},
}
% Technical report ANL-20/76: PETSc on Summit, Part II (basic communication performance).
@techreport{sf-tech-report,
  author      = {Junchao Zhang and Richard Tran Mills and Barry F. Smith},
  title       = {Evaluation of {PETSc} on a Heterogeneous Architecture, the {OLCF} {S}ummit System: Part {II}: Basic Communication Performance},
  institution = {Argonne National Laboratory},
  number      = {ANL-20/76},
  year        = {2020},
}
% Submitted manuscript on KSPHPDDM and PCHPDDM, with a companion repository URL.
% Typed as unpublished so the submission status lives in `note` rather than in `journal`.
% NOTE(review): update to an article entry with the real venue once the paper has appeared.
@unpublished{KSPHPDDM,
  author = {Jolivet, Pierre and Roman, Jose E. and Zampini, Stefano},
  title  = {{KSPHPDDM} and {PCHPDDM}: Extending {PETSc} with Robust Overlapping {Schwarz} Preconditioners and Advanced {Krylov} Methods},
  year   = {2020},
  note   = {Submitted for publication. https://github.com/prj-/jolivet2020petsc},
}
% Software reference: the Thrust library, cited by repository URL.
@misc{THRUST,
  author = {Nathan Bell and Jared Hoberock},
  title  = {Thrust: Code at the speed of light},
  year   = {2020},
  note   = {https://github.com/NVIDIA/thrust},
}
% Software reference: OpenFOAM, cited by project URL with a corporate author.
@misc{OPENFOAM,
  author = {{The OpenFOAM Foundation}},
  title  = {{OpenFOAM}},
  year   = {2020},
  note   = {https://openfoam.org/},
}
% Software reference: PETSc4FOAM, cited by repository URL.
@misc{PETSc4FOAM,
  author = {Simone Bna and Mark Olesen and Stefano Zampini},
  title  = {{PETSc4FOAM}},
  year   = {2020},
  note   = {https://develop.openfoam.com/modules/external-solver},
}
% Benchmark reference: OpenFOAM HPC 3D lid-driven cavity case, cited by URL.
% No author is recorded, so `key` supplies the sort and label text that BibTeX
% would otherwise take from the author field.
@misc{OpenFOAMLid,
  key   = {OpenFOAM HPC benchmark suite},
  title = {{OpenFOAM HPC} benchmark suite: {3D} Lid Driven cavity flow},
  year  = {2020},
  note  = {https://develop.openfoam.com/committees/hpc/-/tree/develop/Lid\_driven\_cavity-3d/M},
}
% Conference paper: extreme-scale multigrid components within PETSc (PASC '16).
% NOTE(review): the DOI was disabled in the original with a leading `//`, which is not
% BibTeX comment syntax. It stays disabled here under an ignored field name; rename
% `doi-disabled` to `doi` to print it.
@inproceedings{PCTELESCOPE,
  author       = {May, Dave A. and Sanan, Patrick and Rupp, Karl and Knepley, Matthew G. and Smith, Barry F.},
  title        = {Extreme-Scale Multigrid Components within {PETSc}},
  booktitle    = {Proceedings of the Platform for Advanced Scientific Computing Conference},
  series       = {PASC '16},
  publisher    = {Association for Computing Machinery},
  address      = {New York, NY, USA},
  location     = {Lausanne, Switzerland},
  articleno    = {5},
  pages        = {1--12},
  year         = {2016},
  isbn         = {9781450341264},
  doi-disabled = {10.1145/2929908.2929913},
  keywords     = {preconditioning, agglomeration, coarse-level solver, parallel computing, GPU, multigrid, HPC},
}
% Online documentation reference: NVSHMEM, cited by URL.
@misc{NVSHMEM,
  author = {NVIDIA},
  title  = {{NVIDIA} {OpenSHMEM} Library ({NVSHMEM}) Documentation},
  year   = {2020},
  note   = {https://docs.nvidia.com/hpc-sdk/nvshmem/api/docs/introduction.html},
}
% Online documentation reference: NCCL user guide, cited by URL.
@misc{NCCL,
  author = {NVIDIA},
  title  = {{NVIDIA} Collective Communication Library ({NCCL}) Documentation},
  year   = {2020},
  note   = {https://docs.nvidia.com/deeplearning/nccl/archives/nccl\_278/user-guide/docs/index.html},
}
% Specification reference: OpenSHMEM API V1.5, cited by URL with a corporate author.
@misc{OpenSHMEM,
  author = {{Open Source Software Solutions, Inc.}},
  title  = {{OpenSHMEM} Application Programming Interface V1.5},
  year   = {2020},
  note   = {http://www.openshmem.org/},
}
% Journal submission: mixed-domain, mixed-precision matrix multiplication in BLIS.
% NOTE(review): the single-letter braces in the author names ({V}an, {Z}ee, {G}eijn) look
% deliberate, since they influence how BibTeX splits the name parts -- confirm before
% normalising. The leading comma in `note` is likewise kept exactly as found.
@article{BLIS7,
  author  = {Field G. {V}an~{Z}ee and Devangi N. Parikh and Robert A. van~de~{G}eijn},
  title   = {Supporting Mixed-domain Mixed-precision Matrix Multiplication within the {BLIS} Framework},
  journal = {ACM Transactions on Mathematical Software},
  year    = {2019},
  note    = {, submitted},
}
% Journal article: Kokkos, J. Parallel Distrib. Comput. 74(12), 2014.
% `note` carries the special-issue title exactly as found.
@article{KOKKOS,
  author  = {H. Carter Edwards and Christian R. Trott and Daniel Sunderland},
  title   = {Kokkos: Enabling manycore performance portability through polymorphic memory access patterns},
  journal = {Journal of Parallel and Distributed Computing},
  volume  = {74},
  number  = {12},
  pages   = {3202--3216},
  year    = {2014},
  issn    = {0743-7315},
  doi     = {10.1016/j.jpdc.2014.07.003},
  note    = {{D}omain-Specific Languages and High-Level Frameworks for High-Performance Computing},
}
% Conference paper: RAJA performance-portability layer (P3HPC 2019).
% Initials are separated and dotted so abbreviating styles keep every initial.
@inproceedings{RAJA,
  author       = {Beckingsale, David A. and Burmark, Jason and Hornung, Rich and Jones, Holger and Killian, William and Kunen, Adam J. and Pearce, Olga and Robinson, Peter and Ryujin, Brian S. and Scogland, Thomas R. W.},
  title        = {{RAJA}: Portable performance for large-scale scientific applications},
  booktitle    = {2019 IEEE/ACM International Workshop on Performance, Portability and Productivity in HPC (P3HPC)},
  pages        = {71--81},
  year         = {2019},
  organization = {IEEE},
}
% Specification reference: SYCL 2020 provisional specification, cited by URL.
% The URL path is written without embedded break hyphens so it can be followed as printed.
@misc{SYCL,
  author = {{Khronos SYCL Working Group}},
  title  = {{SYCL} Specification: Generic heterogeneous computing for modern {C++}},
  year   = {2020},
  note   = {https://www.khronos.org/registry/SYCL/specs/sycl-2020-provisional.pdf},
}
% Book: Data Parallel C++ (Apress, 2020).
% Typed as book so that standard styles print the publisher.
@book{DPC++,
  author    = {Reinders, James and Ashbaugh, Ben and Brodman, James and Kinsner, Michael and Pennycook, John and Tian, Xinmin},
  title     = {Data Parallel {C++}: Mastering {DPC++} for Programming of Heterogeneous Systems using {C++} and {SYCL}},
  publisher = {Apress},
  year      = {2020},
}
% Journal article: ViennaCL, SIAM J. Sci. Comput. 38(5), 2016.
% The accented surname uses the braced escape form so classic BibTeX sorts it correctly.
@article{VIENNACL,
  author    = {Rupp, Karl and Tillet, Philippe and Rudolf, Florian and Weinbub, Josef and Morhammer, Andreas and Grasser, Tibor and J{\"u}ngel, Ansgar and Selberherr, Siegfried},
  title     = {{ViennaCL}---linear algebra library for multi- and many-core architectures},
  journal   = {SIAM Journal on Scientific Computing},
  volume    = {38},
  number    = {5},
  pages     = {S412--S439},
  year      = {2016},
  publisher = {SIAM},
}
% Journal article: OpenCL overview, Computing in Science and Engineering 12(3), 2010.
@article{OPENCL,
  author    = {Stone, John E. and Gohara, David and Shi, Guochun},
  title     = {{OpenCL}: A parallel programming standard for heterogeneous computing systems},
  journal   = {Computing in Science \& Engineering},
  volume    = {12},
  number    = {3},
  pages     = {66--73},
  year      = {2010},
  publisher = {IEEE Computer Society},
}
% Online documentation reference: CUDA C++ Programming Guide, cited by URL.
% The corporate author is double-braced so it is treated as a single name.
@misc{CUDA,
  author = {{NVIDIA}},
  title  = {{CUDA C++} Programming Guide},
  year   = {2020},
  note   = {https://docs.nvidia.com/cuda/pdf/CUDA\_C\_Programming\_Guide.pdf},
}
% Online documentation reference: HIP Programming Guide, cited by URL.
% The URL path is written without embedded break hyphens so it can be followed as printed.
@misc{HIP,
  author = {{AMD}},
  title  = {{HIP} Programming Guide},
  year   = {2020},
  note   = {https://rocmdocs.amd.com/en/latest/Programming\_Guides/HIP-GUIDE.html},
}
% Journal article: sparse solvers for exascale, Phil. Trans. R. Soc. A 378(2166), 2020.
% Surnames McInnes and Mills are split the same way as in the petsc-user-ref entry.
% The citation key is kept as-is because documents cite it by this name.
@article{Karl2020preparing,
  author    = {Anzt, Hartwig and Boman, Erik and Falgout, Rob and Ghysels, Pieter and Heroux, Michael and Li, Xiaoye and McInnes, Lois Curfman and Mills, Richard Tran and Rajamanickam, Sivasankaran and Rupp, Karl and others},
  title     = {Preparing sparse solvers for exascale computing},
  journal   = {Philosophical Transactions of the Royal Society A},
  volume    = {378},
  number    = {2166},
  pages     = {20190053},
  year      = {2020},
  publisher = {The Royal Society Publishing},
}
% Journal article: performance spectrum for parallel PDE frameworks,
% Concurrency and Computation: Practice and Experience 30(11), 2018.
@article{ChangPerformanceSpectrum,
  author    = {Chang, J. and Nakshatrala, K. B. and Knepley, Matthew G. and Johnsson, L.},
  title     = {A performance spectrum for parallel computational frameworks that solve {PDEs}},
  journal   = {Concurrency and Computation: Practice and Experience},
  volume    = {30},
  number    = {11},
  pages     = {e4401},
  year      = {2018},
  publisher = {Wiley Online Library},
}
% Journal article: Time-Accuracy-Size spectrum analysis, SIAM J. Sci. Comput. 40(6), 2018.
@article{ChangTASSpectrum,
  author    = {Chang, Justin and Fabien, Maurice S. and Knepley, Matthew G. and Mills, Richard T.},
  title     = {Comparative study of finite element methods using the Time-Accuracy-Size ({TAS}) spectrum analysis},
  journal   = {SIAM Journal on Scientific Computing},
  volume    = {40},
  number    = {6},
  pages     = {C779--C802},
  year      = {2018},
  publisher = {SIAM},
}
% Journal article: mixed-precision block FMA error analysis, SIAM J. Sci. Comput. 42(3), 2020.
@article{blanchard2020mixed,
  author    = {Blanchard, Pierre and Higham, Nicholas J. and Lopez, Florent and Mary, Th{\'e}o and Pranesh, Srikara},
  title     = {Mixed precision block fused multiply-add: Error analysis and application to {GPU} Tensor Cores},
  journal   = {SIAM Journal on Scientific Computing},
  volume    = {42},
  number    = {3},
  pages     = {C124--C141},
  year      = {2020},
  publisher = {SIAM},
}
% Journal article: sparse matrix--matrix multiplication on GPU tensor cores,
% Computers and Electrical Engineering vol. 88, 2020.
@article{zachariadis2020accelerating,
  author    = {Zachariadis, Orestis and Satpute, Nitin and G{\'o}mez-Luna, Juan and Olivares, Joaqu{\'\i}n},
  title     = {Accelerating sparse matrix--matrix multiplication with {GPU} Tensor Cores},
  journal   = {Computers \& Electrical Engineering},
  volume    = {88},
  pages     = {106848},
  year      = {2020},
  publisher = {Elsevier},
}
% Conference paper: FP16 tensor cores for mixed-precision iterative refinement (SC18).
@inproceedings{haidar2018harnessing,
  author       = {Haidar, Azzam and Tomov, Stanimire and Dongarra, Jack and Higham, Nicholas J.},
  title        = {Harnessing {GPU} tensor cores for fast {FP16} arithmetic to speed up mixed-precision iterative refinement solvers},
  booktitle    = {SC18: International Conference for High Performance Computing, Networking, Storage and Analysis},
  pages        = {603--613},
  year         = {2018},
  organization = {IEEE},
}
% Conference paper: composable linear solvers for multiphysics (ISPDC 2012).
% NOTE(review): the DOI was disabled in the original with a leading `//`, which is not
% BibTeX comment syntax. It stays disabled here under an ignored field name; rename
% `doi-disabled` to `doi` to print it.
@inproceedings{bkmms2012,
  author       = {Jed Brown and Matthew G. Knepley and David A. May and Lois C. McInnes and Barry F. Smith},
  title        = {Composable linear solvers for multiphysics},
  booktitle    = {Proceedings of the 11th {International Symposium on Parallel and Distributed Computing} ({ISPDC} 2012)},
  publisher    = {IEEE Computer Society},
  pages        = {55--62},
  year         = {2012},
  doi-disabled = {10.1109/ISPDC.2012.16},
}
% Journal article: representations of quasi-Newton matrices, Mathematical Programming 63, 1994.
% `Newton` is brace-protected so sentence-casing styles do not lower-case it.
@article{byrd1994representations,
  author    = {Byrd, Richard H. and Nocedal, Jorge and Schnabel, Robert B.},
  title     = {Representations of quasi-{Newton} matrices and their use in limited memory methods},
  journal   = {Mathematical Programming},
  volume    = {63},
  number    = {1--3},
  pages     = {129--156},
  year      = {1994},
  publisher = {Springer},
}
% Journal article: limited-memory quasi-Newton equations, Linear Algebra Appl. 515, 2017.
% `Newton` is brace-protected so sentence-casing styles do not lower-case it.
@article{erway2017solving,
  author    = {Erway, Jennifer B. and Marcia, Roummel F.},
  title     = {On solving large-scale limited-memory quasi-{Newton} equations},
  journal   = {Linear Algebra and its Applications},
  volume    = {515},
  pages     = {196--225},
  year      = {2017},
  publisher = {Elsevier},
}
% Journal article: sparse matrix-vector multiplication on GPGPUs, ACM TOMS 43(4), 2017.
% The journal is spelled out in full, matching the BLIS7 entry in this file.
% `issue_date`, `articleno` and `numpages` are publisher-export fields carried along unchanged.
@article{filippone2017GPGPUSpMV,
  author     = {Filippone, Salvatore and Cardellini, Valeria and Barbieri, Davide and Fanfarillo, Alessandro},
  title      = {Sparse Matrix-Vector Multiplication on {GPGPUs}},
  journal    = {ACM Transactions on Mathematical Software},
  volume     = {43},
  number     = {4},
  articleno  = {30},
  numpages   = {49},
  year       = {2017},
  month      = jan,
  issue_date = {March 2017},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  issn       = {0098-3500},
  doi        = {10.1145/3017994},
  keywords   = {Sparse matrices, {GPU} programming},
}