@inproceedings{ongaroSearchUnderstandableConsensus2014,
author = {Ongaro, Diego and Ousterhout, John},
title = {In Search of an Understandable Consensus Algorithm},
year = {2014},
isbn = {9781931971102},
publisher = {USENIX Association},
address = {USA},
abstract = {Raft is a consensus algorithm for managing a replicated log. It produces a result
equivalent to (multi-)Paxos, and it is as efficient as Paxos, but its structure is
different from Paxos; this makes Raft more understandable than Paxos and also provides
a better foundation for building practical systems. In order to enhance understandability,
Raft separates the key elements of consensus, such as leader election, log replication,
and safety, and it enforces a stronger degree of coherency to reduce the number of
states that must be considered. Results from a user study demonstrate that Raft is
easier for students to learn than Paxos. Raft also includes a new mechanism for changing
the cluster membership, which uses overlapping majorities to guarantee safety.},
booktitle = {Proceedings of the 2014 USENIX Conference on USENIX Annual Technical Conference},
pages = {305--320},
numpages = {16},
location = {Philadelphia, PA},
series = {USENIX ATC'14}
}
@misc{ongaroBugSingleserverMembership2015,
title = {bug in single-server membership changes},
url = {https://groups.google.com/g/raft-dev/c/t4xj6dJTP6E/m/d2D9LrWRza8J},
urldate = {2021-09-01},
author = {Ongaro, Diego},
day = 10,
month = jul,
year = {2015},
}
@phdthesis{ongaroConsensusBridgingTheory2014,
author = {Ongaro, Diego},
advisor = {Ousterhout, John K. and Mazi\`{e}res, David and Rosenblum, Mendel},
title = {Consensus: Bridging Theory and Practice},
year = {2014},
isbn = {9798662514218},
publisher = {Stanford University},
address = {Stanford, CA, USA},
abstract = {Distributed consensus is fundamental to building fault-tolerant systems. It allows
a collection of machines to work as a coherent group that can survive the failures
of some of its members. Unfortunately, the most common consensus algorithm, Paxos, is widely
regarded as difficult to understand and implement correctly. This dissertation presents
a new consensus algorithm called Raft, which was designed for understandability. Raft
first elects a server as leader, then concentrates all decision-making onto the leader.
These two basic steps are relatively independent and form a better structure than
Paxos, whose components are hard to separate. Raft elects a leader using voting and
randomized timeouts. The election guarantees that the leader already stores all the
information it needs, so data only flows outwards from the leader to other servers.
Compared to other leader-based algorithms, this reduces mechanism and simplifies the
behavior. Once a leader is elected, it manages a replicated log. Raft leverages a
simple invariant on how logs grow to reduce the algorithm's state space and accomplish
this task with minimal mechanism. Raft is also more suitable than previous algorithms
for real-world implementations. It performs well enough for practical deployments,
and it addresses all aspects of building a complete system, including how to manage
client interactions, how to change the cluster membership, and how to compact the
log when it grows too large. To change the cluster membership, Raft allows adding
or removing one server at a time (complex changes can be composed from these basic
steps), and the cluster continues servicing requests throughout the change. We believe
that Raft is superior to Paxos and other consensus algorithms, both for educational
purposes and as a foundation for implementation. Results from a user study demonstrate
that Raft is easier for students to learn than Paxos. The algorithm has been formally
specified and proven, its leader election algorithm works well in a variety of environments,
and its performance is equivalent to Multi-Paxos. Many implementations of Raft are
now available, and several companies are deploying Raft.},
note = {AAI28121474}
}
@article{stoicaChordScalablePeertopeer2001,
author = {Stoica, Ion and Morris, Robert and Karger, David and Kaashoek, M. Frans and Balakrishnan, Hari},
title = {Chord: A Scalable Peer-to-Peer Lookup Service for Internet Applications},
year = {2001},
issue_date = {October 2001},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
volume = {31},
number = {4},
issn = {0146-4833},
url = {https://doi.org/10.1145/964723.383071},
doi = {10.1145/964723.383071},
abstract = {A fundamental problem that confronts peer-to-peer applications is to efficiently locate
the node that stores a particular data item. This paper presents Chord, a distributed
lookup protocol that addresses this problem. Chord provides support for just one operation:
given a key, it maps the key onto a node. Data location can be easily implemented
on top of Chord by associating a key with each data item, and storing the key/data
item pair at the node to which the key maps. Chord adapts efficiently as nodes join
and leave the system, and can answer queries even if the system is continuously changing.
Results from theoretical analysis, simulations, and experiments show that Chord is
scalable, with communication cost and the state maintained by each node scaling logarithmically
with the number of Chord nodes.},
journal = {SIGCOMM Comput. Commun. Rev.},
month = aug,
pages = {149--160},
numpages = {12}
}
@inproceedings{liben-nowellAnalysisEvolutionPeertopeer2002,
author = {Liben-Nowell, David and Balakrishnan, Hari and Karger, David},
title = {Analysis of the Evolution of Peer-to-Peer Systems},
year = {2002},
isbn = {1581134851},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/571825.571863},
doi = {10.1145/571825.571863},
abstract = {In this paper, we give a theoretical analysis of peer-to-peer (P2P) networks operating
in the face of concurrent joins and unexpected departures. We focus on Chord, a recently
developed P2P system that implements a distributed hash table abstraction, and study
the process by which Chord maintains its distributed state as nodes join and leave
the system. We argue that traditional performance measures based on run-time are uninformative
for a continually running P2P network, and that the rate at which nodes in the network
need to participate to maintain system state is a more useful metric. We give a general
lower bound on this rate for a network to remain connected, and prove that an appropriately
modified version of Chord's maintenance rate is within a logarithmic factor of the
optimum rate.},
booktitle = {Proceedings of the Twenty-First Annual Symposium on Principles of Distributed Computing},
pages = {233--242},
numpages = {10},
location = {Monterey, California},
series = {PODC '02}
}
@inproceedings{rowstronPastryScalableDecentralized2001,
address = {Berlin, Heidelberg},
series = {Lecture {Notes} in {Computer} {Science}},
title = {Pastry: {Scalable}, {Decentralized} {Object} {Location}, and {Routing} for {Large}-{Scale} {Peer}-to-{Peer} {Systems}},
isbn = {978-3-540-45518-9},
shorttitle = {Pastry},
doi = {10.1007/3-540-45518-3_18},
abstract = {This paper presents the design and evaluation of Pastry, a scalable, distributed object location and routing substrate for wide-area peer-to-peer applications. Pastry performs application-level routing and object location in a potentially very large overlay network of nodes connected via the Internet. It can be used to support a variety of peer-to-peer applications, including global data storage, data sharing, group communication and naming. Each node in the Pastry network has a unique identifier (nodeId). When presented with a message and a key, a Pastry node efficiently routes the message to the node with a nodeId that is numerically closest to the key, among all currently live Pastry nodes. Each Pastry node keeps track of its immediate neighbors in the nodeId space, and notifies applications of new node arrivals, node failures and recoveries. Pastry takes into account network locality; it seeks to minimize the distance messages travel, according to a scalar proximity metric like the number of IP routing hops. Pastry is completely decentralized, scalable, and self-organizing; it automatically adapts to the arrival, departure and failure of nodes. Experimental results obtained with a prototype implementation on an emulated network of up to 100,000 nodes confirm Pastry’s scalability and efficiency, its ability to self-organize and adapt to node failures, and its good network locality properties.},
language = {en},
booktitle = {Middleware 2001},
publisher = {Springer},
author = {Rowstron, Antony and Druschel, Peter},
editor = {Guerraoui, Rachid},
year = {2001},
pages = {329--350},
}
@article{kotlaZyzzyvaSpeculativeByzantine2009,
author = {Kotla, Ramakrishna and Alvisi, Lorenzo and Dahlin, Mike and Clement, Allen and Wong, Edmund},
title = {Zyzzyva: Speculative Byzantine Fault Tolerance},
year = {2010},
issue_date = {December 2009},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
volume = {27},
number = {4},
issn = {0734-2071},
url = {https://doi.org/10.1145/1658357.1658358},
doi = {10.1145/1658357.1658358},
abstract = {A longstanding vision in distributed systems is to build reliable systems from unreliable
components. An enticing formulation of this vision is Byzantine Fault-Tolerant (BFT)
state machine replication, in which a group of servers collectively act as a correct
server even if some of the servers misbehave or malfunction in arbitrary (“Byzantine”)
ways. Despite this promise, practitioners hesitate to deploy BFT systems, at least
partly because of the perception that BFT must impose high overheads. In this article,
we present Zyzzyva, a protocol that uses speculation to reduce the cost of BFT replication.
In Zyzzyva, replicas reply to a client's request without first running an expensive
three-phase commit protocol to agree on the order to process requests. Instead, they
optimistically adopt the order proposed by a primary server, process the request,
and reply immediately to the client. If the primary is faulty, replicas can become
temporarily inconsistent with one another, but clients detect inconsistencies, help
correct replicas converge on a single total ordering of requests, and only rely on
responses that are consistent with this total order. This approach allows Zyzzyva
to reduce replication overheads to near their theoretical minima and to achieve throughputs
of tens of thousands of requests per second, making BFT replication practical for
a broad range of demanding services.},
journal = {ACM Trans. Comput. Syst.},
month = jan,
articleno = {7},
numpages = {39},
keywords = {output commit, speculative execution, replication, Byzantine fault tolerance}
}
@inproceedings{10.1145/1294261.1294267,
author = {Kotla, Ramakrishna and Alvisi, Lorenzo and Dahlin, Mike and Clement, Allen and Wong, Edmund},
title = {Zyzzyva: Speculative Byzantine Fault Tolerance},
year = {2007},
isbn = {9781595935915},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/1294261.1294267},
doi = {10.1145/1294261.1294267},
abstract = {We present Zyzzyva, a protocol that uses speculation to reduce the cost and simplify
the design of Byzantine fault tolerant state machine replication. In Zyzzyva, replicas
respond to a client's request without first running an expensive three-phase commit
protocol to reach agreement on the order in which the request must be processed. Instead,
they optimistically adopt the order proposed by the primary and respond immediately
to the client. Replicas can thus become temporarily inconsistent with one another,
but clients detect inconsistencies, help correct replicas converge on a single total
ordering of requests, and only rely on responses that are consistent with this total
order. This approach allows Zyzzyva to reduce replication overheads to near their
theoretical minima.},
booktitle = {Proceedings of Twenty-First ACM SIGOPS Symposium on Operating Systems Principles},
pages = {45–58},
numpages = {14},
keywords = {output commit, byzantine fault tolerance, replication, speculative execution},
location = {Stevenson, Washington, USA},
series = {SOSP '07}
}
@article{kotlaZyzzyvaSpeculativeByzantine2007,
author = {Kotla, Ramakrishna and Alvisi, Lorenzo and Dahlin, Mike and Clement, Allen and Wong, Edmund},
title = {Zyzzyva: Speculative Byzantine Fault Tolerance},
year = {2007},
issue_date = {December 2007},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
volume = {41},
number = {6},
issn = {0163-5980},
url = {https://doi.org/10.1145/1323293.1294267},
doi = {10.1145/1323293.1294267},
abstract = {We present Zyzzyva, a protocol that uses speculation to reduce the cost and simplify
the design of Byzantine fault tolerant state machine replication. In Zyzzyva, replicas
respond to a client's request without first running an expensive three-phase commit
protocol to reach agreement on the order in which the request must be processed. Instead,
they optimistically adopt the order proposed by the primary and respond immediately
to the client. Replicas can thus become temporarily inconsistent with one another,
but clients detect inconsistencies, help correct replicas converge on a single total
ordering of requests, and only rely on responses that are consistent with this total
order. This approach allows Zyzzyva to reduce replication overheads to near their
theoretical minima.},
journal = {SIGOPS Oper. Syst. Rev.},
month = oct,
pages = {45–58},
numpages = {14},
keywords = {byzantine fault tolerance, replication, output commit, speculative execution}
}
@article{martinFastByzantineConsensus2006,
title = {Fast {Byzantine} {Consensus}},
volume = {3},
copyright = {Copyright IEEE Computer Society Jul-Sep 2006},
issn = {15455971},
url = {http://www.proquest.com/docview/206534931/abstract/A91EECC1018D4A46PQ/1},
doi = {10.1109/TDSC.2006.35},
abstract = {We present the first protocol that reaches asynchronous Byzantine consensus in two communication steps in the common case. We prove that our protocol is optimal in terms of both number of communication steps and number of processes for two-step consensus. The protocol can be used to build a replicated state machine that requires only three communication steps per request in the common case. Further, we show a parameterized version of the protocol that is safe despite f Byzantine failures and, in the common case, guarantees two-step execution despite some number t of failures ($t \le f$). We show that this parameterized two-step consensus protocol is also optimal in terms of both number of communication steps and number of processes.},
language = {English},
number = {3},
journal = {IEEE Transactions on Dependable and Secure Computing},
author = {Martin, Jean-Philippe and Alvisi, Lorenzo},
month = sep,
year = {2006},
note = {Place: Washington, United States. Publisher: IEEE Computer Society},
keywords = {Communication, Computer programming, Computers--Computer Security, Protocol, Studies},
pages = {202--215},
}
@inproceedings{martinFastByzantineConsensus2005,
title = {Fast {Byzantine} {Consensus}},
doi = {10.1109/DSN.2005.48},
abstract = {We present the first consensus protocol that reaches asynchronous Byzantine consensus in two communication steps in the common case. We prove that our protocol is optimal in terms of both number of communication steps and number of processes for 2-step consensus. The protocol can be used to build a replicated state machine that requires only three communication steps per request in the common case.},
booktitle = {2005 {International} {Conference} on {Dependable} {Systems} and {Networks} ({DSN}'05)},
author = {Martin, J.-P. and Alvisi, L.},
month = jun,
year = {2005},
note = {ISSN: 2158-3927},
keywords = {Computer crashes, Delay, Fault tolerance, Fault tolerant systems, Paper technology, Pathology, Protection, Protocols, Safety, Software performance},
pages = {402--411},
}
@inproceedings{moraruThereMoreConsensus2013,
address = {New York, NY, USA},
series = {{SOSP} '13},
title = {There is more consensus in {Egalitarian} parliaments},
isbn = {978-1-4503-2388-8},
url = {http://doi.org/10.1145/2517349.2517350},
doi = {10.1145/2517349.2517350},
abstract = {This paper describes the design and implementation of Egalitarian Paxos (EPaxos), a new distributed consensus algorithm based on Paxos. EPaxos achieves three goals: (1) optimal commit latency in the wide-area when tolerating one and two failures, under realistic conditions; (2) uniform load balancing across all replicas (thus achieving high throughput); and (3) graceful performance degradation when replicas are slow or crash. Egalitarian Paxos is to our knowledge the first protocol to achieve the previously stated goals efficiently---that is, requiring only a simple majority of replicas to be non-faulty, using a number of messages linear in the number of replicas to choose a command, and committing commands after just one communication round (one round trip) in the common case or after at most two rounds in any case. We prove Egalitarian Paxos's properties theoretically and demonstrate its advantages empirically through an implementation running on Amazon EC2.},
booktitle = {Proceedings of the {Twenty}-{Fourth} {ACM} {Symposium} on {Operating} {Systems} {Principles}},
publisher = {Association for Computing Machinery},
author = {Moraru, Iulian and Andersen, David G. and Kaminsky, Michael},
month = nov,
year = {2013},
pages = {358--372},
}
@inproceedings{nawabDPaxosManagingData2018,
address = {New York, NY, USA},
series = {{SIGMOD} '18},
title = {{DPaxos}: {Managing} {Data} {Closer} to {Users} for {Low}-{Latency} and {Mobile} {Applications}},
isbn = {978-1-4503-4703-7},
shorttitle = {{DPaxos}},
url = {https://doi.org/10.1145/3183713.3196928},
doi = {10.1145/3183713.3196928},
abstract = {In this paper, we propose Dynamic Paxos (DPaxos), a Paxos-based consensus protocol to manage access to partitioned data across globally-distributed datacenters and edge nodes. DPaxos is intended to implement a State Machine Replication component in data management systems for the edge. DPaxos targets the unique opportunities of utilizing edge computing resources to support emerging applications with stringent mobility and real-time requirements such as Augmented and Virtual Reality and vehicular applications. The main objective of DPaxos is to reduce the latency of serving user requests, recovering from failures, and reacting to mobility. DPaxos achieves these objectives by a few proposed changes to the traditional Paxos protocol. Most notably, DPaxos proposes a dynamic allocation of quorums ( i.e. , groups of nodes) that are needed for Paxos Leader Election. Leader Election quorums in DPaxos are smaller than traditional Paxos and expand only in the presence of conflicts.},
urldate = {2021-09-07},
booktitle = {Proceedings of the 2018 {International} {Conference} on {Management} of {Data}},
publisher = {Association for Computing Machinery},
author = {Nawab, Faisal and Agrawal, Divyakant and El Abbadi, Amr},
month = may,
year = {2018},
keywords = {edge computing, geo-replication, multi-datacenter, paxos, transaction processing},
pages = {1221--1236},
}
@techreport{amos15812TermPaper2015,
title = {15-812 {Term} {Paper}: {Specifying} and proving cluster membership for the {Raft} distributed consensus algorithm},
url = {https://www.cs.cmu.edu/~aplatzer/course/pls15/projects/bamos.pdf},
author = {Amos, Brandon and Zhang, Huanchen},
year = {2015},
pages = {46},
}
@misc{howardRaftDoesNot2020,
title = {Raft does not {Guarantee} {Liveness} in the face of {Network} {Faults}},
url = {https://decentralizedthoughts.github.io/2020-12-12-raft-liveness-full-omission/},
abstract = {Last month, Cloudflare published a postmortem of a recent 6-hour outage caused by a partial switch failure which left etcd unavailable as it was unable to establish a stable leader. This outage has understandably led to discussion online about exactly what liveness guarantees are provided by the Raft consensus algorithm...},
urldate = {2021-05-07},
author = {Howard, Heidi and Abraham, Ittai},
month = dec,
year = {2020},
}
@article{zaveUsingLightweightModeling2012,
title = {Using lightweight modeling to understand chord},
volume = {42},
issn = {0146-4833},
url = {https://dl.acm.org/doi/10.1145/2185376.2185383},
doi = {10.1145/2185376.2185383},
abstract = {Correctness of the Chord ring-maintenance protocol would mean that the protocol can eventually repair all disruptions in the ring structure, given ample time and no further disruptions while it is working. In other words, it is “eventual reachability.” Under the same assumptions about failure behavior as made in the Chord papers, no published version of Chord is correct. This result is based on modeling the protocol in Alloy and analyzing it with the Alloy Analyzer. By combining the right selection of pseudocode and textual hints from several papers, and fixing flaws revealed by analysis, it is possible to get a version that may be correct. The paper also discusses the significance of these results, describes briefly how Alloy is used to model and reason about Chord, and compares Alloy analysis to model-checking.},
language = {en},
number = {2},
urldate = {2021-09-06},
journal = {ACM SIGCOMM Computer Communication Review},
author = {Zave, Pamela},
month = mar,
year = {2012},
pages = {49--57},
}
@article{zaveReasoningIdentifierSpaces2017,
title = {Reasoning {About} {Identifier} {Spaces}: {How} to {Make} {Chord} {Correct}},
volume = {43},
issn = {1939-3520},
shorttitle = {Reasoning {About} {Identifier} {Spaces}},
doi = {10.1109/TSE.2017.2655056},
abstract = {The Chord distributed hash table (DHT) is well-known and often used to implement peer-to-peer systems. Chord peers find other peers, and access their data, through a ring-shaped pointer structure in a large identifier space. Despite claims of proven correctness, i.e., eventual reachability, previous work has shown that the Chord ring-maintenance protocol is not correct under its original operating assumptions. Previous work has not, however, discovered whether Chord could be made correct under the same assumptions. The contribution of this paper is to provide the first specification of correct operations and initialization for Chord, an inductive invariant that is necessary and sufficient to support a proof of correctness, and two independent proofs of correctness. One proof is informal and intuitive, and applies to networks of any size. The other proof is based on a formal model in Alloy, and uses fully automated analysis to prove the assertions for networks of bounded size. The two proofs complement each other in several important ways.},
number = {12},
journal = {IEEE Transactions on Software Engineering},
author = {Zave, Pamela},
month = dec,
year = {2017},
note = {Conference Name: IEEE Transactions on Software Engineering},
keywords = {Analytical models, Computers and information processing, distributed computing, Distributed processing, formal verification, Information processing, peer-to-peer computing, software engineering, Structural rings},
pages = {1144--1156},
}
@incollection{azmyRigorousCorrectnessProof2016,
address = {Cham},
title = {A {Rigorous} {Correctness} {Proof} for {Pastry}},
volume = {9675},
isbn = {978-3-319-33599-5 978-3-319-33600-8},
url = {http://link.springer.com/10.1007/978-3-319-33600-8_5},
abstract = {Peer-to-peer protocols for maintaining distributed hash tables, such as Pastry or Chord, have become popular for a class of Internet applications. While such protocols promise certain properties concerning correctness and performance, verification attempts using formal methods invariably discover border cases that violate some of those guarantees. Tianxiang Lu reported correctness problems in published versions of Pastry and also developed a model, which he called LuPastry, for which he provided a partial proof of correct delivery assuming no node departures, mechanized in the TLA+ Proof System. Lu’s proof is based on certain assumptions that were left unproven. We found counter-examples to several of these assumptions. In this paper, we present a revised model and rigorous proof of correct delivery, which we call LuPastry+. Aside from being the first complete proof, LuPastry+ also improves upon Lu’s work by reformulating parts of the specification in such a way that the reasoning complexity is confined to a small part of the proof.},
language = {en},
urldate = {2021-09-07},
booktitle = {Abstract {State} {Machines}, {Alloy}, {B}, {TLA}, {VDM}, and {Z}},
publisher = {Springer International Publishing},
author = {Azmy, Noran and Merz, Stephan and Weidenbach, Christoph},
editor = {Butler, Michael and Schewe, Klaus-Dieter and Mashkoor, Atif and Biro, Miklos},
year = {2016},
doi = {10.1007/978-3-319-33600-8_5},
note = {Series Title: Lecture Notes in Computer Science},
pages = {86--101},
}
@article{azmyMachinecheckedCorrectnessProof2018,
title = {A machine-checked correctness proof for {Pastry}},
volume = {158},
issn = {01676423},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0167642317301612},
doi = {10.1016/j.scico.2017.08.003},
abstract = {Protocols implemented on overlay networks in a peer-to-peer (P2P) setting promise flexibility, performance, and scalability due to the possibility for nodes to join and leave the network while the protocol is running. These protocols must ensure that all nodes maintain a consistent view of the network, in the absence of centralized control, so that requests can be routed to the intended destination. This aspect represents an interesting target for formal verification. In previous work, Lu studied the Pastry algorithm for implementing a distributed hash table (DHT) over a P2P network and identified problems in published versions of the algorithm. He suggested a variant of the algorithm, together with a machine-checked proof in the TLA+ Proof System (tlaps), assuming the absence of node failures. We identify and correct problems in Lu’s proof that are due to unchecked assumptions concerning modulus arithmetic and underlying data structures. We introduce higher-level abstractions into the specifications and proofs that are intended for improving the degree of automation achieved by the proof backends. These abstractions are instrumental for presenting the first complete formal proof. Finally, we formally prove that an even simpler version of Lu’s algorithm, in which the final phase of the join protocol is omitted, is still correct, again assuming that nodes do not fail.},
language = {en},
urldate = {2021-09-07},
journal = {Science of Computer Programming},
author = {Azmy, Noran and Merz, Stephan and Weidenbach, Christoph},
month = jun,
year = {2018},
pages = {64--80},
}
@article{abrahamRevisitingFastPractical2017,
title = {Revisiting {Fast} {Practical} {Byzantine} {Fault} {Tolerance}},
url = {http://arxiv.org/abs/1712.01367},
abstract = {In this note, we observe a safety violation in Zyzzyva and a liveness violation in FaB. To demonstrate these issues, we require relatively simple scenarios, involving only four replicas, and one or two view changes. In all of them, the problem is manifested already in the first log slot.},
urldate = {2021-09-06},
journal = {arXiv:1712.01367 [cs]},
author = {Abraham, Ittai and Gueta, Guy and Malkhi, Dahlia and Alvisi, Lorenzo and Kotla, Rama and Martin, Jean-Philippe},
month = dec,
year = {2017},
note = {arXiv: 1712.01367},
keywords = {Computer Science - Distributed, Parallel, and Cluster Computing},
}
@article{sutraCorrectnessEgalitarianPaxos2020,
title = {On the correctness of {Egalitarian} {Paxos}},
volume = {156},
issn = {0020-0190},
url = {https://www.sciencedirect.com/science/article/pii/S002001901930184X},
doi = {10.1016/j.ipl.2019.105901},
abstract = {This paper identifies a problem in both the TLA+ specification and the implementation of the Egalitarian Paxos protocol. It is related to how replicas switch from one ballot to another when computing the dependencies of a command. The problem may lead replicas to diverge and break the linearizability of the replicated service.},
language = {en},
urldate = {2021-04-01},
journal = {Information Processing Letters},
author = {Sutra, Pierre},
month = apr,
year = {2020},
keywords = {Fault tolerance, Consensus, Distributed systems, State-machine replication},
}
@article{whittakerMatchmakerPaxosReconfigurable2021,
title = {Matchmaker {Paxos}: {A} {Reconfigurable} {Consensus} {Protocol}},
abstract = {State machine replication protocols, like MultiPaxos and Raft, are at the heart of numerous distributed systems. To tolerate machine failures, these protocols must replace failed machines with new machines, a process known as reconfiguration. Reconfiguration has become increasingly important over time as the need for frequent reconfiguration has grown. Despite this, reconfiguration has largely been neglected in the literature. In this paper, we present Matchmaker Paxos and Matchmaker MultiPaxos, a reconfigurable consensus and state machine replication protocol respectively. Our protocols can perform a reconfiguration with little to no impact on the latency or throughput of command processing; they can perform a reconfiguration in a few milliseconds; and they present a framework that can be generalized to other replication protocols in a way that previous reconfiguration techniques can not. We provide proofs of correctness for the protocols and optimizations, and present empirical results from an open source implementation showing that throughput and latency do not change significantly during a reconfiguration.},
language = {en},
journal = {Journal of Systems Research},
author = {Whittaker, Michael and Hellerstein, Joseph M and Giridharan, Neil and Szekeres, Adriana and Howard, Heidi and Nawab, Faisal},
year = {2021},
pages = {22},
}
@article{michaelRecoveringSharedObjects2017,
title = {Recovering {Shared} {Objects} {Without} {Stable} {Storage}},
abstract = {This paper considers the problem of building fault-tolerant shared objects when processes can crash and recover but lose their persistent state on recovery. This Diskless Crash-Recovery (DCR) model matches the way many long-lived systems are built. We show that it presents new challenges, as operations that are recorded at a quorum may not persist after some of the processes in that quorum crash and then recover.},
language = {en},
author = {Michael, Ellis and Ports, Dan R K and Sharma, Naveen Kr and Szekeres, Adriana},
month = aug,
note = {Appendix B},
year = {2017},
pages = {27},
}
@techreport{konczakJPaxosStateMachine2011,
title = {{JPaxos}: {State} machine replication based on the {Paxos} protocol},
shorttitle = {{JPaxos}},
abstract = {State machine replication is a technique for making services fault-tolerant by replicating them over a group of machines. Although the theory of state machine replication has been studied extensively, the engineering challenges of converting a theoretical description into a fully functional system is less understood. This creates difficulties to implementors, because in designing such a system they face many engineering challenges which are crucial to ensure good performance and stability of a replicated system. In this report, we address this problem by describing the design and implementation of JPaxos, a fully-functional implementation of state machine replication based on the MultiPaxos protocol. Our description includes the basic modules of a state machine replication implementation, like snapshotting of service state, state-transfer and keeping up-to-date all replicas, but focus mainly on three aspects: recovery mechanisms, batching and pipelining optimizations, and a scalable threading-architecture. We present several recovery algorithms that vary in the usage of stable storage and on the system assumptions, including some that use stable storage only once per-recovery. Batching and pipelining are well-known optimizations commonly used in state machine replication. With JPaxos we have studied their interaction in detail, and provide guidelines to tune these mechanisms for a variety of systems. Finally, the threading architecture of JPaxos was designed to scale with the number of cores, while at the same time minimizing complexity to reduce the risk of concurrency bugs},
number = {EPFL-REPORT-167765},
author = {Kończak, Jan and de Sousa Santos, Nuno Filipe and Żurkowski, Tomasz and Wojciechowski, Paweł T. and Schiper, André},
year = {2011},
keywords = {Distributed Systems, Fault tolerance, Implementation, Paxos, State Machine Replication},
}
@techreport{liskovViewstampedReplicationRevisited2012,
title = {Viewstamped {Replication} {Revisited}},
abstract = {This paper presents an updated version of Viewstamped Replication, a replication technique that handles failures in which nodes crash. It describes how client requests are handled, how the group reorganizes when a replica fails, and how a failed replica is able to rejoin the group. The paper also describes a number of important optimizations and presents a protocol for handling reconfigurations that can change both the group membership and the number of failures the group is able to handle.},
language = {en},
number = {MIT-CSAIL-TR-2012-021},
author = {Liskov, Barbara and Cowling, James},
month = jul,
year = {2012},
pages = {16},
}
@inproceedings{chandraPaxosMadeLive2007,
address = {Portland, Oregon, USA},
title = {Paxos made live: an engineering perspective},
isbn = {978-1-59593-616-5},
shorttitle = {Paxos made live},
url = {http://dl.acm.org/citation.cfm?doid=1281100.1281103},
doi = {10.1145/1281100.1281103},
abstract = {We describe our experience in building a fault-tolerant data-base using the Paxos consensus algorithm. Despite the existing literature in the field, building such a database proved to be non-trivial. We describe selected algorithmic and engineering problems encountered, and the solutions we found for them. Our measurements indicate that we have built a competitive system.},
language = {en},
urldate = {2021-01-19},
booktitle = {Proceedings of the twenty-sixth annual {ACM} symposium on {Principles} of distributed computing - {PODC} '07},
publisher = {ACM Press},
author = {Chandra, Tushar D. and Griesemer, Robert and Redstone, Joshua},
year = {2007},
pages = {398--407},
}
@misc{hochConfigurationChanges2014,
title = {Configuration changes},
url = {https://groups.google.com/g/raft-dev/c/xux5HRxH3Ic/m/mz_PDK-qMJgJ},
urldate = {2021-09-09},
author = {Hoch, Ezra},
month = feb,
year = {2014},
}
@techreport{sutraFastGenuineGeneralized2010,
title = {Fast {Genuine} {Generalized} {Consensus}},
url = {https://drive.google.com/open?id=0BwFkGepvBDQoRjNYRGJTdWQ0SzA},
abstract = {Consensus is a central primitive for building replicated systems, but its latency constitutes a bottleneck. A well-known solution to consensus is Fast Paxos. In a recent paper, Lamport enhances Fast Paxos by leveraging the commutativity of concurrent commands. The new primitive, called Generalized Paxos, reduces the collision rate, and thus the latency of Fast Paxos. However if a collision occurs, the latency of Generalized Paxos equals six communication steps, which is higher than Fast Paxos. This paper presents FGGC, a novel consensus algorithm that reduces recovery delay when a collision occurs to one. FGGC tolerates $f < n/2$ replica crashes, and during failure-free runs, processes learn commands in two steps if all commands commute, and three steps otherwise; this is optimal. Moreover, as long as no fault occurs, FGGC needs only $f + 1$ replicas to progress.},
author = {Sutra, Pierre and Shapiro, Marc},
month = feb,
year = {2010},
note = {(corrected August 2010). Section 6.3.},
pages = {62},
}
@techreport{lamportGeneralizedConsensusPaxos2005,
title = {Generalized {Consensus} and {Paxos}},
url = {https://www.microsoft.com/en-us/research/publication/generalized-consensus-and-paxos/},
abstract = {In [153], I proved lower bounds for the number of message delays required to reach consensus. I showed that the best algorithms can reach consensus in the normal case in 2 message delays. This result in turn led me to a new version of the Paxos algorithm of [122] called Fast Paxos, described in [158], […]},
number = {MSR-TR-2005-33},
institution = {Microsoft Research},
author = {Lamport, Leslie},
month = mar,
year = {2005},
}
@unpublished{whittakerCRAQBug2020,
title = {{CRAQ} {Bug}},
url = {https://github.com/mwhittaker/craq_bug},
abstract = {A minor bug in CRAQ's garbage collection},
urldate = {2021-09-09},
author = {Whittaker, Michael},
month = jun,
year = {2020},
note = {original-date: 2020-06-13T18:44:33Z},
}
@inproceedings{terraceObjectStorageCRAQ2009,
title = {Object {Storage} on {CRAQ}: {High}-{Throughput} {Chain} {Replication} for {Read}-{Mostly} {Workloads}},
shorttitle = {Object {Storage} on {CRAQ}},
booktitle = {Proceedings of the 2009 {USENIX} {Annual} {Technical} {Conference}},
url = {https://www.usenix.org/conference/usenix-09/object-storage-craq-high-throughput-chain-replication-read-mostly-workloads},
urldate = {2021-09-09},
author = {Terrace, Jeff and Freedman, Michael J.},
year = {2009},
}
@inproceedings{jensenExaminingRaftBehaviour2021,
address = {New York, NY, USA},
series = {{HAOC} '21},
title = {Examining {Raft}'s behaviour during partial network failures},
isbn = {978-1-4503-8336-3},
url = {https://doi.org/10.1145/3447851.3458739},
doi = {10.1145/3447851.3458739},
abstract = {State machine replication protocols such as Raft are widely used to build highly-available strongly-consistent services, maintaining liveness even if a minority of servers crash. As these systems are implemented and optimised for production, they accumulate many divergences from the original specification. These divergences are poorly documented, resulting in operators having an incomplete model of the system's characteristics, especially during failures. In this paper, we look at one such Raft model used to explain the November Cloudflare outage and show that etcd's behaviour during the same failure differs. We continue to show the specific optimisations in etcd causing this difference and present a more complete model of the outage based on etcd's behaviour in an emulated deployment using reckon. Finally, we highlight the upcoming PreVote optimisation in etcd, which might have prevented the outage from happening in the first place.},
booktitle = {Proceedings of the 1st {Workshop} on {High} {Availability} and {Observability} of {Cloud} {Systems}},
publisher = {Association for Computing Machinery},
author = {Jensen, Chris and Howard, Heidi and Mortier, Richard},
month = apr,
year = {2021},
keywords = {Cloudflare, etcd, Partial-Partition, Raft},
pages = {11--17},
}
@misc{whittakerEPaxosDependencySet2021,
title = {{EPaxos} {Dependency} {Set} {Compaction} {Bug}},
url = {https://github.com/mwhittaker/bipartisan_paxos/blob/cbd99cc735215d18c163dc41cb0a05edcb55437d/epaxos_bugs/epaxos_dependency_bug.pdf},
abstract = {Bipartisan Paxos},
urldate = {2021-09-16},
author = {Whittaker, Michael},
month = sep,
year = {2021},
note = {original-date: 2018-11-03T04:31:20Z},
}
@article{neuEbbandFlowProtocolsResolution2021,
title = {Ebb-and-{Flow} {Protocols}: {A} {Resolution} of the {Availability}-{Finality} {Dilemma}},
shorttitle = {Ebb-and-{Flow} {Protocols}},
url = {http://arxiv.org/abs/2009.04987},
abstract = {The CAP theorem says that no blockchain can be live under dynamic participation and safe under temporary network partitions. To resolve this availability-finality dilemma, we formulate a new class of flexible consensus protocols, ebb-and-flow protocols, which support a full dynamically available ledger in conjunction with a finalized prefix ledger. The finalized ledger falls behind the full ledger when the network partitions but catches up when the network heals. Gasper, the current candidate protocol for Ethereum 2.0's beacon chain, combines the finality gadget Casper FFG with the LMD GHOST fork choice rule and aims to achieve this property. However, we discovered an attack in the standard synchronous network model, highlighting a general difficulty with existing finality-gadget-based designs. We present a construction of provably secure ebb-and-flow protocols with optimal resilience. Nodes run an off-the-shelf dynamically available protocol, take snapshots of the growing available ledger, and input them into a separate off-the-shelf BFT protocol to finalize a prefix. We explore connections with flexible BFT and improve upon the state-of-the-art for that problem.},
urldate = {2021-09-16},
journal = {arXiv:2009.04987 [cs]},
author = {Neu, Joachim and Tas, Ertem Nusret and Tse, David},
month = feb,
year = {2021},
note = {arXiv: 2009.04987},
keywords = {Computer Science - Cryptography and Security, Computer Science - Distributed, Parallel, and Cluster Computing},
}
@article{buterinCombiningGHOSTCasper2020,
title = {Combining {GHOST} and {Casper}},
url = {http://arxiv.org/abs/2003.03052},
abstract = {We present "Gasper," a proof-of-stake-based consensus protocol, which is an idealized version of the proposed Ethereum 2.0 beacon chain. The protocol combines Casper FFG, a finality tool, with LMD GHOST, a fork-choice rule. We prove safety, plausible liveness, and probabilistic liveness under different sets of assumptions.},
urldate = {2021-09-16},
journal = {arXiv:2003.03052 [cs]},
author = {Buterin, Vitalik and Hernandez, Diego and Kamphefner, Thor and Pham, Khiem and Qiao, Zhi and Ryan, Danny and Sin, Juhyeok and Wang, Ying and Zhang, Yan X.},
month = may,
year = {2020},
note = {arXiv: 2003.03052},
keywords = {68W15, Computer Science - Cryptography and Security},
}
@article{shresthaRevisitingHBFTSpeculative2019,
title = {Revisiting {hBFT}: {Speculative} {Byzantine} {Fault} {Tolerance} with {Minimum} {Cost}},
shorttitle = {Revisiting {hBFT}},
url = {http://arxiv.org/abs/1902.08505},
abstract = {FaB Paxos[5] sets a lower bound of 5f + 1 replicas for any two-step consensus protocols tolerating f byzantine failures. Yet, hBFT[3] promises a two-step consensus protocol with only 3f + 1 replicas. As a result, it violates safety property of a consensus protocol. In this note, we review the lower bound set by FaB Paxos and present a simple execution scenario that produces a safety violation in hBFT. To demonstrate the scenario, we require a relatively simple setup with only 4 replicas and one view-change.},
urldate = {2021-09-16},
journal = {arXiv:1902.08505 [cs]},
author = {Shrestha, Nibesh and Kumar, Mohan and Duan, SiSi},
month = apr,
year = {2019},
note = {arXiv: 1902.08505},
keywords = {Computer Science - Distributed, Parallel, and Cluster Computing},
}
@article{duanHBFTSpeculativeByzantine2015,
title = {{hBFT}: {Speculative} {Byzantine} {Fault} {Tolerance} with {Minimum} {Cost}},
volume = {12},
issn = {1941-0018},
shorttitle = {{hBFT}},
doi = {10.1109/TDSC.2014.2312331},
abstract = {We present hBFT, a hybrid, Byzantine fault-tolerant, replicated state machine protocol with optimal resilience. Under normal circumstances, hBFT uses speculation, i.e., replicas directly adopt the order from the primary and send replies to the clients. As in prior work such as Zyzzyva, when replicas are out of order, clients can detect the inconsistency and help replicas converge on the total ordering. However, we take a different approach than previous work that has four distinct benefits: it requires many fewer cryptographic operations, it moves critical jobs to the clients with no additional costs, faulty clients can be detected and identified, and performance in the presence of client participation will not degrade as long as the primary is correct. The correctness is guaranteed by a three-phase checkpoint subprotocol similar to PBFT, which is tailored to our needs. The protocol is triggered by the primary when a certain number of requests are executed or by clients when they detect an inconsistency.},
number = {1},
journal = {IEEE Transactions on Dependable and Secure Computing},
author = {Duan, Sisi and Peisert, Sean and Levitt, Karl N.},
month = jan,
year = {2015},
note = {Conference Name: IEEE Transactions on Dependable and Secure Computing},
keywords = {client/server, Concurrent computing, Digital signatures, Distributed systems, fault tolerance, Fault tolerant systems, Protocols, Resilience, state machine replication, Switches},
pages = {58--70},
}
@inproceedings{enesEfficientReplicationTimestamp2021,
address = {Online Event United Kingdom},
title = {Efficient replication via timestamp stability},
isbn = {978-1-4503-8334-9},
url = {https://dl.acm.org/doi/10.1145/3447786.3456236},
doi = {10.1145/3447786.3456236},
abstract = {Modern web applications replicate their data across the globe and require strong consistency guarantees for their most critical data. These guarantees are usually provided via state-machine replication (SMR). Recent advances in SMR have focused on leaderless protocols, which improve the availability and performance of traditional Paxos-based solutions. We propose Tempo – a leaderless SMR protocol that, in comparison to prior solutions, achieves superior throughput and offers predictable performance even in contended workloads. To achieve these benefits, Tempo timestamps each application command and executes it only after the timestamp becomes stable, i.e., all commands with a lower timestamp are known. Both the timestamping and stability detection mechanisms are fully decentralized, thus obviating the need for a leader replica. Our protocol furthermore generalizes to partial replication settings, enabling scalability in highly parallel workloads. We evaluate the protocol in both real and simulated geo-distributed environments and demonstrate that it outperforms state-of-the-art alternatives.},
language = {en},
urldate = {2021-09-16},
booktitle = {Proceedings of the {Sixteenth} {European} {Conference} on {Computer} {Systems}},
publisher = {ACM},
author = {Enes, Vitor and Baquero, Carlos and Gotsman, Alexey and Sutra, Pierre},
month = apr,
year = {2021},
pages = {178--193},
}
@inproceedings{arunSpeedingConsensusChasing2017,
title = {Speeding up {Consensus} by {Chasing} {Fast} {Decisions}},
doi = {10.1109/DSN.2017.35},
abstract = {This paper proposes CAESAR, a novel multi-leader Generalized Consensus protocol for geographically replicated sites. The main goal of CAESAR is to overcome one of the major limitations of existing approaches, which is the significant performance degradation when application workload produces conflicting requests. CAESAR does that by changing the way a fast decision is taken: its ordering protocol does not reject a fast decision for a client request if a quorum of nodes reply with different dependency sets for that request. The effectiveness of CAESAR is demonstrated through an evaluation study performed on Amazon's EC2 infrastructure using 5 geo-replicated sites. CAESAR outperforms other multi-leader (e.g., EPaxos) competitors by as much as 1.7x in the presence of 30\% conflicting requests, and single-leader (e.g., Multi-Paxos) by up to 3.5x.},
booktitle = {2017 47th {Annual} {IEEE}/{IFIP} {International} {Conference} on {Dependable} {Systems} and {Networks} ({DSN})},
author = {Arun, Balaji and Peluso, Sebastiano and Palmieri, Roberto and Losa, Giuliano and Ravindran, Binoy},
month = jun,
year = {2017},
note = {ISSN: 2158-3927},
keywords = {Computer crashes, Consensus, Databases, Degradation, Delays, Geo-Replication, Paxos, Protocols, Reliability, Synchronization},
pages = {49--60},
}
@techreport{momoseForceLockingAttackSync2019,
title = {Force-{Locking} {Attack} on {Sync} {Hotstuff}},
url = {http://eprint.iacr.org/2019/1484},
abstract = {Blockchain, which realizes state machine replication (SMR), is a fundamental building block of decentralized systems, such as cryptocurrencies and smart contracts. These systems require a consensus protocol in their global-scale, public, and trustless networks. In such an environment, consensus protocols require high resiliency, which is the ability to tolerate a fraction of faulty replicas, and thus synchronous protocols have been gaining significant research attention recently. Abraham et al. proposed a simple and practical synchronous SMR protocol called Sync Hotstuff (to be presented in IEEE S\&P 2020). Sync Hotstuff achieves $2\Delta$ latency, which is near optimal in a synchronous protocol, and its throughput without lock-step execution is comparable to that of partially synchronous protocols. Sync Hotstuff was presented under a standard synchronous model as well as under a weaker, but more realistic, model called mobile sluggish model. Sync Hotstuff also adopts an optimistic responsive mode, in which the latency is independent of $\Delta$. However, Sync Hotstuff has a critical security vulnerability with which an adversary can conduct double spending or denial-of-service attack. In this paper, we present an attack we call force-locking attack on Sync Hotstuff. This attack violates the safety, i.e., consistency of agreements, of the protocol under the standard synchronous model and the liveness, i.e., progress of agreements, of all versions of the protocol, including the mobile sluggish model and responsive mode. The force-locking attack is not only a specific attack on Sync Hotstuff but also on some general blockchain protocols. After describing the attack, we will present some refinements to prevent this attack. Our refinements remove the security vulnerability on Sync Hotstuff without any performance compromises. We will also provide formal proofs of the security for each model.},
number = {1484},
urldate = {2021-09-16},
author = {Momose, Atsuki and Cruz, Jason Paul},
year = {2019},
keywords = {attack, blockchain, consensus, cryptographic protocols, SMR},
}
@techreport{abrahamSyncHotStuffSimple2019,
title = {Sync {HotStuff}: {Simple} and {Practical} {Synchronous} {State} {Machine} {Replication}},
shorttitle = {Sync {HotStuff}},
url = {http://eprint.iacr.org/2019/270},
abstract = {Synchronous solutions for Byzantine Fault Tolerance (BFT) can tolerate up to minority faults. In this work, we present Sync HotStuff, a surprisingly simple and intuitive synchronous BFT solution that achieves consensus with a latency of $2\Delta$ in the steady state (where $\Delta$ is a synchronous message delay upper bound). In addition, Sync HotStuff ensures safety in a weaker synchronous model in which the synchrony assumption does not have to hold for all replicas all the time. Moreover, Sync HotStuff has optimistic responsiveness, i.e., it advances at network speed when less than one-quarter of the replicas are not responding. Borrowing from practical partially synchronous BFT solutions, Sync HotStuff has a two-phase leader-based structure, and has been fully prototyped under the standard synchrony assumption. When tolerating a single fault, Sync HotStuff achieves a throughput of over 280 Kops/sec under typical network performance, which is comparable to the best known partially synchronous solution.},
number = {270},
urldate = {2021-09-16},
author = {Abraham, Ittai and Malkhi, Dahlia and Nayak, Kartik and Ren, Ling and Yin, Maofan},
year = {2019},
keywords = {blockchains, consensus protocols, Distributed computing, SMR, Synchrony},
}
@article{cachinBlockchainConsensusProtocols2017,
title = {Blockchain {Consensus} {Protocols} in the {Wild}},
url = {http://arxiv.org/abs/1707.01873},
abstract = {A blockchain is a distributed ledger for recording transactions, maintained by many nodes without central authority through a distributed cryptographic protocol. All nodes validate the information to be appended to the blockchain, and a consensus protocol ensures that the nodes agree on a unique order in which entries are appended. Consensus protocols for tolerating Byzantine faults have received renewed attention because they also address blockchain systems. This work discusses the process of assessing and gaining confidence in the resilience of a consensus protocol exposed to faults and adversarial nodes. We advocate to follow the established practice in cryptography and computer security, relying on public reviews, detailed models, and formal proofs; the designers of several practical systems appear to be unaware of this. Moreover, we review the consensus protocols in some prominent permissioned blockchain platforms with respect to their fault models and resilience against attacks. The protocol comparison covers Hyperledger Fabric, Tendermint, Symbiont, R3 Corda, Iroha, Kadena, Chain, Quorum, MultiChain, Sawtooth Lake, Ripple, Stellar, and IOTA.},
urldate = {2021-09-16},
journal = {arXiv:1707.01873 [cs]},
author = {Cachin, Christian and Vukolić, Marko},
month = jul,
year = {2017},
note = {arXiv: 1707.01873},
keywords = {Computer Science - Distributed, Parallel, and Cluster Computing},
}
@phdthesis{buchmanTendermintByzantineFault2016,
address = {Guelph, Ontario, Canada},
title = {Tendermint: {Byzantine} {Fault} {Tolerance} in the {Age} of {Blockchains}},
url = {https://atrium.lib.uoguelph.ca/xmlui/handle/10214/9769},
school = {University of Guelph},
author = {Buchman, Ethan},
month = jun,
year = {2016},
}
@article{bergerMakingReadsBFT2021,
title = {Making {Reads} in {BFT} {State} {Machine} {Replication} {Fast}, {Linearizable}, and {Live}},
url = {http://arxiv.org/abs/2107.11144},
abstract = {Practical Byzantine Fault Tolerance (PBFT) is a seminal state machine replication protocol that achieves a performance comparable to non-replicated systems in realistic environments. A reason for such high performance is the set of optimizations introduced in the protocol. One of these optimizations is read-only requests, a particular type of client request which avoids running the three-step agreement protocol and allows replicas to respond directly, thus reducing the latency of reads from five to two communication steps. Given PBFT's broad influence, its design and optimizations influenced many BFT protocols and systems that followed, e.g., BFT-SMaRt. We show, for the first time, that the read-only request optimization introduced in PBFT more than 20 years ago can violate its liveness. Notably, the problem affects not only the optimized read-only operations but also standard, totally-ordered operations. We show this weakness by presenting an attack in which a malicious leader blocks correct clients and present two solutions for patching the protocol, making read-only operations fast and correct. The two solutions were implemented on BFT-SMaRt and evaluated in different scenarios, showing their effectiveness in preventing the identified attack.},
urldate = {2021-11-10},
journal = {arXiv:2107.11144 [cs]},
author = {Berger, Christian and Reiser, Hans P. and Bessani, Alysson},
month = jul,
year = {2021},
note = {arXiv: 2107.11144},
keywords = {Computer Science - Distributed, Parallel, and Cluster Computing},
}
@inproceedings{castroPracticalByzantineFault1999,
author = {Miguel Castro and Barbara Liskov},
title = {Practical Byzantine Fault Tolerance},
booktitle = {3rd Symposium on Operating Systems Design and Implementation ({OSDI} 99)},
year = {1999},
address = {New Orleans, LA},
url = {https://www.usenix.org/conference/osdi-99/practical-byzantine-fault-tolerance},
publisher = {{USENIX} Association},
month = feb,
}
@article{Imbs2011,
author = {Damien Imbs and Michel Raynal},
title = {Software transactional memories: an approach for multicore programming},
journal = {The Journal of Supercomputing},
year = {2010},
month = feb,
doi = {10.1007/s11227-010-0388-0},
}
@article{Belyaev2010,
author = {Alexey Belyaev},
title = {Верификация алгоритма поддержки транзакционной памяти {[Verifying an algorithm for transactional memory support]}},
journal = {Информатика, телекоммуникации и управление {[Informatics, telecommunications and control]}},
number = {3 (101)},
year = {2010},
url = {https://cyberleninka.ru/article/n/verifikatsiya-algoritma-podderzhki-tranzaktsionnoy-pamyati},
language = {russian},
}
@inproceedings{ClarkeGHJLMN93,
author = {Edmund M. Clarke and
Orna Grumberg and
Hiromi Hiraishi and
Somesh Jha and
David E. Long and
Kenneth L. McMillan and
Linda A. Ness},
title = {Verification of the Futurebus+ Cache Coherence Protocol},
booktitle = {Computer Hardware Description Languages and their Applications, Proceedings
of the 11th {IFIP} {WG10.2} International Conference on Computer Hardware
Description Languages and their Applications - {CHDL} '93, sponsored
by {IFIP} {WG10.2} and in cooperation with {IEEE} COMPSOC, Ottawa,
Ontario, Canada, 26-28 April, 1993},
pages = {15--30},
year = {1993},
}
@misc{IEEE8961Futurebus,
title = {IEEE 896.1-1991 IEEE Standard for Futurebus+(R) -- Logical Protocol Specification},
year = {1992},
url = {https://standards.ieee.org/ieee/896.1/1269/},
}