<?xml version="1.0" encoding="us-ascii"?>
<!DOCTYPE rfc PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/rfc2629.dtd"[
<!ENTITY RFC2119 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC2629 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2629.xml">
<!ENTITY RFC3552 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3552.xml">
<!ENTITY RFC3640 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.3640.xml">
]>
<?xml-stylesheet type='text/xsl'
href="http://greenbytes.de/tech/webdav/rfc2629xslt/rfc2629.xslt" ?>
<?rfc strict="yes" ?>
<?rfc toc="yes"?>
<!-- generate a ToC -->
<?rfc tocdepth="4"?>
<!-- the number of levels of subsections in ToC. default: 3 -->
<!-- control references -->
<?rfc symrefs="yes"?>
<!-- use symbolic references tags, i.e, [RFC2119] instead of [1] -->
<?rfc sortrefs="yes" ?>
<!-- sort the reference entries alphabetically -->
<!-- control vertical white space
(using these PIs as follows is recommended by the RFC Editor) -->
<?rfc compact="yes" ?>
<!-- do not start each main section on a new page -->
<?rfc subcompact="no" ?>
<!-- keep one blank line between list items -->
<!-- end of list of popular I-D processing instructions -->
<rfc category="info"
docName="draft-jasinska-ix-bgp-route-server-operations-00"
ipr="trust200902"
obsoletes=""
updates=""
submissionType="IETF"
xml:lang="en">
<!-- category values: std, bcp, info, exp, and historic
ipr values: full3667, noModification3667, noDerivatives3667
you can add the attributes updates="NNNN" and obsoletes="NNNN"
they will automatically be output with "(if approved)" -->
<!-- ***** FRONT MATTER ***** -->
<front>
<title abbrev="IX BGP Route Server Operations">
Internet Exchange Route Server Operations
</title>
<author initials="E" surname="Jasinska" fullname="Elisa Jasinska">
<organization>Limelight Networks</organization>
<address>
<postal>
<street>2220 W 14th St</street>
<city>Tempe</city>
<region>AZ</region>
<code>85281</code>
<country>US</country>
</postal>
<email>elisa@llnw.com</email>
</address>
</author>
<author initials="N" surname="Hilliard" fullname="Nick Hilliard">
<organization>INEX</organization>
<address>
<postal>
<street>4027 Kingswood Road</street>
<city>Dublin</city>
<code>24</code>
<country>IE</country>
</postal>
<email>nick@inex.ie</email>
</address>
</author>
<author initials="R" surname="Raszuk" fullname="Robert Raszuk">
<organization>Cisco Systems</organization>
<address>
<postal>
<street>170 West Tasman Drive</street>
<city>San Jose</city>
<region>CA</region>
<code>95134</code>
<country>US</country>
</postal>
<email>raszuk@cisco.com</email>
</address>
</author>
<author initials="N" surname="Bakker" fullname="Niels Bakker">
<organization>AMS-IX B.V.</organization>
<address>
<postal>
<street>Westeinde 12</street>
<city>Amsterdam</city>
<region>NH</region>
<code>1017 ZN</code>
<country>NL</country>
</postal>
<email>niels.bakker@ams-ix.net</email>
</address>
</author>
<date month="March" year="2011" />
<area>Routing</area>
<workgroup>GROW Working Group</workgroup>
<keyword>I-D</keyword>
<keyword>Internet-Draft</keyword>
<keyword>GROW</keyword>
<abstract>
<t>
The growing popularity of Internet exchange points (IXPs) brings a new
set of requirements to interconnect participating networks. While
bilateral exterior BGP sessions between exchange participants were
previously the most common means of exchanging reachability
information, the overhead associated with dense interconnection has
caused substantial operational scaling problems for Internet exchange
point participants.
</t>
<t>
This document outlines a specification for multilateral
interconnections at IXPs. Multilateral interconnection is a
method of exchanging routing information between three or more BGP
speakers using a single intermediate broker system, referred to as a
route server. Route servers are typically used on shared access media
          networks such as Internet exchange points (IXPs) to facilitate simplified
interconnection between multiple Internet routers on such a network.
</t>
</abstract>
</front>
<middle>
<section title="Introduction to Multilateral Interconnection">
<t>
Internet exchange points (IXPs) provide IP data interconnection
facilities for their participants, typically using shared Layer-2
networking media such as Ethernet. The Border Gateway Protocol (BGP)
<xref target="RFC4271" />, an inter-Autonomous System routing
protocol, is commonly used to facilitate exchange of network
reachability information over such media.
</t>
<t>
In the case of bilateral interconnection between two exchange
participant routers, each router must be configured with a BGP session
to the other. At IXPs with many participants who wish to implement
dense interconnection, this requirement can lead both to large router
configurations and high administrative overhead. Given the growth in
the number of participants at many IXPs, it has become operationally
troublesome to implement densely meshed interconnections at these
IXPs.
</t>
<t>
Multilateral interconnection is a method of interconnecting BGP
speaking routers using a third party brokering system, commonly
referred to as a route server and typically managed by the IXP
operator. Each of the multilateral interconnection participants
(usually referred to as route server clients) announces network
reachability information to the route server using exterior BGP, and
the route server in turn forwards this information to each other route
server client connected to it, according to its configuration.
Although a route server uses BGP to exchange reachability information
with each of its clients, it does not forward traffic itself and is
therefore not a router.
</t>
<t>
        A route server can be viewed as similar in function to a route
        reflector <xref target="RFC4456" />, except that it operates using
        EBGP rather than IBGP. Certain adaptations to <xref
        target="RFC4271" /> are required to enable an EBGP router to
        operate as a route server; these are outlined in <xref
        target="spec" /> of this document. Operational considerations to
        be taken into account in a route server deployment are the
        subject of <xref target="ops_considerations" />.
</t>
<t>
The term "route server" is often in a different context used to
describe a BGP node whose purpose is to accept BGP feeds from
multiple clients for the purpose of operational analysis and
troubleshooting. A system of this form may alternatively be known
as a "route collector" or a "route-views server". This document
uses the term "route server" exclusively to describe multilateral
peering brokerage systems.
</t>
<section title="Specification of Requirements">
<t>
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
"SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and
"OPTIONAL" in this document are to be interpreted as described in
<xref target="RFC2119" />.
</t>
</section>
</section>
<section title="Bilateral Interconnection">
<t>
Bilateral interconnection is a method of interconnecting
routers using individual BGP sessions between each participant router
on an IXP in order to exchange reachability information. While
interconnection policies vary from participant to participant, most
IXPs have significant numbers of participants who see value in
interconnecting with as many other exchange participants as possible.
In order for an IXP participant to implement a dense interconnection
        policy, it is necessary for the participant to liaise with each of
        its intended interconnection partners; where a partner agrees to
        interconnect, both participants' routers must be configured with
a BGP session to exchange network reachability information. If each
exchange participant interconnects with each other participant, a full
mesh of BGP sessions is needed, as detailed in <xref
target="ixp_interconnection" />.
</t>
<figure title='Full-Mesh Interconnection at an IXP' anchor="ixp_interconnection">
<preamble></preamble>
<artwork align='center'>
___ ___
/ \ / \
..| AS1 |..| AS2 |..
: \___/____\___/ :
: | \ / | :
: | \ / | :
: IXP | \/ | :
: | /\ | :
: | / \ | :
: _|_/____\_|_ :
: / \ / \ :
..| AS3 |..| AS4 |..
\___/ \___/
</artwork>
<postamble></postamble>
</figure>
<t>
<xref target="ixp_interconnection" /> depicts an IXP platform with
four connected routers, administered by four separate exchange
participants, each of them with a locally unique autonomous system number:
AS1, AS2, AS3 and AS4. Each of these four participants wishes to
exchange traffic with all other participants; this is accomplished by
configuring a full mesh of BGP sessions on each router connected to
the exchange, resulting in 6 BGP sessions across the IXP fabric.
</t>
<t>
The number of BGP sessions at an exchange has an upper bound of
n*(n-1)/2, where n is the number of routers at the exchange. As many
exchanges have relatively large numbers of participating networks, the
quadratic scaling requirements of dense interconnection tend to cause
operational and administrative overhead at large IXPs. Consequently,
new participants to an IXP require significant initial resourcing in
order to gain value from their IXP connection, while existing exchange
participants need to commit ongoing resources in order to benefit from
interconnecting with these new participants.
</t>
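      <t>
        The quadratic growth of the full mesh is easy to illustrate with a
        short calculation. The following Python sketch (purely
        illustrative, not part of any specification) computes the
        bilateral session count n*(n-1)/2 alongside the per-router session
        count of the route server model described in the next section.
      </t>
      <figure>
        <artwork><![CDATA[
# Illustrative only: BGP session counts at an IXP with n routers.

def full_mesh_sessions(n):
    # Bilateral interconnection: one session per pair of routers.
    return n * (n - 1) // 2

def route_server_sessions(n):
    # Multilateral interconnection: one session per router, each
    # to a single route server.
    return n

for n in (4, 50, 400):
    print(n, full_mesh_sessions(n), route_server_sessions(n))
# 4 routers  ->    6 sessions full mesh vs   4 via route server
# 50 routers -> 1225 sessions full mesh vs  50 via route server
]]></artwork>
      </figure>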
</section>
<section title="Multilateral Interconnection">
<t>
Multilateral interconnection is implemented using a route server
configured to use BGP to distribute network layer reachability
information (NLRI) among all client routers. The route server
preserves the BGP NEXT_HOP attribute from all received NLRI UPDATE
messages, and passes these messages with unchanged NEXT_HOP to its
route server clients, according to its configured routing policy.
Using this method of exchanging NLRI messages, an IXP participant
router can receive an aggregated list of prefixes from all other route
server clients using a single BGP session to the route server instead
        of maintaining individual BGP sessions with every other router at
        the exchange.
This reduces the overall number of BGP sessions at an Internet
exchange from n*(n-1)/2 to n, where n is the number of routers at the
exchange.
</t>
<t>
In practical terms, this allows dense interconnection between IXP
participants with low administrative overhead and significantly
simpler and smaller router configurations. In particular, new IXP
participants benefit from immediate and extensive interconnection,
while existing route server participants receive reachability
information from these new participants without necessarily having to
adapt their configurations.
</t>
<figure title='IXP-based Interconnection with Route Server' anchor="rs_interconnection">
<preamble></preamble>
<artwork align='center'>
___ ___
/ \ / \
..| AS1 |..| AS2 |..
: \___/ \___/ :
: \ / :
: \ / :
: \__/ :
: IXP / \ :
: | RS | :
: \____/ :
: / \ :
: / \ :
: __/ \__ :
: / \ / \ :
..| AS3 |..| AS4 |..
\___/ \___/
</artwork>
<postamble></postamble>
</figure>
<t>
As illustrated in <xref target="rs_interconnection" />, each router on
the IXP fabric requires only a single BGP session to the route server,
from which it can receive reachability information for all other
routers on the IXP which also connect to the route server.
</t>
</section>
<section title="Technical Considerations for Route Server Implementations" anchor="spec">
      <t>
        In order for an EBGP implementation to operate as a route server,
        certain adaptations to the mechanisms of <xref target="RFC4271" />
        are required; these are specified in full in the accompanying
        route server specification document. The two adaptations most
        relevant to route server operations are summarised below.
      </t>
      <section title="AS_PATH Attribute" anchor="as_path_attr">
        <t>
          As a route server does not participate in the process of
          forwarding traffic between its clients, it does not prepend its
          own AS number to the AS_PATH sequence of UPDATE messages which
          it forwards to its clients. This differs from the behaviour
          specified in <xref target="RFC4271" /> and has consequences for
          route server clients, as discussed in the AS_PATH consistency
          check considerations of <xref target="ops_considerations" />.
        </t>
      </section>
      <section title="Prefix Hiding" anchor="prefix_hiding">
        <t>
          Where a route server maintains a single Loc-RIB for all of its
          clients, only the path selected by the route server's Decision
          Process for each prefix is available for announcement to
          clients. If routing policy prevents a particular client from
          receiving that path, the prefix is hidden from that client
          entirely, even where alternative paths exist which the client's
          policy would accept. This problem can be avoided by deploying a
          separate Loc-RIB for each client which requires one, at the cost
          of the increased resource consumption discussed in <xref
          target="ops_scaling" />.
        </t>
      </section>
</section>
<section title="Operational Considerations for Route Server Installations" anchor="ops_considerations">
<section title="Route Server Scaling" anchor="ops_scaling">
<t>
While deployment of multiple Loc-RIBs on the route server presents a
simple way to avoid the prefix hiding problem noted in <xref
target="prefix_hiding" />, this approach requires significantly more
computing resources on the route server than where a single Loc-RIB
is deployed for all clients. As the <xref target="RFC4271" />
Decision Process must be applied to all Loc-RIBs deployed on the
route server, both CPU and memory requirements on the host computer
scale approximately according to O(P * N), where P is the total
number of unique prefixes received by the route server and N is the
number of route server clients which require a unique Loc-RIB. As
this is a super-linear scaling relationship, large route servers
may derive benefit from deploying per-client
Loc-RIBs only where they are required.
</t>
<t>
Regardless of any Loc-RIB optimization implemented, the route
server's control plane bandwidth requirements will scale
according to O(P * N), where P is the total number of
unique prefixes received by the route server and N is the total
number of route server clients. In the case where P_avg (the arithmetic mean
number of unique prefixes received per route server client) remains
roughly constant even as the number of connected clients increases,
this relationship can be rewritten as O((P_avg * N) * N) or O(N^2).
This quadratic upper bound on the network traffic requirements
indicates that the route server model will not scale to
arbitrarily large sizes.
</t>
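      <t>
        As a rough illustration of these two relationships, the following
        Python sketch (using hypothetical figures for P_avg and N)
        estimates the total number of Loc-RIB entries and the total UPDATE
        fan-out for a route server which maintains a Loc-RIB per client.
      </t>
      <figure>
        <artwork><![CDATA[
# Illustrative only: route server resource scaling, assuming each
# of n clients announces roughly p_avg unique prefixes and each
# client requires its own Loc-RIB.

def loc_rib_entries(p_avg, n):
    # CPU/memory scale with total prefixes times Loc-RIBs: O(P * N).
    total_prefixes = p_avg * n
    return total_prefixes * n

def updates_sent(p_avg, n):
    # Each client receives an UPDATE per unique prefix, so control
    # plane traffic scales as O((p_avg * N) * N) = O(N^2).
    return p_avg * n * n

for n in (10, 100, 1000):
    print(n, loc_rib_entries(50, n), updates_sent(50, n))
]]></artwork>
      </figure>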
<section title="Tackling Scaling Issues">
<t>
The network traffic scaling issue presents significant
            difficulties with no clear solution: ultimately, each client
            must receive an UPDATE for each unique prefix received by the
route server. However, there are several potential methods for
dealing with the CPU and memory resource requirements of route
servers.
</t>
<section title="View Merging and Decomposition">
<t>
            View merging and decomposition, outlined in <xref
            target="RS-ARCH" />, describes a method of optimizing
            memory and CPU requirements where multiple route server clients
            are subject to exactly the same routing policies. In this
            situation, the multiple Loc-RIB views required by these clients
            are merged into a single view.
</t>
<t>
A variation of this approach may be implemented on
route servers by ensuring that separate Loc-RIBs are only
configured for route server clients with unique export peering
policies.
</t>
</section>
<section title="Destination Splitting">
<t>
            Destination splitting, also outlined in <xref
            target="RS-ARCH" />, is a method for route server
            clients to connect to multiple route servers and to send
non-overlapping sets of prefixes to each route server. As
each route server computes the best path for its own set of
prefixes, the quadratic scaling requirement operates on
multiple smaller sets of prefixes. This reduces the overall
computational and memory requirements for managing multiple
Loc-RIBs and performing the best-path calculation on each. In
order for this method to perform well, destination splitting
would require significant co-ordination between the route
server operator and each route server client. In practice,
such levels of co-ordination are unlikely to work
successfully, thereby diminishing the value of this approach.
</t>
</section>
<section title="NEXT_HOP Resolution">
<t>
            As route servers are usually deployed at IXPs which use flat
            Layer-2 networks, recursive resolution of the NEXT_HOP
attribute is generally not required, and can be replaced by a
simple check to ensure that the NEXT_HOP value for each prefix
is a network address on the IXP LAN's IP address range.
</t>
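          <t>
            A minimal sketch of such a check follows, written in Python;
            the IXP LAN prefix shown is documentation address space,
            chosen purely for illustration.
          </t>
          <figure>
            <artwork><![CDATA[
# Illustrative only: replace recursive NEXT_HOP resolution with a
# simple membership test against the IXP peering LAN prefix.
import ipaddress

IXP_LAN = ipaddress.ip_network("192.0.2.0/24")  # hypothetical IXP LAN

def next_hop_valid(next_hop):
    return ipaddress.ip_address(next_hop) in IXP_LAN

print(next_hop_valid("192.0.2.33"))    # True: address on the IXP LAN
print(next_hop_valid("198.51.100.1"))  # False: address off the LAN
]]></artwork>
          </figure>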
</section>
</section>
</section>
<section title="NLRI Leakage Mitigation" anchor="ops_leakage_mitigation">
<t>
NLRI leakage occurs when a BGP client unintentionally distributes
NLRI UPDATE messages to one or more neighboring BGP routers. NLRI
leakage of this form to a route server can cause connectivity
problems at an IXP if each route server client is configured to
        accept all prefix UPDATE messages from the route server. Due to
        the potential for collateral damage caused by NLRI leakage, it is
        RECOMMENDED that route server operators deploy NLRI leakage
        mitigation measures in order to prevent unintentional prefix
        announcements, or else to limit the scale of any such leak.
        Although not foolproof, per-client inbound prefix
limits can restrict the damage caused by prefix leakage in many
cases. Per-client inbound prefix filtering on the route server is a
more deterministic and usually more reliable means of preventing
prefix leakage, but requires more administrative resources to
maintain properly.
</t>
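        <t>
          The following Python sketch illustrates how the two measures
          described above interact; the client name, prefix limit and
          registered prefix are hypothetical examples.
        </t>
        <figure>
          <artwork><![CDATA[
# Illustrative only: per-client inbound prefix limit plus
# per-client inbound prefix filter on a route server.
import ipaddress

PREFIX_LIMIT = {"client-a": 100}   # hypothetical per-client limit
REGISTERED = {"client-a": [ipaddress.ip_network("203.0.113.0/24")]}

def accept_prefix(client, prefixes_already_received, prefix):
    # Coarse safety net: refuse further prefixes beyond the limit.
    if prefixes_already_received >= PREFIX_LIMIT[client]:
        return False
    # Deterministic filter: accept only prefixes registered for
    # this client (e.g. derived from IRR data).
    net = ipaddress.ip_network(prefix)
    return any(net.subnet_of(r) for r in REGISTERED[client])

print(accept_prefix("client-a", 10, "203.0.113.0/25"))   # True
print(accept_prefix("client-a", 10, "198.51.100.0/24"))  # False
]]></artwork>
        </figure>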
</section>
<section title="Route Server Redundancy" anchor="ops_use_two_of_em">
<t>
As the purpose of an IXP route server implementation is to provide a
reliable reachability brokerage service, it is RECOMMENDED that
exchange operators who implement route server systems provision
multiple route servers on each shared Layer-2 domain. There is no
requirement to use the same BGP implementation or operating system
for each route server
        on the IXP fabric; however, it is RECOMMENDED that where an
        operator provisions more than a single route server on the same
        shared Layer-2 domain, each route server implementation be configured
equivalently and in such a manner that the path reachability
information from each system is identical.
</t>
</section>
<section title="AS_PATH Consistency Check">
<t>
        As per <xref target="RFC4271" />, every BGP speaker that advertises
        a route to another external BGP speaker prepends its own AS number
        as the last element (i.e. the leftmost position) of the AS_PATH
        sequence. Therefore, the leftmost AS in an AS_PATH attribute is
        equal to the autonomous system number of the BGP speaker that sent
        the UPDATE message.
</t>
<t>
<xref target="RFC4271" /> suggests in section 6.3 that a BGP speaker
MAY check the AS_PATH attribute of each UPDATE message received for
consistency, if the leftmost AS in the AS_PATH is in fact the one
of the sender.
</t>
<t>
        Route servers do not modify the AS_PATH attribute (as described in
        <xref target="as_path_attr" />), since they do not participate in
        the exchange of traffic. Consequently, a consistency check
        performed by a route server client on the AS_PATH of an UPDATE
        message received from the route server would fail. It is therefore
        RECOMMENDED that route server clients disable the AS_PATH
        consistency check towards the route server.
</t>
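      <t>
        The failure mode can be demonstrated with a small Python sketch;
        the AS numbers used are drawn from documentation ranges and are
        purely illustrative.
      </t>
      <figure>
        <artwork><![CDATA[
# Illustrative only: the RFC 4271 Section 6.3 consistency check.

def as_path_consistent(peer_as, as_path):
    # The leftmost AS must match the AS of the sending EBGP peer.
    return as_path[0] == peer_as

# Bilateral session: AS64496 announces a route it originated.
print(as_path_consistent(64496, [64496]))   # True

# Route server session: the route server (AS64510) forwards the
# route without prepending its own AS, so the check fails.
print(as_path_consistent(64510, [64496]))   # False
]]></artwork>
      </figure>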
</section>
<section title="Implementing Routing Policies">
<t>
        Prefix filtering is commonly implemented on route servers to
        provide prefix distribution control mechanisms for route server
        clients. Two widely used strategies are described below.
</t>
<section title="Communities">
<t>
          Prefixes sent to the route server are tagged with specific BGP
          communities <xref target="RFC1997" /> or extended communities
          <xref target="RFC4360" /> attribute values, agreed upon
          beforehand between the operator and all participants. Based on
          these values, routes are propagated to all other participants, a
          subset of participants, or none.
          This allows for one-way filtering policies to be implemented on
          the route server; if a participant chooses not to exchange
          routes with a certain other participant, it must instruct the
          route server not to announce its routes to that participant, and
          must filter out incoming routes from that participant on its own
          router.
</t>
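        <t>
          A sketch of this mechanism in Python follows; the community
          conventions shown (0:peer-as to withhold routes from a peer,
          rs-as:peer-as to announce to a peer) are a hypothetical example
          of values that an operator and its participants might agree
          upon.
        </t>
        <figure>
          <artwork><![CDATA[
# Illustrative only: community-controlled route distribution.
RS_AS = 64510  # hypothetical route server AS number

def announce_to(peer_as, communities):
    if (0, peer_as) in communities:       # withhold from this peer
        return False
    if (RS_AS, peer_as) in communities:   # announce to this peer
        return True
    # Default: announce unless tagged "withhold from everyone".
    return (0, RS_AS) not in communities

print(announce_to(64496, {(0, 64496)}))   # False: peer blocked
print(announce_to(64497, {(0, 64496)}))   # True: not blocked
]]></artwork>
        </figure>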
</section>
<section title="Internet Routing Registry">
<t>
Filters configured on the route server can be constructed by querying
an Internet Routing Registry database for RPSL <xref target="RFC2622" />
          objects placed there by participating operators. Import and
          export statements referencing the route server's ASN in a
          participant's aut-num object define that participant's desired
          policy, from which the configured filters are derived.
</t>
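        <t>
          As an illustration, the following Python sketch queries an
          IRRd-style whois server for the route objects originated by a
          client ASN; the server name is an example of a commonly used
          public IRR mirror, and the ASN shown is from the documentation
          range.
        </t>
        <figure>
          <artwork><![CDATA[
# Illustrative only: fetch route objects for an origin ASN from an
# IRR database over whois, as raw material for prefix filters.
import socket

def irr_route_objects(asn, server="whois.radb.net", port=43):
    data = b""
    with socket.create_connection((server, port)) as sock:
        # "-i origin" is the IRRd inverse query by origin ASN.
        sock.sendall(("-i origin %s\r\n" % asn).encode("ascii"))
        while True:
            chunk = sock.recv(4096)
            if not chunk:
                break
            data += chunk
    return [line.split()[1]
            for line in data.decode("ascii", "replace").splitlines()
            if line.startswith("route:")]

print(irr_route_objects("AS64496"))
]]></artwork>
        </figure>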
</section>
</section>
</section>
<section title="Security Considerations">
<t>
On route server installations which do not employ prefix-hiding
        mitigation techniques, the prefix hiding problem outlined in
        <xref target="prefix_hiding" /> can be used in certain circumstances
to proactively block third party prefix announcements from other route
server clients.
</t>
</section>
<section title="IANA Considerations">
<t>
        This document describes operational practice only and requires no
        new allocations from IANA.
</t>
</section>
<section title="Acknowledgments">
<t>
The authors would like to thank Chris Hall, Ryan Bickhart and Steven
Bakker for their valuable input.
</t>
<t>
In addition, the authors would like to acknowledge the developers of
BIRD, OpenBGPD and Quagga, whose open source BGP implementations
include route server capabilities which are compliant with this
document.
</t>
</section>
</middle>
<back>
<references title="Normative References">
<?rfc include="reference.RFC.1997"?> <!-- BGP Communities -->
<?rfc include="reference.RFC.2119"?> <!-- key words -->
<?rfc include="reference.RFC.2622"?> <!-- RPSL -->
<?rfc include="reference.RFC.4271"?> <!-- BGP-4 -->
<?rfc include="reference.RFC.4360"?> <!-- Extendend Communities -->
<?rfc include="reference.RFC.4456"?> <!-- Route Reflector IBGP -->
<?rfc include="reference.I-D.ietf-idr-add-paths"?>
<?rfc include="reference.I-D.ietf-grow-diverse-bgp-path-dist"?>
<reference anchor="RS-ARCH"
target="http://www.cs.usc.edu/research/95-603.ps.Z">
<front>
<title>A Route Server Architecture for Inter-Domain Routing</title>
<author initials="R" surname="Govindan" fullname="Ramesh Govindan">
<organization>The University of Southern California</organization>
</author>
<author initials="C" surname="Alaettinoglu" fullname="Cengiz Alaettinoglu">
<organization>The University of Southern California</organization>
</author>
<author initials="K" surname="Varadhan" fullname="Kannan Varadhan">
<organization>The University of Southern California</organization>
</author>
<author initials="D" surname="Estrin" fullname="Deborah Estrin">
<organization>The University of Southern California</organization>
</author>
<date year="1995" />
</front>
</reference>
</references>
<references title="Informative References">
<?rfc include="reference.RFC.1863"?> <!-- old route server rfc -->
<?rfc include="reference.RFC.3418"?> <!-- MIB // should be referenced in MIB section-->
<?rfc include="reference.RFC.4223"?> <!-- Reclassification of RFC 1863 -->
<?rfc include="reference.RFC.4760"?> <!-- BGP-4 MP ext -->
<?rfc include="reference.RFC.5065"?> <!-- BGP confederations -->
<?rfc include="reference.RFC.5226"?> <!-- IANA Considerations Section Guidelines -->
</references>
</back>
</rfc>