<?xml version="1.0" encoding="us-ascii"?>
<!DOCTYPE rfc PUBLIC '' "http://xml.resource.org/authoring/rfc2629.dtd"[
<!ENTITY I-D.ietf-idr-ix-bgp-route-server PUBLIC '' "http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-idr-ix-bgp-route-server.xml">
<!ENTITY RFC1997 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.1997.xml">
<!ENTITY RFC2119 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml">
<!ENTITY RFC2622 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.2622.xml">
<!ENTITY RFC4271 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4271.xml">
<!ENTITY RFC4360 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4360.xml">
<!ENTITY RFC4456 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4456.xml">
<!ENTITY RFC4893 PUBLIC '' "http://xml.resource.org/public/rfc/bibxml/reference.RFC.4893.xml">
]>
<?xml-stylesheet type='text/xsl'
href="http://greenbytes.de/tech/webdav/rfc2629xslt/rfc2629.xslt" ?>
<?rfc strict="yes" ?>
<?rfc toc="yes"?>
<!-- generate a ToC -->
<?rfc tocdepth="4"?>
<!-- the number of levels of subsections in ToC. default: 3 -->
<!-- control references -->
<?rfc symrefs="yes"?>
<!-- use symbolic references tags, i.e, [RFC2119] instead of [1] -->
<?rfc sortrefs="yes" ?>
<!-- sort the reference entries alphabetically -->
<!-- control vertical white space
(using these PIs as follows is recommended by the RFC Editor) -->
<?rfc compact="yes" ?>
<!-- do not start each main section on a new page -->
<?rfc subcompact="no" ?>
<!-- keep one blank line between list items -->
<!-- end of list of popular I-D processing instructions -->
<rfc category="info"
docName="draft-ietf-grow-ix-bgp-route-server-operations-03"
ipr="trust200902"
obsoletes=""
updates=""
submissionType="IETF"
xml:lang="en">
<!-- category values: std, bcp, info, exp, and historic
ipr values: full3667, noModification3667, noDerivatives3667
you can add the attributes updates="NNNN" and obsoletes="NNNN"
they will automatically be output with "(if approved)" -->
<!-- ***** FRONT MATTER ***** -->
<front>
<title abbrev="IX BGP Route Server Operations">
Internet Exchange Route Server Operations
</title>
<author initials="N" surname="Hilliard" fullname="Nick Hilliard">
<organization>INEX</organization>
<address>
<postal>
<street>4027 Kingswood Road</street>
<city>Dublin</city>
<code>24</code>
<country>IE</country>
</postal>
<email>nick@inex.ie</email>
</address>
</author>
<author initials="E" surname="Jasinska" fullname="Elisa Jasinska">
<organization>Netflix, Inc</organization>
<address>
<postal>
<street>100 Winchester Circle</street>
<city>Los Gatos</city>
<region>CA</region>
<code>95032</code>
<country>USA</country>
</postal>
<email>elisa@netflix.com</email>
</address>
</author>
<author initials="R" surname="Raszuk" fullname="Robert Raszuk">
<organization>NTT I3</organization>
<address>
<postal>
<street>101 S Ellsworth Avenue Suite 350</street>
<city>San Mateo</city>
<region>CA</region>
<code>94401</code>
<country>US</country>
</postal>
<email>robert@raszuk.net</email>
</address>
</author>
<author initials="N" surname="Bakker" fullname="Niels Bakker">
<organization>Akamai Technologies B.V.</organization>
<address>
<postal>
<street>Kingsfordweg 151</street>
<city>Amsterdam</city>
<code>1043 GR</code>
<country>NL</country>
</postal>
<email>nbakker@akamai.com</email>
</address>
</author>
<date month="September" year="2014" />
<area>Routing</area>
<workgroup>GROW Working Group</workgroup>
<keyword>I-D</keyword>
<keyword>Internet-Draft</keyword>
<keyword>GROW</keyword>
<abstract>
<t>
The popularity of Internet exchange points (IXPs) brings new
challenges to interconnecting networks. While bilateral eBGP sessions
between exchange participants were historically the most common means
of exchanging reachability information over an IXP, the overhead
associated with this interconnection method causes serious operational
and administrative scaling problems for IXP participants.
</t>
<t>
Multilateral interconnection using Internet route servers can
dramatically reduce the administrative and operational overhead of IXP
participation, and these systems are used by many IXP participants as a
preferred means of exchanging routing information.
</t>
<t>
This document describes operational considerations for multilateral
interconnections at IXPs.
</t>
</abstract>
</front>
<middle>
<section title="Introduction">
<t>
Internet exchange points (IXPs) provide IP data interconnection
facilities for their participants, typically using shared Layer-2
networking media such as Ethernet. The Border Gateway Protocol (BGP)
<xref target="RFC4271" /> is normally used to facilitate exchange of
network reachability information over these media.
</t>
<t>
As bilateral interconnection between IXP participants requires
operational and administrative overhead, BGP route servers <xref
target="I-D.ietf-idr-ix-bgp-route-server" /> are often deployed by IXP
operators to provide a simple and convenient means of interconnecting
IXP participants with each other. A route server redistributes
prefixes received from its BGP clients to other clients according to a
pre-specified policy, and it can be viewed as an eBGP equivalent of an
iBGP <xref target="RFC4456" /> route reflector.
</t>
<t>
Route servers at IXPs require careful management and it is important
for route server operators to thoroughly understand both how they work
and what their limitations are. In this document, we discuss several
issues of operational relevance to route server operators and provide
recommendations to help route server operators provision a reliable
interconnection service.
</t>
<section title="Notational Conventions">
<t>
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
"SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and
"OPTIONAL" in this document are to be interpreted as described in
<xref target="RFC2119" />.
</t>
</section>
</section>
<section title="Bilateral BGP Sessions">
<t>
Bilateral interconnection is a method of interconnecting routers using
individual BGP sessions between each participant router on an IXP, in
order to exchange reachability information. If an IXP participant
wishes to implement an open interconnection policy - i.e. a policy of
interconnecting with as many other IXP participants as possible - it
is necessary for the participant to liaise with each of their intended
interconnection partners. Interconnection can then be implemented
bilaterally by configuring a BGP session on both participants' routers
to exchange network reachability information. If each exchange
participant interconnects with each other participant, a full mesh of
BGP sessions is needed, as shown in <xref target="ixp_interconnection"
/>.
</t>
<figure title="Full-Mesh Interconnection at an IXP" anchor="ixp_interconnection">
<preamble></preamble>
<artwork align="center">
___ ___
/ \ / \
..| AS1 |..| AS2 |..
: \___/____\___/ :
: | \ / | :
: | \ / | :
: IXP | \/ | :
: | /\ | :
: | / \ | :
: _|_/____\_|_ :
: / \ / \ :
..| AS3 |..| AS4 |..
\___/ \___/
</artwork>
<postamble></postamble>
</figure>
<t>
<xref target="ixp_interconnection" /> depicts an IXP platform with
four connected routers, administered by four separate exchange
participants, each of them with a locally unique autonomous system
number: AS1, AS2, AS3 and AS4. Each of these four participants wishes
to exchange traffic with all other participants; this is accomplished
by configuring a full mesh of BGP sessions on each router connected to
the exchange, resulting in 6 BGP sessions across the IXP fabric.
</t>
<t>
The number of BGP sessions at an exchange has an upper bound of
n*(n-1)/2, where n is the number of routers at the exchange. As many
exchanges have large numbers of participating networks, the amount of
administrative and operational overhead required to implement an open
interconnection policy scales quadratically. New participants
to an IXP require significant initial resourcing in order to gain
value from their IXP connection, while existing exchange participants
need to commit ongoing resources in order to benefit from
interconnecting with these new participants.
</t>
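<t>
For example, an exchange with 20 participating routers would require
up to 20*19/2 = 190 bilateral BGP sessions for full-mesh
interconnection, and every existing router would need to be
reconfigured each time a new participant joins the exchange.
</t>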
</section>
<section title="Multilateral Interconnection">
<t>
Multilateral interconnection is implemented using a route server
configured to use BGP to distribute network layer reachability
information (NLRI) among all client routers. The route server
preserves the BGP NEXT_HOP attribute from all received NLRI UPDATE
messages, and passes these messages with unchanged NEXT_HOP to its
route server clients, according to its configured routing policy,
as described in <xref target="I-D.ietf-idr-ix-bgp-route-server" />.
Using this method of exchanging NLRI messages, an IXP participant
router can receive an aggregated list of prefixes from all other route
server clients using a single BGP session to the route server instead
of depending on BGP sessions with each other router at the exchange.
This reduces the overall number of BGP sessions at an Internet
exchange from n*(n-1)/2 to n, where n is the number of routers at the
exchange.
</t>
<t>
Although a route server uses BGP to exchange reachability information
with each of its clients, it does not forward traffic itself and is
therefore not a router.
</t>
<t>
In practical terms, this allows dense interconnection between IXP
participants with low administrative overhead and significantly
simpler and smaller router configurations. In particular, new IXP
participants benefit from immediate and extensive interconnection,
while existing route server participants receive reachability
information from these new participants without necessarily having to
modify their configurations.
</t>
<figure title="IXP-based Interconnection with Route Server" anchor="rs_interconnection">
<preamble></preamble>
<artwork align="center">
___ ___
/ \ / \
..| AS1 |..| AS2 |..
: \___/ \___/ :
: \ / :
: \ / :
: \__/ :
: IXP / \ :
: | RS | :
: \____/ :
: / \ :
: / \ :
: __/ \__ :
: / \ / \ :
..| AS3 |..| AS4 |..
\___/ \___/
</artwork>
<postamble></postamble>
</figure>
<t>
As illustrated in <xref target="rs_interconnection" />, each router on
the IXP fabric requires only a single BGP session to the route server,
from which it can receive reachability information for all other
routers on the IXP which also connect to the route server.
</t>
</section>
<section title="Operational Considerations for Route Server Installations" anchor="ops_considerations">
<section title="Path Hiding" anchor="path_hiding">
<t>
"Path hiding" is a term used in <xref
target="I-D.ietf-idr-ix-bgp-route-server" /> to describe the process
whereby a route server may mask individual paths by applying
conflicting routing policies to its Loc-RIB. When this happens,
route server clients receive incomplete information from the route
server about network reachability.
</t>
<t>
There are several approaches which may be used to mitigate the
effect of path hiding; these are described in <xref
target="I-D.ietf-idr-ix-bgp-route-server" />. However, the only
method which does not require explicit support from the route server
client is for the route server itself to maintain an individual
Loc-RIB for each client which is the subject of conflicting routing
policies.
</t>
</section>
<section title="Route Server Scaling" anchor="ops_scaling">
<t>
While deployment of multiple Loc-RIBs on the route server presents a
simple way to avoid the path hiding problem noted in <xref
target="path_hiding" />, this approach requires significantly more
computing resources on the route server than where a single Loc-RIB
is deployed for all clients. As the <xref target="RFC4271" /> BGP
decision process must be applied to all Loc-RIBs deployed on the
route server, both CPU and memory requirements on the host computer
scale approximately according to O(P * N), where P is the total
number of unique paths received by the route server and N is the
number of route server clients which require a unique Loc-RIB. As
this is a super-linear scaling relationship, large route servers may
derive benefit from deploying per-client Loc-RIBs only where they
are required.
</t>
<t>
Regardless of which Loc-RIB optimization technique is implemented, the
route server's control plane bandwidth requirements will scale
according to O(P * N), where P is the total number of unique paths
received by the route server and N is the total number of route
server clients. In the case where P_avg (the arithmetic mean number
of unique paths received per route server client) remains roughly
constant even as the number of connected clients increases, this
relationship can be rewritten as O((P_avg * N) * N) or O(N^2). This
quadratic upper bound on the network traffic requirements indicates
that the route server model will not scale to arbitrarily large
sizes.
</t>
<t>
This scaling analysis presents problems in three key areas: route
processor CPU overhead associated with BGP decision process
calculations, the memory requirements for handling many different
BGP path entries, and the network traffic bandwidth required to
distribute these prefixes from the route server to each route server
client.
</t>
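<t>
These scaling relationships can be illustrated with a short
back-of-the-envelope calculation. The following Python sketch is
purely illustrative and does not correspond to any particular route
server implementation; the input figures are assumptions chosen only
to show how the O(P * N) and O(N^2) terms grow.
</t>
<figure title="Illustrative Route Server Scaling Estimate (Python sketch)">
<artwork>
# Illustrative only: rough scaling estimate for a route server,
# following the O(P * N) relationships described above.

def control_plane_estimates(num_clients, avg_paths_per_client,
                            unique_locrib_clients):
    # P: total number of unique paths received by the route server
    total_paths = num_clients * avg_paths_per_client
    # memory/CPU proxy: each per-client Loc-RIB holds its own copy
    # of the decision process output, i.e. roughly O(P * N)
    locrib_entries = total_paths * unique_locrib_clients
    # control-plane traffic: each client receives an UPDATE for each
    # unique path, i.e. roughly O(P * N), or O(N^2) when the average
    # path count per client stays constant
    update_messages = total_paths * num_clients
    return locrib_entries, update_messages

# hypothetical example figures: 200 clients announcing 50 unique
# paths each, with 20 clients requiring their own Loc-RIB
print(control_plane_estimates(200, 50, 20))
</artwork>
</figure>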
<section title="Tackling Scaling Issues">
<t>
The network traffic scaling issue presents significant
difficulties with no clear solution - ultimately, each client
must receive an UPDATE for each unique prefix received by the
route server. However, there are several potential methods for
dealing with the CPU and memory resource requirements of route
servers.
</t>
<section title="View Merging and Decomposition">
<t>
View merging and decomposition, outlined in <xref
target="RS-ARCH" />, describes a method of optimising memory and
CPU requirements where multiple route server clients are subject
to exactly the same routing policies. In this situation, the
multiple Loc-RIB views required by each client are merged into a
single view.
</t>
<t>
There are several variations of this approach. If the route
server operator has prior knowledge of interconnection
relationships between route server clients, then the operator
may configure separate Loc-RIBs only for route server clients
with unique outbound routing policies. As this approach
requires prior knowledge of interconnection relationships, the
route server operator must depend on each client sharing their
interconnection policies, either in an internal provisioning
database controlled by the operator, or else in an external
data store such as an Internet Routing Registry Database.
</t>
<t>
Conversely, the route server implementation itself may perform
internal view decomposition by creating virtual Loc-RIBs based
on a single in-memory master Loc-RIB, with delta differences for
each prefix subject to different routing policies. This allows a
more granular and flexible approach to the problem of Loc-RIB
scaling, at the expense of requiring a more complex in-memory
Loc-RIB structure.
</t>
<t>
Whatever method of view merging and decomposition is chosen on
a route server, pathological edge cases can be created whereby
the route server will scale no better than fully non-optimized
per-client Loc-RIBs. However, as most route server clients connect to a
route server for the purposes of reducing overhead, rather
than implementing complex per-client routing policies, edge
cases tend not to arise in practice.
</t>
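<t>
The grouping step at the heart of view merging can be sketched as
follows. This Python fragment is a simplified illustration only and
assumes that each client's outbound routing policy can be reduced to
a hashable summary; real implementations operate on considerably
richer policy representations.
</t>
<figure title="Illustrative View Merging Sketch (Python)">
<artwork>
# Illustrative only: merge clients whose export policies are
# identical so that they can share a single Loc-RIB view.

class Client:
    def __init__(self, name, export_policy):
        self.name = name
        self.export_policy = export_policy   # assumed policy summary

def merge_views(clients):
    views = {}
    for client in clients:
        key = frozenset(client.export_policy)
        views.setdefault(key, []).append(client)
    # one merged Loc-RIB view per distinct outbound policy
    return views

groups = merge_views([Client("AS1", {"announce-all"}),
                      Client("AS2", {"announce-all"}),
                      Client("AS3", {"block-AS1"})])
for members in groups.values():
    print(sorted(c.name for c in members))
</artwork>
</figure>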
</section>
<section title="Destination Splitting">
<t>
Destination splitting, also outlined in <xref
target="RS-ARCH" />, is a method for route server
clients to connect to multiple route servers and to send
non-overlapping sets of prefixes to each route server. As
each route server computes the best path for its own set of
prefixes, the quadratic scaling requirement operates on
multiple smaller sets of prefixes. This reduces the overall
computational and memory requirements for managing multiple
Loc-RIBs and performing the best-path calculation on each. In
order for this method to perform well, destination splitting
would require significant co-ordination between the route server
operator and each route server client. In practice, this level
of close co-ordination between IXP operators and their
participants tends not to occur, suggesting that the
approach is unlikely to be of any real use on production IXPs.
</t>
</section>
<section title="NEXT_HOP Resolution">
<t>
As route servers are usually deployed at IXPs which use flat
layer 2 networks, recursive resolution of the NEXT_HOP
attribute is generally not required, and can be replaced by a
simple check to ensure that the NEXT_HOP value for each prefix
is a network address on the IXP LAN's IP address range.
</t>
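<t>
A check of this form can be expressed in a few lines. The following
Python sketch is illustrative only; the peering LAN prefix shown is a
documentation address, and a production implementation would use the
IXP's actual LAN addressing.
</t>
<figure title="Illustrative NEXT_HOP On-LAN Check (Python sketch)">
<artwork>
# Illustrative only: replace recursive NEXT_HOP resolution with a
# simple membership test against the IXP peering LAN.

import ipaddress

IXP_LAN = ipaddress.ip_network("192.0.2.0/24")  # hypothetical IXP LAN

def next_hop_acceptable(next_hop):
    # accept the prefix only if its NEXT_HOP lies on the IXP LAN
    return ipaddress.ip_address(next_hop) in IXP_LAN

print(next_hop_acceptable("192.0.2.33"))    # True
print(next_hop_acceptable("198.51.100.1"))  # False
</artwork>
</figure>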
</section>
</section>
</section>
<section title="Prefix Leakage Mitigation" anchor="ops_leakage_mitigation">
<t>
Prefix leakage occurs when a BGP client unintentionally distributes
NLRI UPDATE messages to one or more neighboring BGP routers. Prefix
leakage of this form to a route server can cause serious
connectivity problems at an IXP if each route server client is
configured to accept all prefix UPDATE messages from the route
server. Due to the potential for collateral damage caused by NLRI
leakage, it is therefore RECOMMENDED that route server operators
deploy prefix leakage mitigation measures in order to prevent
unintentional prefix announcements or else limit the scale of any
such leak. Although not foolproof,
per-client inbound prefix limits can restrict the damage caused by
prefix leakage in many cases. Per-client inbound prefix filtering on
the route server is a more deterministic and usually more reliable
means of preventing prefix leakage, but requires more administrative
resources to maintain properly.
</t>
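<t>
The two mitigation measures described above can be sketched as
follows. The Python fragment below is a simplified illustration
rather than a description of any particular route server
implementation; the per-client prefix list and limit values are
assumptions.
</t>
<figure title="Illustrative Per-Client Prefix Filtering Sketch (Python)">
<artwork>
# Illustrative only: per-client inbound prefix filtering combined
# with a per-client maximum prefix limit.

def filter_client_announcements(announced, allowed, max_prefixes):
    # drop any prefix outside the client's registered prefix list
    accepted = [p for p in announced if p in allowed]
    # flag sessions announcing more prefixes than expected; a real
    # implementation might tear down the session or raise an alarm
    excess = max(0, len(announced) - max_prefixes)
    return accepted, excess

allowed = {"203.0.113.0/24", "198.51.100.0/24"}   # hypothetical
accepted, excess = filter_client_announcements(
    ["203.0.113.0/24", "192.0.2.0/24"], allowed, max_prefixes=10)
print(accepted, excess)   # ['203.0.113.0/24'] 0
</artwork>
</figure>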
<t>
If a route server operator implements per-client inbound prefix
filtering, then it is RECOMMENDED that the operator also builds in
mechanisms to automatically compare the Adj-RIB-In received from
each client with the inbound prefix lists configured for those
clients. Naturally, it is the responsibility of the route server
client to ensure that their stated prefix list is compatible with
what they announce to an IXP route server. However, many network
operators do not carefully manage their published routing policies
and it is not uncommon to see significant variation between the
two sets of prefixes. Route server operator visibility into this
discrepancy can provide significant advantages to both operator
and client.
</t>
</section>
<section title="Route Server Redundancy" anchor="ops_use_two_of_em">
<t>
As the purpose of an IXP route server implementation is to provide
a reliable reachability brokerage service, it is RECOMMENDED that
exchange operators who implement route server systems provision
multiple route servers on each shared Layer-2 domain. There is no
requirement to use the same BGP implementation or operating system
for each route server on the IXP fabric; however, it is
RECOMMENDED that where an operator provisions more than a single
route server on the same shared Layer-2 domain, each route server
implementation be configured equivalently and in such a manner
that the path reachability information from each system is
identical.
</t>
</section>
<section title="AS_PATH Consistency Check">
<t>
<xref target="RFC4271" /> requires that every BGP speaker which
advertises a route to another external BGP speaker prepends its
own AS number as the last element of the AS_PATH sequence.
Therefore the leftmost AS in an AS_PATH attribute should be equal
to the autonomous system number of the BGP speaker which sent the
UPDATE message.
</t>
<t>
As <xref target="I-D.ietf-idr-ix-bgp-route-server" /> suggests
that route servers should not modify the AS_PATH attribute, a
consistency check on the AS_PATH of an UPDATE received by a route
server client would normally fail. It is therefore RECOMMENDED
that route server clients disable the AS_PATH consistency check
towards the route server.
</t>
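<t>
The check in question is the standard eBGP "first AS" comparison,
sketched below in Python for illustration only; route server clients
normally disable this check on the session towards the route server
rather than reimplementing it.
</t>
<figure title="Illustrative First-AS Consistency Check (Python sketch)">
<artwork>
# Illustrative only: the eBGP "first AS" consistency check.  Against
# a transparent route server this check fails, because the AS_PATH
# begins with the originating client's AS, not the route server's AS.

def first_as_matches(as_path, peer_asn):
    return bool(as_path) and as_path[0] == peer_asn

# hypothetical example: route server in AS 64496, originating
# client in AS 64511
print(first_as_matches([64511, 64499], 64496))  # False: check fails
</artwork>
</figure>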
</section>
<section title="Export Routing Policies">
<t>
Policy filtering is commonly implemented on route servers to
provide prefix distribution control mechanisms for route server
clients. A route server "export" policy is a policy which affects
prefixes sent from the route server to a route server client.
Several different strategies are commonly used for implementing
route server export policies.
</t>
<section title="BGP Communities">
<t>
Prefixes sent to the route server are tagged with specific <xref
target="RFC1997" /> or <xref target="RFC4360" /> BGP community
attributes, based on pre-defined values agreed between the
operator and all clients. Based on these community tags,
prefixes may be propagated to all other clients, a subset of
clients, or none. This mechanism allows route server clients to
instruct the route server to implement per-client export routing
policies.
</t>
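<t>
One commonly used convention is shown below for illustration only;
the exact values are a matter of local agreement between the route
server operator and its clients. Here "rs-as" denotes the route
server's own AS number and "peer-as" the AS number of another route
server client.
</t>
<figure title="Example Community-Based Export Control Convention">
<artwork>
community value      export action for the tagged prefix
-----------------    ------------------------------------------
0:peer-as            do not announce to the client in peer-as
rs-as:peer-as        announce to the client in peer-as
0:rs-as              do not announce to any client
rs-as:rs-as          announce to all clients
</artwork>
</figure>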
<t>
As both standard and extended BGP community values are
restricted to at most 6 octets, the route server operator should take
care to ensure that the predefined BGP community values
mechanism used on their route server is compatible with <xref
target="RFC4893" /> 4-octet autonomous system numbers.
</t>
</section>
<section title="Internet Routing Registry">
<t>
Internet Routing Registry databases (IRRDBs) may be used by route
server operators to construct per-client routing
policies. <xref target="RFC2622" /> Routing Policy Specification
Language (RPSL) provides a comprehensive grammar for describing
interconnection relationships, and several toolsets exist which
can be used to translate RPSL policy description into route server
configurations.
</t>
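<t>
For illustration, a minimal RPSL fragment describing an
interconnection policy might look as follows, using documentation AS
numbers:
</t>
<figure title="Example RPSL Policy Fragment">
<artwork>
aut-num:    AS64496
import:     from AS64511 accept ANY
export:     to AS64511 announce AS64496
</artwork>
</figure>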
</section>
<section title="Client-accessible Databases">
<t>
Should the route server operator not wish to use either BGP
community tags or the public IRRDBs for implementing client
export policies, they may implement their own routing policy
database system for managing their clients' requirements. A
database of this form SHOULD allow a route server client
operator to update their routing policy and provide a mechanism
for allowing the client to specify whether they wish to exchange
all their prefixes with any other route server client.
Optionally, the implementation may allow a client to specify
unique routing policies for individual prefixes over which they
have routing policy control.
</t>
</section>
</section>
<section title="Layer 2 Reachability Problems">
<t>
Layer 2 reachability problems on an IXP can cause serious
operational problems for IXP participants which depend on route
servers for interconnection. Ethernet switch forwarding bugs have
occasionally been observed to cause non-transitive reachability.
For example, given a route server and two IXP participants, A and B,
if the two participants can reach the route server but cannot reach
each other, then traffic between the participants may be dropped
until such time as the layer 2 forwarding problem is resolved. This
situation does not tend to occur in bilateral interconnection
arrangements, as the routing control path between the two hosts is
usually (but not always, due to IXP inter-switch connectivity load
balancing algorithms) the same as the data path between them.
</t>
<t>
Problems of this form can be dealt with using Bidirectional
Forwarding Detection (BFD) <xref target="RFC5881" />. However, as
this is a bilateral protocol configured between routers, and as
there is currently no means for automatic configuration of BFD
between route server clients, BFD does not currently provide an
optimal means of handling the problem.
</t>
</section>
<section title="BGP NEXT_HOP Hijacking" anchor="nh_hijack">
<t>
Section 5.1.3(2) of <xref target="RFC4271" /> allows eBGP speakers
to change the NEXT_HOP address of an NLRI update to be a different
internet address on the same subnet. This is the mechanism which
allows route servers to operate on a shared layer 2 IXP network.
However, the mechanism can be abused by route server clients to
redirect traffic for their prefixes to other IXP participant
routers.
</t>
<figure title="BGP NEXT_HOP Hijacking using a Route Server" anchor="pic_nh_hijack">
<preamble></preamble>
<artwork align="center">
____
/ \
| AS99 |
\____/
/ \
/ \
__/ \__
/ \ / \
..| AS1 |..| AS2 |..
: \___/ \___/ :
: \ / :
: \ / :
: \__/ :
: IXP / \ :
: | RS | :
: \____/ :
: :
....................
</artwork>
<postamble></postamble>
</figure>
<t>
For example, in <xref target="pic_nh_hijack" />, if AS1 and AS2
both announce prefixes for AS99 to the route server, AS1 could set
the NEXT_HOP address for AS99's prefixes to be the address of
AS2's router, thereby diverting traffic for AS99 via AS2. This
may override the routing policies of AS99 and AS2.
</t>
<t>
Worse still, if the route server operator does not use inbound
prefix filtering, AS1 could announce any arbitrary prefix to the
route server with a NEXT_HOP address of any other IXP
participant. This could be used as a denial of service mechanism
against either the users of the address space being announced by
illicitly diverting their traffic, or the other IXP participant by
overloading their network with traffic which would not normally be
sent there.
</t>
<t>
This problem is not specific to route servers: the same attack can
be implemented using bilateral peering sessions. However, the
potential damage is amplified by route servers because a single
BGP session can be used to affect many networks simultaneously.
</t>
<t>
Route server operators SHOULD check that the BGP NEXT_HOP
attribute for NLRIs received from a route server client matches
the interface address of the client. If the route server receives
an NLRI where these addresses are different and where the
announcing route server client is in a different autonomous system
to the route server client which uses the next hop address, the
NLRI SHOULD be dropped.
</t>
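<t>
A check of this kind can be sketched as follows. The Python fragment
below is illustrative only; in particular, the mapping from IXP LAN
addresses to participant AS numbers is assumed to be available from
the IXP's provisioning data.
</t>
<figure title="Illustrative NEXT_HOP Hijack Check (Python sketch)">
<artwork>
# Illustrative only: reject NLRIs whose NEXT_HOP points at another
# participant's router, unless both routers belong to the same AS.

# hypothetical provisioning data: IXP LAN address to participant ASN
ADDRESS_TO_ASN = {
    "192.0.2.11": 64496,   # AS1's router
    "192.0.2.12": 64497,   # AS2's router
}

def accept_nlri(client_address, client_asn, nlri_next_hop):
    if nlri_next_hop == client_address:
        return True  # NEXT_HOP is the announcing client's own address
    # third-party NEXT_HOP: only allow it within the same AS
    return ADDRESS_TO_ASN.get(nlri_next_hop) == client_asn

print(accept_nlri("192.0.2.11", 64496, "192.0.2.12"))  # False: drop
</artwork>
</figure>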
</section>
</section>
<section title="Security Considerations">
<t>
On route server installations which do not employ path hiding
mitigation techniques, the path hiding problem outlined in
<xref target="path_hiding" /> can be used in certain circumstances
to proactively block third party prefix announcements from other route
server clients.
</t>
<t>
If the route server operator does not implement prefix leakage
mitigation as described in <xref
target="ops_leakage_mitigation" />, it is trivial for route server
clients to implement denial of service attacks against arbitrary
Internet networks using a route server.
</t>
<t>
Route server installations SHOULD be secured against BGP NEXT_HOP
hijacking, as described in <xref target="nh_hijack" />.
</t>
</section>
<section title="IANA Considerations">
<t>
There are no IANA considerations.
</t>
</section>
<section title="Acknowledgments">
<t>
The authors would like to thank Chris Hall, Ryan Bickhart, Steven
Bakker and Eduardo Ascenço Reis for their valuable input.
</t>
<t>
In addition, the authors would like to acknowledge the developers of
BIRD, OpenBGPD and Quagga, whose open source BGP implementations
include route server capabilities which are compliant with this
document.
</t>
</section>
</middle>
<back>
<references title="Normative References">
<?rfc include="reference.I-D.ietf-idr-ix-bgp-route-server"?>
<?rfc include="reference.RFC.2119"?> <!-- key words -->
</references>
<references title="Informative References">
<?rfc include="reference.RFC.1997"?> <!-- BGP Communities -->
<?rfc include="reference.RFC.2622"?> <!-- RPSL -->
<?rfc include="reference.RFC.4271"?> <!-- BGP-4 -->
<?rfc include="reference.RFC.4360"?> <!-- Extended Communities -->
<?rfc include="reference.RFC.4456"?> <!-- Route Reflector IBGP -->
<?rfc include="reference.RFC.4893"?> <!-- BGP Support for Four-octet AS Number Space -->
<?rfc include="reference.RFC.5881"?> <!-- BFD -->
<reference anchor="RS-ARCH"
target="http://www.cs.usc.edu/research/95-603.ps.Z">
<front>
<title>A Route Server Architecture for Inter-Domain Routing</title>
<author initials="R" surname="Govindan" fullname="Ramesh Govindan">
<organization>The University of Southern California</organization>
</author>
<author initials="C" surname="Alaettinoglu" fullname="Cengiz Alaettinoglu">
<organization>The University of Southern California</organization>
</author>
<author initials="K" surname="Varadhan" fullname="Kannan Varadhan">
<organization>The University of Southern California</organization>
</author>
<author initials="D" surname="Estrin" fullname="Deborah Estrin">
<organization>The University of Southern California</organization>
</author>
<date year="1995" />
</front>
</reference>
</references>
</back>
</rfc>