From 3f9c7369f7112d87007b87a5faaa61cdd5e24c39 Mon Sep 17 00:00:00 2001 From: Donald Sharp Date: Tue, 19 May 2015 18:03:47 -0700 Subject: [PATCH] BGP: Add dynamic update group support This patch implements the 'update-groups' functionality in BGP. This is a function that can significantly improve BGP performance for Update generation and resultant network convergence. BGP Updates are formed for "groups" of peers and then replicated and sent out to each peer rather than being formed for each peer. Thus major BGP operations related to outbound policy application, adj-out maintenance and actual Update packet formation are optimized. BGP update-groups dynamically groups peers together based on configuration as well as run-time criteria. Thus, it is more flexible than update-formation based on peer-groups, which relies on operator configuration. [Note that peer-group based update formation has been introduced into BGP by Cumulus but is currently intended only for specific releases.] From 11098af65b2b8f9535484703e7f40330a71cbae4 Mon Sep 17 00:00:00 2001 Subject: [PATCH] updgrp commits --- bgpd/Makefile.am | 5 +- bgpd/bgp_advertise.c | 238 +---- bgpd/bgp_advertise.h | 44 +- bgpd/bgp_aspath.c | 1 + bgpd/bgp_attr.c | 62 +- bgpd/bgp_attr.h | 14 + bgpd/bgp_clist.c | 1 + bgpd/bgp_damp.c | 1 + bgpd/bgp_debug.c | 107 +- bgpd/bgp_debug.h | 8 +- bgpd/bgp_dump.c | 2 + bgpd/bgp_ecommunity.c | 1 + bgpd/bgp_filter.c | 1 + bgpd/bgp_fsm.c | 61 +- bgpd/bgp_fsm.h | 9 +- bgpd/bgp_main.c | 1 + bgpd/bgp_mpath.c | 4 +- bgpd/bgp_mplsvpn.c | 1 + bgpd/bgp_network.c | 1 + bgpd/bgp_nexthop.c | 1 + bgpd/bgp_open.c | 1 + bgpd/bgp_packet.c | 685 ++----------- bgpd/bgp_packet.h | 14 +- bgpd/bgp_regex.c | 1 + bgpd/bgp_route.c | 1019 +++++++++++++------ bgpd/bgp_route.h | 22 + bgpd/bgp_routemap.c | 123 +-- bgpd/bgp_table.c | 1 + bgpd/bgp_table.h | 16 + bgpd/bgp_updgrp.c | 1842 +++++++++++++++++++++++++++++++++++ bgpd/bgp_updgrp.h | 594 +++++++++++ bgpd/bgp_updgrp_adv.c | 765 +++++++++++++++ 
bgpd/bgp_updgrp_packet.c | 1136 +++++++++++++++++++++ bgpd/bgp_vty.c | 426 +++++++- bgpd/bgp_vty.h | 1 + bgpd/bgp_zebra.c | 1 + bgpd/bgpd.c | 355 ++++++- bgpd/bgpd.h | 211 +++- configure.ac | 11 + lib/hash.c | 25 + lib/hash.h | 6 + lib/memtypes.c | 4 + lib/plist.c | 2 +- lib/sockunion.c | 2 +- lib/sockunion.h | 2 +- lib/stream.c | 47 + lib/stream.h | 3 + tests/aspath_test.c | 1 + tests/bgp_capability_test.c | 1 + tests/bgp_mp_attr_test.c | 1 + tests/bgp_mpath_test.c | 1 + tests/ecommunity_test.c | 1 + 52 files changed, 6587 insertions(+), 1296 deletions(-) create mode 100644 bgpd/bgp_updgrp.c create mode 100644 bgpd/bgp_updgrp.h create mode 100644 bgpd/bgp_updgrp_adv.c create mode 100644 bgpd/bgp_updgrp_packet.c diff --git a/bgpd/Makefile.am b/bgpd/Makefile.am index 63836a1bd55a..b005a0caf1d4 100644 --- a/bgpd/Makefile.am +++ b/bgpd/Makefile.am @@ -16,14 +16,15 @@ libbgp_a_SOURCES = \ bgp_packet.c bgp_network.c bgp_filter.c bgp_regex.c bgp_clist.c \ bgp_dump.c bgp_snmp.c bgp_ecommunity.c bgp_mplsvpn.c bgp_nexthop.c \ bgp_damp.c bgp_table.c bgp_advertise.c bgp_vty.c bgp_mpath.c \ - bgp_nht.c + bgp_nht.c bgp_updgrp.c bgp_updgrp_packet.c bgp_updgrp_adv.c noinst_HEADERS = \ bgp_aspath.h bgp_attr.h bgp_community.h bgp_debug.h bgp_fsm.h \ bgp_network.h bgp_open.h bgp_packet.h bgp_regex.h bgp_route.h \ bgpd.h bgp_filter.h bgp_clist.h bgp_dump.h bgp_zebra.h \ bgp_ecommunity.h bgp_mplsvpn.h bgp_nexthop.h bgp_damp.h bgp_table.h \ - bgp_advertise.h bgp_snmp.h bgp_vty.h bgp_mpath.h bgp_nht.h + bgp_advertise.h bgp_snmp.h bgp_vty.h bgp_mpath.h bgp_nht.h \ + bgp_updgrp.h bgpd_SOURCES = bgp_main.c bgpd_LDADD = libbgp.a ../lib/libzebra.la @LIBCAP@ @LIBM@ diff --git a/bgpd/bgp_advertise.c b/bgpd/bgp_advertise.c index c71e83bc7223..bd60ca10a921 100644 --- a/bgpd/bgp_advertise.c +++ b/bgpd/bgp_advertise.c @@ -25,6 +25,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "prefix.h" #include "hash.h" #include "thread.h" +#include "queue.h" #include 
"bgpd/bgpd.h" #include "bgpd/bgp_table.h" @@ -36,11 +37,12 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "bgpd/bgp_packet.h" #include "bgpd/bgp_fsm.h" #include "bgpd/bgp_mplsvpn.h" +#include "bgpd/bgp_updgrp.h" /* BGP advertise attribute is used for pack same attribute update into one packet. To do that we maintain attribute hash in struct peer. */ -static struct bgp_advertise_attr * +struct bgp_advertise_attr * baa_new (void) { return (struct bgp_advertise_attr *) @@ -64,7 +66,7 @@ baa_hash_alloc (void *p) return baa; } -static unsigned int +unsigned int baa_hash_key (void *p) { struct bgp_advertise_attr * baa = (struct bgp_advertise_attr *) p; @@ -72,7 +74,7 @@ baa_hash_key (void *p) return attrhash_key_make (baa->attr); } -static int +int baa_hash_cmp (const void *p1, const void *p2) { const struct bgp_advertise_attr * baa1 = p1; @@ -84,14 +86,14 @@ baa_hash_cmp (const void *p1, const void *p2) /* BGP update and withdraw information is stored in BGP advertise structure. This structure is referred from BGP adjacency information. 
*/ -static struct bgp_advertise * +struct bgp_advertise * bgp_advertise_new (void) { - return (struct bgp_advertise *) + return (struct bgp_advertise *) XCALLOC (MTYPE_BGP_ADVERTISE, sizeof (struct bgp_advertise)); } -static void +void bgp_advertise_free (struct bgp_advertise *adv) { if (adv->binfo) @@ -99,7 +101,7 @@ bgp_advertise_free (struct bgp_advertise *adv) XFREE (MTYPE_BGP_ADVERTISE, adv); } -static void +void bgp_advertise_add (struct bgp_advertise_attr *baa, struct bgp_advertise *adv) { @@ -109,7 +111,7 @@ bgp_advertise_add (struct bgp_advertise_attr *baa, baa->adv = adv; } -static void +void bgp_advertise_delete (struct bgp_advertise_attr *baa, struct bgp_advertise *adv) { @@ -121,7 +123,7 @@ bgp_advertise_delete (struct bgp_advertise_attr *baa, baa->adv = adv->next; } -static struct bgp_advertise_attr * +struct bgp_advertise_attr * bgp_advertise_intern (struct hash *hash, struct attr *attr) { struct bgp_advertise_attr ref; @@ -134,7 +136,7 @@ bgp_advertise_intern (struct hash *hash, struct attr *attr) return baa; } -static void +void bgp_advertise_unintern (struct hash *hash, struct bgp_advertise_attr *baa) { if (baa->refcnt) @@ -153,216 +155,38 @@ bgp_advertise_unintern (struct hash *hash, struct bgp_advertise_attr *baa) } } -/* BGP adjacency keeps minimal advertisement information. */ -static void -bgp_adj_out_free (struct bgp_adj_out *adj) -{ - peer_unlock (adj->peer); /* adj_out peer reference */ - XFREE (MTYPE_BGP_ADJ_OUT, adj); -} - -int -bgp_adj_out_lookup (struct peer *peer, struct prefix *p, - afi_t afi, safi_t safi, struct bgp_node *rn) +struct bgp_adj_out * +bgp_adj_peer_lookup (struct peer *peer, struct bgp_node *rn) { struct bgp_adj_out *adj; + struct peer_af *paf; for (adj = rn->adj_out; adj; adj = adj->next) - if (adj->peer == peer) - break; - - if (! adj) - return 0; - - return (adj->adv - ? (adj->adv->baa ? 1 : 0) - : (adj->attr ? 
1 : 0)); -} - -struct bgp_advertise * -bgp_advertise_clean (struct peer *peer, struct bgp_adj_out *adj, - afi_t afi, safi_t safi) -{ - struct bgp_advertise *adv; - struct bgp_advertise_attr *baa; - struct bgp_advertise *next; - struct bgp_advertise_fifo *fhead; - - adv = adj->adv; - baa = adv->baa; - next = NULL; - fhead = &peer->sync[afi][safi]->withdraw; - - if (baa) - { - /* Unlink myself from advertise attribute FIFO. */ - bgp_advertise_delete (baa, adv); - - /* Fetch next advertise candidate. */ - next = baa->adv; - - /* Unintern BGP advertise attribute. */ - bgp_advertise_unintern (peer->hash[afi][safi], baa); - - fhead = &peer->sync[afi][safi]->update; - } - - /* Unlink myself from advertisement FIFO. */ - BGP_ADV_FIFO_DEL (fhead, adv); - - /* Free memory. */ - bgp_advertise_free (adj->adv); - adj->adv = NULL; - - return next; + SUBGRP_FOREACH_PEER(adj->subgroup, paf) + if (paf->peer == peer) + return adj; + return NULL; } -void -bgp_adj_out_set (struct bgp_node *rn, struct peer *peer, struct prefix *p, - struct attr *attr, afi_t afi, safi_t safi, - struct bgp_info *binfo) -{ - struct bgp_adj_out *adj = NULL; - struct bgp_advertise *adv; - - if (DISABLE_BGP_ANNOUNCE) - return; - - /* Look for adjacency information. */ - if (rn) - { - for (adj = rn->adj_out; adj; adj = adj->next) - if (adj->peer == peer) - break; - } - - if (! adj) - { - adj = XCALLOC (MTYPE_BGP_ADJ_OUT, sizeof (struct bgp_adj_out)); - adj->peer = peer_lock (peer); /* adj_out peer reference */ - - if (rn) - { - BGP_ADJ_OUT_ADD (rn, adj); - bgp_lock_node (rn); - } - } - - if (adj->adv) - bgp_advertise_clean (peer, adj, afi, safi); - - adj->adv = bgp_advertise_new (); - - adv = adj->adv; - adv->rn = rn; - - assert (adv->binfo == NULL); - adv->binfo = bgp_info_lock (binfo); /* bgp_info adj_out reference */ - - if (attr) - adv->baa = bgp_advertise_intern (peer->hash[afi][safi], attr); - else - adv->baa = baa_new (); - adv->adj = adj; - - /* Add new advertisement to advertisement attribute list. 
*/ - bgp_advertise_add (adv->baa, adv); - - BGP_ADV_FIFO_ADD (&peer->sync[afi][safi]->update, &adv->fifo); - - /* - * Schedule write thread (by triggering adjustment of MRAI timer) only if - * update FIFO has grown. Otherwise, it will be done upon the work queue - * being fully processed. Only adjust timer if needed. - */ - if (!BGP_ROUTE_ADV_HOLD(peer->bgp) && - (BGP_ADV_FIFO_COUNT(&peer->sync[afi][safi]->update) >= - peer->bgp->adv_quanta)) - { - if (!peer->radv_adjusted) - { - if (bgp_debug_update(peer, NULL, 0)) - zlog_debug("%s scheduling MRAI timer after adj_out_set", peer->host); - bgp_adjust_routeadv(peer); - } - } -} - -void -bgp_adj_out_unset (struct bgp_node *rn, struct peer *peer, struct prefix *p, - afi_t afi, safi_t safi) +int +bgp_adj_out_lookup (struct peer *peer, struct prefix *p, + afi_t afi, safi_t safi, struct bgp_node *rn) { struct bgp_adj_out *adj; - struct bgp_advertise *adv; - - if (DISABLE_BGP_ANNOUNCE) - return; + struct peer_af *paf; - /* Lookup existing adjacency, if it is not there return immediately. */ for (adj = rn->adj_out; adj; adj = adj->next) - if (adj->peer == peer) - break; - - if (! adj) - return; - - /* Clearn up previous advertisement. */ - if (adj->adv) - bgp_advertise_clean (peer, adj, afi, safi); - - if (adj->attr) - { - /* We need advertisement structure. */ - adj->adv = bgp_advertise_new (); - adv = adj->adv; - adv->rn = rn; - adv->adj = adj; - - /* Add to synchronization entry for withdraw announcement. */ - BGP_ADV_FIFO_ADD (&peer->sync[afi][safi]->withdraw, &adv->fifo); - - /* - * Schedule write thread only if withdraw FIFO has grown. Otherwise, - * it will be done upon the work queue being fully processed. 
- */ - if (!BGP_ROUTE_ADV_HOLD(peer->bgp) && - (BGP_ADV_FIFO_COUNT(&peer->sync[afi][safi]->withdraw) >= - peer->bgp->wd_quanta)) - { - if (!peer->t_write) - { - if (bgp_debug_update(peer, NULL, 0)) - zlog_debug("%s scheduling write thread after adj_out_unset", - peer->host); - BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); - } - } - } - else - { - /* Remove myself from adjacency. */ - BGP_ADJ_OUT_DEL (rn, adj); - - /* Free allocated information. */ - bgp_adj_out_free (adj); + SUBGRP_FOREACH_PEER(adj->subgroup, paf) + if (paf->peer == peer) + { + return (adj->adv + ? (adj->adv->baa ? 1 : 0) + : (adj->attr ? 1 : 0)); + } - bgp_unlock_node (rn); - } + return 0; } -void -bgp_adj_out_remove (struct bgp_node *rn, struct bgp_adj_out *adj, - struct peer *peer, afi_t afi, safi_t safi) -{ - if (adj->attr) - bgp_attr_unintern (&adj->attr); - - if (adj->adv) - bgp_advertise_clean (peer, adj, afi, safi); - - BGP_ADJ_OUT_DEL (rn, adj); - bgp_adj_out_free (adj); -} void bgp_adj_in_set (struct bgp_node *rn, struct peer *peer, struct attr *attr) diff --git a/bgpd/bgp_advertise.h b/bgpd/bgp_advertise.h index 36ab576989f0..a1474374704c 100644 --- a/bgpd/bgp_advertise.h +++ b/bgpd/bgp_advertise.h @@ -21,6 +21,8 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #ifndef _QUAGGA_BGP_ADVERTISE_H #define _QUAGGA_BGP_ADVERTISE_H +struct update_subgroup; + /* BGP advertise FIFO. */ struct bgp_advertise_fifo { @@ -71,8 +73,14 @@ struct bgp_adj_out struct bgp_adj_out *next; struct bgp_adj_out *prev; - /* Advertised peer. */ - struct peer *peer; + /* Advertised subgroup. */ + struct update_subgroup *subgroup; + + /* Threading that makes the adj part of subgroup's adj queue */ + TAILQ_ENTRY(bgp_adj_out) subgrp_adj_train; + + /* Prefix information. */ + struct bgp_node *rn; /* Advertised attribute. 
*/ struct attr *attr; @@ -149,13 +157,14 @@ struct bgp_synchronize #define BGP_ADV_FIFO_COUNT(F) \ (F)->count +#define BGP_ADV_FIFO_EMPTY(F) \ + (((struct bgp_advertise_fifo *)(F))->next == (struct bgp_advertise *)(F)) + +#define BGP_ADV_FIFO_HEAD(F) \ + ((((struct bgp_advertise_fifo *)(F))->next == (struct bgp_advertise *)(F)) \ + ? NULL : (F)->next) + /* Prototypes. */ -extern void bgp_adj_out_set (struct bgp_node *, struct peer *, struct prefix *, - struct attr *, afi_t, safi_t, struct bgp_info *); -extern void bgp_adj_out_unset (struct bgp_node *, struct peer *, struct prefix *, - afi_t, safi_t); -extern void bgp_adj_out_remove (struct bgp_node *, struct bgp_adj_out *, - struct peer *, afi_t, safi_t); extern int bgp_adj_out_lookup (struct peer *, struct prefix *, afi_t, safi_t, struct bgp_node *); @@ -163,10 +172,23 @@ extern void bgp_adj_in_set (struct bgp_node *, struct peer *, struct attr *); extern void bgp_adj_in_unset (struct bgp_node *, struct peer *); extern void bgp_adj_in_remove (struct bgp_node *, struct bgp_adj_in *); -extern struct bgp_advertise * -bgp_advertise_clean (struct peer *, struct bgp_adj_out *, afi_t, safi_t); - extern void bgp_sync_init (struct peer *); extern void bgp_sync_delete (struct peer *); +extern unsigned int baa_hash_key (void *p); +extern int baa_hash_cmp (const void *p1, const void *p2); +extern void bgp_advertise_add (struct bgp_advertise_attr *baa, + struct bgp_advertise *adv); +extern struct bgp_advertise *bgp_advertise_new (void); +extern void bgp_advertise_free (struct bgp_advertise *adv); +extern struct bgp_advertise_attr * +bgp_advertise_intern (struct hash *hash, struct attr *attr); +extern struct bgp_advertise_attr *baa_new (void); +extern void +bgp_advertise_delete (struct bgp_advertise_attr *baa, + struct bgp_advertise *adv); +extern void +bgp_advertise_unintern (struct hash *hash, struct bgp_advertise_attr *baa); +extern struct bgp_adj_out * +bgp_adj_peer_lookup (struct peer *peer, struct bgp_node *rn); #endif /* 
_QUAGGA_BGP_ADVERTISE_H */ diff --git a/bgpd/bgp_aspath.c b/bgpd/bgp_aspath.c index 450f5f52739a..0441f8ed8c9e 100644 --- a/bgpd/bgp_aspath.c +++ b/bgpd/bgp_aspath.c @@ -29,6 +29,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "log.h" #include "stream.h" #include "jhash.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_aspath.h" diff --git a/bgpd/bgp_attr.c b/bgpd/bgp_attr.c index 506583b3e5eb..e0c5cc89370d 100644 --- a/bgpd/bgp_attr.c +++ b/bgpd/bgp_attr.c @@ -29,6 +29,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "log.h" #include "hash.h" #include "jhash.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_attr.h" @@ -38,6 +39,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "bgpd/bgp_debug.h" #include "bgpd/bgp_packet.h" #include "bgpd/bgp_ecommunity.h" +#include "bgpd/bgp_updgrp.h" /* Attribute strings for logging. */ static const struct message attr_str [] = @@ -474,7 +476,8 @@ attrhash_cmp (const void *p1, const void *p2) && attr1->aspath == attr2->aspath && attr1->community == attr2->community && attr1->med == attr2->med - && attr1->local_pref == attr2->local_pref) + && attr1->local_pref == attr2->local_pref + && attr1->rmap_change_flags == attr2->rmap_change_flags) { const struct attr_extra *ae1 = attr1->extra; const struct attr_extra *ae2 = attr2->extra; @@ -607,6 +610,40 @@ bgp_attr_intern (struct attr *attr) return find; } +/** + * Increment the refcount on various structures that attr holds. + * Note on usage: call _only_ when the 'attr' object has already + * been 'intern'ed and exists in 'attrhash' table. The function + * serves to hold a reference to that (real) object. + * Note also that the caller can safely call bgp_attr_unintern() + * after calling bgp_attr_refcount(). That would release the + * reference and could result in a free() of the attr object. 
+ */ +struct attr * +bgp_attr_refcount (struct attr *attr) +{ + /* Intern referenced strucutre. */ + if (attr->aspath) + attr->aspath->refcnt++; + + if (attr->community) + attr->community->refcnt++; + + if (attr->extra) + { + struct attr_extra *attre = attr->extra; + if (attre->ecommunity) + attre->ecommunity->refcnt++; + + if (attre->cluster) + attre->cluster->refcnt++; + + if (attre->transit) + attre->transit->refcnt++; + } + attr->refcnt++; + return attr; +} /* Make network statement's attribute. */ struct attr * @@ -1565,7 +1602,7 @@ bgp_mp_reach_parse (struct bgp_attr_parser_args *args, char buf1[INET6_ADDRSTRLEN]; char buf2[INET6_ADDRSTRLEN]; - if (bgp_debug_update(peer, NULL, 1)) + if (bgp_debug_update(peer, NULL, NULL, 1)) zlog_debug ("%s sent two nexthops %s %s but second one is not a link-local nexthop", peer->host, inet_ntop (AF_INET6, &attre->mp_nexthop_global, buf1, INET6_ADDRSTRLEN), @@ -1716,7 +1753,7 @@ bgp_attr_unknown (struct bgp_attr_parser_args *args) const u_char flag = args->flags; const bgp_size_t length = args->length; - if (bgp_debug_update(peer, NULL, 1)) + if (bgp_debug_update(peer, NULL, NULL, 1)) zlog_debug ("%s Unknown attribute is received (type %d, length %d)", peer->host, type, length); @@ -2098,6 +2135,7 @@ int stream_put_prefix (struct stream *, struct prefix *); size_t bgp_packet_mpattr_start (struct stream *s, afi_t afi, safi_t safi, + struct bpacket_attr_vec_arr *vecarr, struct attr *attr) { size_t sizep; @@ -2118,10 +2156,12 @@ bgp_packet_mpattr_start (struct stream *s, afi_t afi, safi_t safi, { case SAFI_UNICAST: case SAFI_MULTICAST: + bpacket_attr_vec_arr_set_vec (vecarr, BGP_ATTR_VEC_NH, s, attr); stream_putc (s, 4); stream_put_ipv4 (s, attr->nexthop.s_addr); break; case SAFI_MPLS_VPN: + bpacket_attr_vec_arr_set_vec (vecarr, BGP_ATTR_VEC_NH, s, attr); stream_putc (s, 12); stream_putl (s, 0); stream_putl (s, 0); @@ -2142,6 +2182,7 @@ bgp_packet_mpattr_start (struct stream *s, afi_t afi, safi_t safi, struct attr_extra *attre 
= attr->extra; assert (attr->extra); + bpacket_attr_vec_arr_set_vec (vecarr, BGP_ATTR_VEC_NH, s, attr); stream_putc (s, attre->mp_nexthop_len); stream_put (s, &attre->mp_nexthop_global, 16); if (attre->mp_nexthop_len == 32) @@ -2194,6 +2235,7 @@ bgp_packet_mpattr_end (struct stream *s, size_t sizep) bgp_size_t bgp_packet_attribute (struct bgp *bgp, struct peer *peer, struct stream *s, struct attr *attr, + struct bpacket_attr_vec_arr *vecarr, struct prefix *p, afi_t afi, safi_t safi, struct peer *from, struct prefix_rd *prd, u_char *tag) { @@ -2202,6 +2244,7 @@ bgp_packet_attribute (struct bgp *bgp, struct peer *peer, struct aspath *aspath; int send_as4_path = 0; int send_as4_aggregator = 0; + int i = 0; int use32bit = (CHECK_FLAG (peer->cap, PEER_CAP_AS4_RCV)) ? 1 : 0; size_t mpattrlen_pos = 0; @@ -2213,7 +2256,7 @@ bgp_packet_attribute (struct bgp *bgp, struct peer *peer, if (p && !(afi == AFI_IP && safi == SAFI_UNICAST)) { - mpattrlen_pos = bgp_packet_mpattr_start(s, afi, safi, attr); + mpattrlen_pos = bgp_packet_mpattr_start(s, afi, safi, vecarr, attr); bgp_packet_mpattr_prefix(s, afi, safi, p, prd, tag); bgp_packet_mpattr_end(s, mpattrlen_pos); } @@ -2290,16 +2333,9 @@ bgp_packet_attribute (struct bgp *bgp, struct peer *peer, { stream_putc (s, BGP_ATTR_FLAG_TRANS); stream_putc (s, BGP_ATTR_NEXT_HOP); + bpacket_attr_vec_arr_set_vec (vecarr, BGP_ATTR_VEC_NH, s, attr); stream_putc (s, 4); - if (safi == SAFI_MPLS_VPN) - { - if (attr->nexthop.s_addr == 0) - stream_put_ipv4 (s, peer->nexthop.v4.s_addr); - else - stream_put_ipv4 (s, attr->nexthop.s_addr); - } - else - stream_put_ipv4 (s, attr->nexthop.s_addr); + stream_put_ipv4 (s, attr->nexthop.s_addr); } /* MED attribute. */ diff --git a/bgpd/bgp_attr.h b/bgpd/bgp_attr.h index 4285c7552223..57eca3abbbfd 100644 --- a/bgpd/bgp_attr.h +++ b/bgpd/bgp_attr.h @@ -116,8 +116,17 @@ struct attr /* Path origin attribute */ u_char origin; + + /* has the route-map changed any attribute? + Used on the peer outbound side. 
*/ + u_int32_t rmap_change_flags; }; +/* rmap_change_flags definition */ +#define BATTR_RMAP_NEXTHOP_CHANGED (1 << 0) +#define BATTR_RMAP_NEXTHOP_PEER_ADDRESS (1 << 1) +#define BATTR_REFLECTED (1 << 2) + /* Router Reflector related structure. */ struct cluster_list { @@ -149,6 +158,8 @@ typedef enum { BGP_ATTR_PARSE_ERROR_NOTIFYPLS = -3, } bgp_attr_parse_ret_t; +struct bpacket_attr_vec_arr; + /* Prototypes. */ extern void bgp_attr_init (void); extern void bgp_attr_finish (void); @@ -162,6 +173,7 @@ extern void bgp_attr_dup (struct attr *, struct attr *); extern void bgp_attr_deep_dup (struct attr *, struct attr *); extern void bgp_attr_deep_free (struct attr *); extern struct attr *bgp_attr_intern (struct attr *attr); +extern struct attr *bgp_attr_refcount (struct attr *attr); extern void bgp_attr_unintern_sub (struct attr *); extern void bgp_attr_unintern (struct attr **); extern void bgp_attr_flush (struct attr *); @@ -172,6 +184,7 @@ extern struct attr *bgp_attr_aggregate_intern (struct bgp *, u_char, struct community *, int as_set, u_char); extern bgp_size_t bgp_packet_attribute (struct bgp *bgp, struct peer *, struct stream *, struct attr *, + struct bpacket_attr_vec_arr *vecarr, struct prefix *, afi_t, safi_t, struct peer *, struct prefix_rd *, u_char *); @@ -212,6 +225,7 @@ extern int bgp_mp_unreach_parse (struct bgp_attr_parser_args *args, * finally the _end() function. 
*/ extern size_t bgp_packet_mpattr_start(struct stream *s, afi_t afi, safi_t safi, + struct bpacket_attr_vec_arr *vecarr, struct attr *attr); extern void bgp_packet_mpattr_prefix(struct stream *s, afi_t afi, safi_t safi, struct prefix *p, struct prefix_rd *prd, diff --git a/bgpd/bgp_clist.c b/bgpd/bgp_clist.c index 80564df4b7d7..33444c46f3ef 100644 --- a/bgpd/bgp_clist.c +++ b/bgpd/bgp_clist.c @@ -23,6 +23,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "command.h" #include "prefix.h" #include "memory.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_community.h" diff --git a/bgpd/bgp_damp.c b/bgpd/bgp_damp.c index 0ffafb7a085b..468a3e91bb78 100644 --- a/bgpd/bgp_damp.c +++ b/bgpd/bgp_damp.c @@ -26,6 +26,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "command.h" #include "log.h" #include "thread.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_damp.h" diff --git a/bgpd/bgp_debug.c b/bgpd/bgp_debug.c index 9033b94df457..27d7e69782c2 100644 --- a/bgpd/bgp_debug.c +++ b/bgpd/bgp_debug.c @@ -29,6 +29,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "log.h" #include "sockunion.h" #include "memory.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_aspath.h" @@ -36,6 +37,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "bgpd/bgp_attr.h" #include "bgpd/bgp_debug.h" #include "bgpd/bgp_community.h" +#include "bgpd/bgp_updgrp.h" unsigned long conf_bgp_debug_as4; unsigned long conf_bgp_debug_neighbor_events; @@ -46,6 +48,7 @@ unsigned long conf_bgp_debug_keepalive; unsigned long conf_bgp_debug_update; unsigned long conf_bgp_debug_zebra; unsigned long conf_bgp_debug_nht; +unsigned long conf_bgp_debug_update_groups; unsigned long term_bgp_debug_as4; unsigned long term_bgp_debug_neighbor_events; @@ -56,6 +59,7 @@ unsigned long term_bgp_debug_keepalive; unsigned long term_bgp_debug_update; unsigned long 
term_bgp_debug_zebra; unsigned long term_bgp_debug_nht; +unsigned long term_bgp_debug_update_groups; struct list *bgp_debug_neighbor_events_peers = NULL; struct list *bgp_debug_keepalive_peers = NULL; @@ -313,6 +317,12 @@ bgp_debug_list_has_entry(struct list *list, struct peer *peer, struct prefix *p) return 0; } +int +bgp_debug_peer_updout_enabled(struct peer *peer) +{ + return (bgp_debug_list_has_entry(bgp_debug_update_out_peers, peer, NULL)); +} + /* Dump attribute. */ int bgp_dump_attr (struct peer *peer, struct attr *attr, char *buf, size_t size) @@ -438,6 +448,15 @@ bgp_notify_print(struct peer *peer, struct bgp_notify *bgp_notify, bgp_notify->data ? bgp_notify->data : ""); } +static void +bgp_debug_clear_updgrp_update_dbg(struct bgp *bgp) +{ + if (!bgp) + bgp = bgp_get_default(); + update_group_walk (bgp, update_group_clear_update_dbg, NULL); +} + + /* Debug option setting interface. */ unsigned long bgp_debug_option = 0; @@ -912,7 +931,19 @@ DEFUN (debug_bgp_update_direct_peer, if (inbound) bgp_debug_list_add_entry(bgp_debug_update_in_peers, peer, NULL); else - bgp_debug_list_add_entry(bgp_debug_update_out_peers, peer, NULL); + { + struct peer_af *paf; + int af; + + bgp_debug_list_add_entry(bgp_debug_update_out_peers, peer, NULL); + PEERAF_FOREACH (peer, paf, af) + { + if (PAF_SUBGRP (paf)) + { + UPDGRP_PEER_DBG_EN(PAF_SUBGRP(paf)->update_group); + } + } + } if (vty->node == CONFIG_NODE) { @@ -1006,6 +1037,20 @@ DEFUN (no_debug_bgp_update_direct_peer, vty_out (vty, "BGP updates debugging (outbound) is off%s", VTY_NEWLINE); } } + + if (found_peer) + { + struct peer_af *paf; + int af; + + PEERAF_FOREACH (peer, paf, af) + { + if (PAF_SUBGRP (paf)) + { + UPDGRP_PEER_DBG_DIS(PAF_SUBGRP(paf)->update_group); + } + } + } } if (found_peer) @@ -1133,6 +1178,8 @@ DEFUN (no_debug_bgp_update, bgp_debug_list_free(bgp_debug_update_out_peers); bgp_debug_list_free(bgp_debug_update_prefixes); + bgp_debug_clear_updgrp_update_dbg(vty->index); + if (vty->node == CONFIG_NODE) { 
DEBUG_OFF (update, UPDATE_IN); @@ -1281,6 +1328,42 @@ DEFUN (no_debug_bgp_zebra_prefix, return CMD_SUCCESS; } +/* debug bgp update-groups */ +DEFUN (debug_bgp_update_groups, + debug_bgp_update_groups_cmd, + "debug bgp update-groups", + DEBUG_STR + BGP_STR + "BGP update-groups\n") +{ + if (vty->node == CONFIG_NODE) + DEBUG_ON (update_groups, UPDATE_GROUPS); + else + { + TERM_DEBUG_ON (update_groups, UPDATE_GROUPS); + vty_out (vty, "BGP update-groups debugging is on%s", VTY_NEWLINE); + } + return CMD_SUCCESS; +} + +DEFUN (no_debug_bgp_update_groups, + no_debug_bgp_update_groups_cmd, + "no debug bgp update-groups", + NO_STR + DEBUG_STR + BGP_STR + "BGP update-groups\n") +{ + if (vty->node == CONFIG_NODE) + DEBUG_OFF (update_groups, UPDATE_GROUPS); + else + { + TERM_DEBUG_OFF (update_groups, UPDATE_GROUPS); + vty_out (vty, "BGP update-groups debugging is off%s", VTY_NEWLINE); + } + return CMD_SUCCESS; +} + DEFUN (no_debug_bgp, no_debug_bgp_cmd, "no debug bgp", @@ -1295,6 +1378,8 @@ DEFUN (no_debug_bgp, bgp_debug_list_free(bgp_debug_update_prefixes); bgp_debug_list_free(bgp_debug_zebra_prefixes); + bgp_debug_clear_updgrp_update_dbg(vty->index); + TERM_DEBUG_OFF (keepalive, KEEPALIVE); TERM_DEBUG_OFF (update, UPDATE_IN); TERM_DEBUG_OFF (update, UPDATE_OUT); @@ -1350,6 +1435,9 @@ DEFUN (show_debugging_bgp, bgp_debug_list_print (vty, " BGP zebra debugging is on", bgp_debug_zebra_prefixes); + if (BGP_DEBUG (update_groups, UPDATE_GROUPS)) + vty_out (vty, " BGP update-groups debugging is on%s", VTY_NEWLINE); + vty_out (vty, "%s", VTY_NEWLINE); return CMD_SUCCESS; } @@ -1411,6 +1499,12 @@ bgp_config_write_debug (struct vty *vty) write++; } + if (CONF_BGP_DEBUG (update_groups, UPDATE_GROUPS)) + { + vty_out (vty, "debug bgp update-groups%s", VTY_NEWLINE); + write++; + } + return write; } @@ -1445,6 +1539,8 @@ bgp_debug_init (void) install_element (CONFIG_NODE, &debug_bgp_update_direct_cmd); install_element (ENABLE_NODE, &debug_bgp_zebra_cmd); install_element (CONFIG_NODE, 
&debug_bgp_zebra_cmd); + install_element (ENABLE_NODE, &debug_bgp_update_groups_cmd); + install_element (CONFIG_NODE, &debug_bgp_update_groups_cmd); /* deb bgp updates [in|out] A.B.C.D */ install_element (ENABLE_NODE, &debug_bgp_update_direct_peer_cmd); @@ -1491,6 +1587,8 @@ bgp_debug_init (void) install_element (CONFIG_NODE, &no_debug_bgp_update_cmd); install_element (ENABLE_NODE, &no_debug_bgp_zebra_cmd); install_element (CONFIG_NODE, &no_debug_bgp_zebra_cmd); + install_element (ENABLE_NODE, &no_debug_bgp_update_groups_cmd); + install_element (CONFIG_NODE, &no_debug_bgp_update_groups_cmd); install_element (ENABLE_NODE, &no_debug_bgp_cmd); } @@ -1577,7 +1675,8 @@ bgp_debug_keepalive (struct peer *peer) } int -bgp_debug_update (struct peer *peer, struct prefix *p, unsigned int inbound) +bgp_debug_update (struct peer *peer, struct prefix *p, + struct update_group *updgrp, unsigned int inbound) { if (inbound) { @@ -1593,6 +1692,10 @@ bgp_debug_update (struct peer *peer, struct prefix *p, unsigned int inbound) BGP_DEBUG_UPDATE_OUT, bgp_debug_update_out_peers)) return 1; + + /* Check if update debugging implicitly enabled for the group. 
*/ + if (updgrp && UPDGRP_DBG_ON(updgrp)) + return 1; } diff --git a/bgpd/bgp_debug.h b/bgpd/bgp_debug.h index b8ad7b9cfa69..a80d00440e95 100644 --- a/bgpd/bgp_debug.h +++ b/bgpd/bgp_debug.h @@ -22,6 +22,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #define _QUAGGA_BGP_DEBUG_H #include "bgp_attr.h" +#include "bgp_updgrp.h" /* sort of packet direction */ #define DUMP_ON 1 @@ -65,6 +66,7 @@ extern unsigned long conf_bgp_debug_keepalive; extern unsigned long conf_bgp_debug_update; extern unsigned long conf_bgp_debug_zebra; extern unsigned long conf_bgp_debug_nht; +extern unsigned long conf_bgp_debug_update_groups; extern unsigned long term_bgp_debug_as4; extern unsigned long term_bgp_debug_neighbor_events; @@ -73,6 +75,7 @@ extern unsigned long term_bgp_debug_keepalive; extern unsigned long term_bgp_debug_update; extern unsigned long term_bgp_debug_zebra; extern unsigned long term_bgp_debug_nht; +extern unsigned long term_bgp_debug_update_groups; extern struct list *bgp_debug_neighbor_events_peers; extern struct list *bgp_debug_keepalive_peers; @@ -98,6 +101,7 @@ struct bgp_debug_filter #define BGP_DEBUG_UPDATE_PREFIX 0x04 #define BGP_DEBUG_ZEBRA 0x01 #define BGP_DEBUG_NHT 0x01 +#define BGP_DEBUG_UPDATE_GROUPS 0x01 #define BGP_DEBUG_PACKET_SEND 0x01 #define BGP_DEBUG_PACKET_SEND_DETAIL 0x02 @@ -125,13 +129,15 @@ struct bgp_debug_filter extern const char *bgp_type_str[]; extern int bgp_dump_attr (struct peer *, struct attr *, char *, size_t); +extern int bgp_debug_peer_updout_enabled(struct peer *peer); extern void bgp_notify_print (struct peer *, struct bgp_notify *, const char *); extern const struct message bgp_status_msg[]; extern const int bgp_status_msg_max; extern int bgp_debug_neighbor_events(struct peer *peer); extern int bgp_debug_keepalive(struct peer *peer); -extern int bgp_debug_update(struct peer *peer, struct prefix *p, unsigned int inbound); +extern int bgp_debug_update(struct peer *peer, struct prefix *p, + struct update_group 
*updgrp, unsigned int inbound); extern int bgp_debug_zebra(struct prefix *p); #endif /* _QUAGGA_BGP_DEBUG_H */ diff --git a/bgpd/bgp_dump.c b/bgpd/bgp_dump.c index a3c9526fd793..9ee3285def3b 100644 --- a/bgpd/bgp_dump.c +++ b/bgpd/bgp_dump.c @@ -27,6 +27,8 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "prefix.h" #include "thread.h" #include "linklist.h" +#include "queue.h" + #include "bgpd/bgp_table.h" #include "bgpd/bgpd.h" diff --git a/bgpd/bgp_ecommunity.c b/bgpd/bgp_ecommunity.c index 8a326a8bb5fa..05e0c0172289 100644 --- a/bgpd/bgp_ecommunity.c +++ b/bgpd/bgp_ecommunity.c @@ -24,6 +24,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "memory.h" #include "prefix.h" #include "command.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_ecommunity.h" diff --git a/bgpd/bgp_filter.c b/bgpd/bgp_filter.c index fa0889cdbe60..ca9ce67e5f13 100644 --- a/bgpd/bgp_filter.c +++ b/bgpd/bgp_filter.c @@ -24,6 +24,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "log.h" #include "memory.h" #include "buffer.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_aspath.h" diff --git a/bgpd/bgp_fsm.c b/bgpd/bgp_fsm.c index f70ad25aac4a..28cfb45af262 100644 --- a/bgpd/bgp_fsm.c +++ b/bgpd/bgp_fsm.c @@ -31,6 +31,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "memory.h" #include "plist.h" #include "workqueue.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_attr.h" @@ -45,6 +46,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #ifdef HAVE_SNMP #include "bgpd/bgp_snmp.h" #endif /* HAVE_SNMP */ +#include "bgpd/bgp_updgrp.h" /* BGP FSM (finite state machine) has three types of functions. Type one is thread functions. Type two is event functions. 
Type three @@ -93,11 +95,17 @@ peer_xfer_conn(struct peer *from_peer) if (!peer || !CHECK_FLAG(peer->flags, PEER_FLAG_CONFIG_NODE)) return from_peer; + if (bgp_debug_neighbor_events(peer)) + zlog_debug ("peer transfer (%s -> %s)", from_peer->host, peer->host); + BGP_WRITE_OFF(peer->t_write); BGP_READ_OFF(peer->t_read); BGP_WRITE_OFF(from_peer->t_write); BGP_READ_OFF(from_peer->t_read); + BGP_TIMER_OFF(peer->t_routeadv); + BGP_TIMER_OFF(from_peer->t_routeadv); + fd = peer->fd; peer->fd = from_peer->fd; from_peer->fd = fd; @@ -390,27 +398,26 @@ bgp_keepalive_timer (struct thread *thread) static int bgp_routeq_empty (struct peer *peer) { - afi_t afi; - safi_t safi; + struct peer_af *paf; + int af; - for (afi = AFI_IP; afi < AFI_MAX; afi++) - for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) - { - if (!FIFO_EMPTY(&peer->sync[afi][safi]->withdraw) || - !FIFO_EMPTY(&peer->sync[afi][safi]->update)) - return 0; - } + PEERAF_FOREACH(peer, paf, af) + { + if (!PAF_SUBGRP(paf)) + continue; + if (!advertise_list_is_empty(PAF_SUBGRP(paf))) + return 0; + } return 1; } -static int +int bgp_routeadv_timer (struct thread *thread) { struct peer *peer; peer = THREAD_ARG (thread); peer->t_routeadv = NULL; - peer->radv_adjusted = 0; if (bgp_debug_neighbor_events(peer)) zlog_debug ("%s [FSM] Timer (routeadv timer expire)", peer->host); @@ -419,10 +426,9 @@ bgp_routeadv_timer (struct thread *thread) BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); - /* MRAI timer is no longer restarted here, it would be done - * when the FIFO is built. + /* MRAI timer will be started again when FIFO is built, no need to + * do it here. 
*/ - return 0; } @@ -630,9 +636,6 @@ bgp_adjust_routeadv (struct peer *peer) return; } - /* Mark that we've adjusted the timer */ - peer->radv_adjusted = 1; - /* * CASE I: @@ -655,8 +658,6 @@ bgp_adjust_routeadv (struct peer *peer) { BGP_TIMER_OFF(peer->t_routeadv); BGP_TIMER_ON(peer->t_routeadv, bgp_routeadv_timer, 0); - if (bgp_debug_update(peer, NULL, 0)) - zlog_debug ("%s: MRAI timer to expire instantly", peer->host); return; } @@ -685,8 +686,6 @@ bgp_adjust_routeadv (struct peer *peer) { BGP_TIMER_OFF(peer->t_routeadv); BGP_TIMER_ON(peer->t_routeadv, bgp_routeadv_timer, diff); - if (bgp_debug_update(peer, NULL, 0)) - zlog_debug ("%s: MRAI timer to expire in %f secs", peer->host, diff); } } @@ -720,8 +719,6 @@ bgp_maxmed_onstartup_active (struct bgp *bgp) void bgp_maxmed_update (struct bgp *bgp) { - struct listnode *node, *nnode; - struct peer *peer; u_char maxmed_active; u_int32_t maxmed_value; @@ -747,8 +744,7 @@ bgp_maxmed_update (struct bgp *bgp) bgp->maxmed_active = maxmed_active; bgp->maxmed_value = maxmed_value; - for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) - bgp_announce_route_all (peer); + update_group_announce(bgp); } } @@ -1005,6 +1001,10 @@ bgp_stop (struct peer *peer) /* set last reset time */ peer->resettime = peer->uptime = bgp_clock (); + if (BGP_DEBUG (update_groups, UPDATE_GROUPS)) + zlog_debug ("%s remove from all update group", peer->host); + update_group_remove_peer_afs(peer); + #ifdef HAVE_SNMP bgpTrapBackwardTransition (peer); #endif /* HAVE_SNMP */ @@ -1339,6 +1339,7 @@ static int bgp_establish (struct peer *peer) { struct bgp_notify *notify; + struct peer_af *paf; afi_t afi; safi_t safi; int nsf_af_count = 0; @@ -1377,6 +1378,9 @@ bgp_establish (struct peer *peer) if (bgp_flag_check (peer->bgp, BGP_FLAG_LOG_NEIGHBOR_CHANGES)) zlog_info ("%%ADJCHANGE: neighbor %s Up", peer->host); + /* assign update-group/subgroup */ + update_group_adjust_peer_afs(peer); + /* graceful restart */ UNSET_FLAG (peer->sflags, PEER_STATUS_NSF_WAIT); 
for (afi = AFI_IP ; afi < AFI_MAX ; afi++) @@ -1449,14 +1453,17 @@ bgp_establish (struct peer *peer) || CHECK_FLAG (peer->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_SM_OLD_RCV)) SET_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_ORF_WAIT_REFRESH); - bgp_announce_route_all (peer); + bgp_announce_peer (peer); /* Start the route advertisement timer to send updates to the peer - if BGP * is not in read-only mode. If it is, the timer will be started at the end * of read-only mode. */ if (!bgp_update_delay_active(peer->bgp)) - BGP_TIMER_ON (peer->t_routeadv, bgp_routeadv_timer, 0); + { + BGP_TIMER_OFF(peer->t_routeadv); + BGP_TIMER_ON (peer->t_routeadv, bgp_routeadv_timer, 0); + } if (peer->doppelganger && (peer->doppelganger->status != Deleted)) { diff --git a/bgpd/bgp_fsm.h b/bgpd/bgp_fsm.h index bd6c416d3265..6937a6c3b784 100644 --- a/bgpd/bgp_fsm.h +++ b/bgpd/bgp_fsm.h @@ -40,7 +40,13 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA if (!(T) && (peer->status != Deleted)) \ THREAD_WRITE_ON(master,(T),(F),peer,(V)); \ } while (0) - + +#define BGP_PEER_WRITE_ON(T,F,V, peer) \ + do { \ + if (!(T) && ((peer)->status != Deleted)) \ + THREAD_WRITE_ON(master,(T),(F),(peer),(V)); \ + } while (0) + #define BGP_WRITE_OFF(T) \ do { \ if (T) \ @@ -79,6 +85,7 @@ extern int bgp_event (struct thread *); extern int bgp_event_update (struct peer *, int event); extern int bgp_stop (struct peer *peer); extern void bgp_timer_set (struct peer *); +extern int bgp_routeadv_timer (struct thread *); extern void bgp_fsm_change_status (struct peer *peer, int status); extern const char *peer_down_str[]; extern void bgp_update_delay_end (struct bgp *); diff --git a/bgpd/bgp_main.c b/bgpd/bgp_main.c index 234f17d1f90a..4da30133e3a5 100644 --- a/bgpd/bgp_main.c +++ b/bgpd/bgp_main.c @@ -36,6 +36,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "filter.h" #include "plist.h" #include "stream.h" +#include "queue.h" #include "bgpd/bgpd.h" #include 
"bgpd/bgp_attr.h" diff --git a/bgpd/bgp_mpath.c b/bgpd/bgp_mpath.c index b354520ed87b..2d080a12bf30 100644 --- a/bgpd/bgp_mpath.c +++ b/bgpd/bgp_mpath.c @@ -28,6 +28,7 @@ #include "linklist.h" #include "sockunion.h" #include "memory.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" @@ -412,7 +413,8 @@ bgp_info_mpath_update (struct bgp_node *rn, struct bgp_info *new_best, old_mpath_count = 0; prev_mpath = new_best; mp_node = listhead (mp_list); - debug = bgp_debug_update(NULL, &rn->p, 1) || bgp_debug_update(NULL, &rn->p, 0); + debug = bgp_debug_update(NULL, &rn->p, NULL, 1) || + bgp_debug_update(NULL, &rn->p, NULL, 0); if (debug) prefix2str (&rn->p, pfx_buf, sizeof (pfx_buf)); diff --git a/bgpd/bgp_mplsvpn.c b/bgpd/bgp_mplsvpn.c index 22ae54e0e004..a497efaf383c 100644 --- a/bgpd/bgp_mplsvpn.c +++ b/bgpd/bgp_mplsvpn.c @@ -25,6 +25,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "log.h" #include "memory.h" #include "stream.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" diff --git a/bgpd/bgp_network.c b/bgpd/bgp_network.c index a36af72ff94f..eb8bad762720 100644 --- a/bgpd/bgp_network.c +++ b/bgpd/bgp_network.c @@ -31,6 +31,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "privs.h" #include "linklist.h" #include "network.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_open.h" diff --git a/bgpd/bgp_nexthop.c b/bgpd/bgp_nexthop.c index df3c90343120..8095e6233e47 100644 --- a/bgpd/bgp_nexthop.c +++ b/bgpd/bgp_nexthop.c @@ -31,6 +31,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "hash.h" #include "jhash.h" #include "nexthop.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" diff --git a/bgpd/bgp_open.c b/bgpd/bgp_open.c index 7aef76d53d28..d35be4b1f377 100644 --- a/bgpd/bgp_open.c +++ b/bgpd/bgp_open.c @@ -27,6 +27,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 
#include "log.h" #include "command.h" #include "memory.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_attr.h" diff --git a/bgpd/bgp_packet.c b/bgpd/bgp_packet.c index 7213d1a00b6f..5289f9cd5867 100644 --- a/bgpd/bgp_packet.c +++ b/bgpd/bgp_packet.c @@ -30,6 +30,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "sockunion.h" /* for inet_ntop () */ #include "linklist.h" #include "plist.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" @@ -47,11 +48,12 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "bgpd/bgp_mplsvpn.h" #include "bgpd/bgp_advertise.h" #include "bgpd/bgp_vty.h" +#include "bgpd/bgp_updgrp.h" int stream_put_prefix (struct stream *, struct prefix *); /* Set up BGP packet marker and packet type. */ -static int +int bgp_packet_set_marker (struct stream *s, u_char type) { int i; @@ -72,7 +74,7 @@ bgp_packet_set_marker (struct stream *s, u_char type) /* Set BGP packet header size entry. If size is zero then use current stream size. */ -static int +int bgp_packet_set_size (struct stream *s) { int cp; @@ -85,7 +87,7 @@ bgp_packet_set_size (struct stream *s) } /* Add new packet to the peer. */ -static void +void bgp_packet_add (struct peer *peer, struct stream *s) { /* Add packet to the end of list. */ @@ -140,181 +142,6 @@ bgp_connect_check (struct peer *peer, int change_state) } } -/* Make BGP update packet. 
*/ -static struct stream * -bgp_update_packet (struct peer *peer, afi_t afi, safi_t safi) -{ - struct stream *s; - struct stream *snlri; - struct bgp_adj_out *adj; - struct bgp_advertise *adv; - struct stream *packet; - struct bgp_node *rn = NULL; - struct bgp_info *binfo = NULL; - bgp_size_t total_attr_len = 0; - unsigned long attrlen_pos = 0; - int space_remaining = 0; - int space_needed = 0; - size_t mpattrlen_pos = 0; - size_t mpattr_pos = 0; - int num_pfx_adv = 0; - char send_attr_str[BUFSIZ]; - int send_attr_printed; - - s = peer->work; - stream_reset (s); - snlri = peer->scratch; - stream_reset (snlri); - - adv = FIFO_HEAD (&peer->sync[afi][safi]->update); - - while (adv) - { - assert (adv->rn); - rn = adv->rn; - adj = adv->adj; - if (adv->binfo) - binfo = adv->binfo; - - space_remaining = STREAM_CONCAT_REMAIN (s, snlri, STREAM_SIZE(s)) - - BGP_MAX_PACKET_SIZE_OVERFLOW; - space_needed = BGP_NLRI_LENGTH + PSIZE (rn->p.prefixlen); - - /* When remaining space can't include NLRI and it's length. */ - if (space_remaining < space_needed) - break; - - /* If packet is empty, set attribute. */ - if (stream_empty (s)) - { - struct peer *from = NULL; - - if (binfo) - from = binfo->peer; - - /* 1: Write the BGP message header - 16 bytes marker, 2 bytes length, - * one byte message type. - */ - bgp_packet_set_marker (s, BGP_MSG_UPDATE); - - /* 2: withdrawn routes length */ - stream_putw (s, 0); - - /* 3: total attributes length - attrlen_pos stores the position */ - attrlen_pos = stream_get_endp (s); - stream_putw (s, 0); - - /* 4: if there is MP_REACH_NLRI attribute, that should be the first - * attribute, according to draft-ietf-idr-error-handling. Save the - * position. - */ - mpattr_pos = stream_get_endp(s); - - /* 5: Encode all the attributes, except MP_REACH_NLRI attr. 
*/ - total_attr_len = bgp_packet_attribute (NULL, peer, s, - adv->baa->attr, - NULL, afi, safi, - from, NULL, NULL); - - space_remaining = STREAM_CONCAT_REMAIN (s, snlri, STREAM_SIZE(s)) - - BGP_MAX_PACKET_SIZE_OVERFLOW; - space_needed = BGP_NLRI_LENGTH + PSIZE (rn->p.prefixlen); - - /* If the attributes alone do not leave any room for NLRI then - * return */ - if (space_remaining < space_needed) - { - zlog_err ("%s cannot send UPDATE, the attributes do not leave " - "room for NLRI", peer->host); - /* Flush the FIFO update queue */ - while (adv) - adv = bgp_advertise_clean (peer, adv->adj, afi, safi); - return NULL; - } - - if (BGP_DEBUG (update, UPDATE_OUT) || - BGP_DEBUG (update, UPDATE_PREFIX)) - { - memset (send_attr_str, 0, BUFSIZ); - send_attr_printed = 0; - bgp_dump_attr (peer, adv->baa->attr, send_attr_str, BUFSIZ); - } - } - - if (afi == AFI_IP && safi == SAFI_UNICAST) - stream_put_prefix (s, &rn->p); - else - { - /* Encode the prefix in MP_REACH_NLRI attribute */ - struct prefix_rd *prd = NULL; - u_char *tag = NULL; - - if (rn->prn) - prd = (struct prefix_rd *) &rn->prn->p; - if (binfo && binfo->extra) - tag = binfo->extra->tag; - - if (stream_empty(snlri)) - mpattrlen_pos = bgp_packet_mpattr_start(snlri, afi, safi, - adv->baa->attr); - bgp_packet_mpattr_prefix(snlri, afi, safi, &rn->p, prd, tag); - } - num_pfx_adv++; - - if (bgp_debug_update(peer, &rn->p, 0)) - { - if (!send_attr_printed) - { - zlog_debug ("%s send UPDATE w/ attr: %s", peer->host, send_attr_str); - send_attr_printed = 1; - } - char buf[INET6_BUFSIZ]; - - zlog_debug ("%s send UPDATE %s/%d", - peer->host, - inet_ntop (rn->p.family, &(rn->p.u.prefix), buf, INET6_BUFSIZ), - rn->p.prefixlen); - } - - /* Synchnorize attribute. */ - if (adj->attr) - bgp_attr_unintern (&adj->attr); - else - peer->scount[afi][safi]++; - - adj->attr = bgp_attr_intern (adv->baa->attr); - - adv = bgp_advertise_clean (peer, adj, afi, safi); - } - - if (! 
stream_empty (s)) - { - if (!stream_empty(snlri)) - { - bgp_packet_mpattr_end(snlri, mpattrlen_pos); - total_attr_len += stream_get_endp(snlri); - } - - /* set the total attribute length correctly */ - stream_putw_at (s, attrlen_pos, total_attr_len); - - if (!stream_empty(snlri)) - packet = stream_dupcat(s, snlri, mpattr_pos); - else - packet = stream_dup (s); - bgp_packet_set_size (packet); - if (BGP_DEBUG (update, UPDATE_OUT)) - zlog_debug("%s form UPDATE (adv) total len %d numPfx %d", - peer->host, - (stream_get_endp (s) - stream_get_getp (s)), num_pfx_adv); - bgp_packet_add (peer, packet); - stream_reset (s); - stream_reset (snlri); - return packet; - } - return NULL; -} - static struct stream * bgp_update_packet_eor (struct peer *peer, afi_t afi, safi_t safi) { @@ -358,299 +185,28 @@ bgp_update_packet_eor (struct peer *peer, afi_t afi, safi_t safi) return packet; } -/* Make BGP withdraw packet. */ -/* For ipv4 unicast: - 16-octet marker | 2-octet length | 1-octet type | - 2-octet withdrawn route length | withdrawn prefixes | 2-octet attrlen (=0) -*/ -/* For other afi/safis: - 16-octet marker | 2-octet length | 1-octet type | - 2-octet withdrawn route length (=0) | 2-octet attrlen | - mp_unreach attr type | attr len | afi | safi | withdrawn prefixes -*/ -static struct stream * -bgp_withdraw_packet (struct peer *peer, afi_t afi, safi_t safi) -{ - struct stream *s; - struct stream *packet; - struct bgp_adj_out *adj; - struct bgp_advertise *adv; - struct bgp_node *rn; - unsigned long pos; - bgp_size_t unfeasible_len; - bgp_size_t total_attr_len; - size_t mp_start = 0; - size_t attrlen_pos = 0; - size_t mplen_pos = 0; - u_char first_time = 1; - int space_remaining = 0; - int space_needed = 0; - int num_pfx_wd = 0; - - s = peer->work; - stream_reset (s); - - while ((adv = FIFO_HEAD (&peer->sync[afi][safi]->withdraw)) != NULL) - { - assert (adv->rn); - adj = adv->adj; - rn = adv->rn; - - space_remaining = STREAM_REMAIN (s) - - BGP_MAX_PACKET_SIZE_OVERFLOW; - 
space_needed = (BGP_NLRI_LENGTH + BGP_TOTAL_ATTR_LEN + - PSIZE (rn->p.prefixlen)); - - if (space_remaining < space_needed) - break; - - if (stream_empty (s)) - { - bgp_packet_set_marker (s, BGP_MSG_UPDATE); - stream_putw (s, 0); /* unfeasible routes length */ - } - else - first_time = 0; - - if (afi == AFI_IP && safi == SAFI_UNICAST) - stream_put_prefix (s, &rn->p); - else - { - struct prefix_rd *prd = NULL; - - if (rn->prn) - prd = (struct prefix_rd *) &rn->prn->p; - - /* If first time, format the MP_UNREACH header */ - if (first_time) - { - attrlen_pos = stream_get_endp (s); - /* total attr length = 0 for now. reevaluate later */ - stream_putw (s, 0); - mp_start = stream_get_endp (s); - mplen_pos = bgp_packet_mpunreach_start(s, afi, safi); - } - - bgp_packet_mpunreach_prefix(s, &rn->p, afi, safi, prd, NULL); - } - num_pfx_wd++; - - if (bgp_debug_update(peer, &rn->p, 0)) - { - char buf[INET6_BUFSIZ]; - - zlog_debug ("%s send UPDATE %s/%d -- unreachable", - peer->host, - inet_ntop (rn->p.family, &(rn->p.u.prefix), buf, INET6_BUFSIZ), - rn->p.prefixlen); - } - - peer->scount[afi][safi]--; - - bgp_adj_out_remove (rn, adj, peer, afi, safi); - bgp_unlock_node (rn); - } - - if (! stream_empty (s)) - { - if (afi == AFI_IP && safi == SAFI_UNICAST) - { - unfeasible_len - = stream_get_endp (s) - BGP_HEADER_SIZE - BGP_UNFEASIBLE_LEN; - stream_putw_at (s, BGP_HEADER_SIZE, unfeasible_len); - stream_putw (s, 0); - } - else - { - /* Set the mp_unreach attr's length */ - bgp_packet_mpunreach_end(s, mplen_pos); - - /* Set total path attribute length. 
*/ - total_attr_len = stream_get_endp(s) - mp_start; - stream_putw_at (s, attrlen_pos, total_attr_len); - } - bgp_packet_set_size (s); - if (BGP_DEBUG (update, UPDATE_OUT)) - zlog_debug("%s form UPDATE (wd) total len %d numPfx %d", - peer->host, - (stream_get_endp (s) - stream_get_getp (s)), num_pfx_wd); - packet = stream_dup (s); - bgp_packet_add (peer, packet); - stream_reset (s); - return packet; - } - - return NULL; -} - -void -bgp_default_update_send (struct peer *peer, struct attr *attr, - afi_t afi, safi_t safi, struct peer *from) -{ - struct stream *s; - struct stream *packet; - struct prefix p; - unsigned long pos; - bgp_size_t total_attr_len; - - if (DISABLE_BGP_ANNOUNCE) - return; - - if (afi == AFI_IP) - str2prefix ("0.0.0.0/0", &p); -#ifdef HAVE_IPV6 - else - str2prefix ("::/0", &p); -#endif /* HAVE_IPV6 */ - - /* Logging the attribute. */ - if (bgp_debug_update(peer, &p, 0)) - { - char attrstr[BUFSIZ]; - char buf[INET6_BUFSIZ]; - attrstr[0] = '\0'; - - bgp_dump_attr (peer, attr, attrstr, BUFSIZ); - zlog_debug ("%s send UPDATE %s/%d %s", - peer->host, inet_ntop(p.family, &(p.u.prefix), buf, INET6_BUFSIZ), - p.prefixlen, attrstr); - } - - s = stream_new (BGP_MAX_PACKET_SIZE); - - /* Make BGP update packet. */ - bgp_packet_set_marker (s, BGP_MSG_UPDATE); - - /* Unfeasible Routes Length. */ - stream_putw (s, 0); - - /* Make place for total attribute length. */ - pos = stream_get_endp (s); - stream_putw (s, 0); - total_attr_len = bgp_packet_attribute (NULL, peer, s, attr, &p, afi, safi, from, NULL, NULL); - - /* Set Total Path Attribute Length. */ - stream_putw_at (s, pos, total_attr_len); - - /* NLRI set. */ - if (p.family == AF_INET && safi == SAFI_UNICAST) - stream_put_prefix (s, &p); - - /* Set size. */ - bgp_packet_set_size (s); - - packet = stream_dup (s); - stream_free (s); - - /* Dump packet if debug option is set. */ -#ifdef DEBUG - /* bgp_packet_dump (packet); */ -#endif /* DEBUG */ - - /* Add packet to the peer. 
*/ - bgp_packet_add (peer, packet); - - BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); -} - -void -bgp_default_withdraw_send (struct peer *peer, afi_t afi, safi_t safi) -{ - struct stream *s; - struct stream *packet; - struct prefix p; - unsigned long attrlen_pos = 0; - unsigned long cp; - bgp_size_t unfeasible_len; - bgp_size_t total_attr_len; - size_t mp_start = 0; - size_t mplen_pos = 0; - - if (DISABLE_BGP_ANNOUNCE) - return; - - if (afi == AFI_IP) - str2prefix ("0.0.0.0/0", &p); -#ifdef HAVE_IPV6 - else - str2prefix ("::/0", &p); -#endif /* HAVE_IPV6 */ - - total_attr_len = 0; - - if (bgp_debug_update(peer, &p, 0)) - { - char buf[INET6_BUFSIZ]; - - zlog_debug ("%s send UPDATE %s/%d -- unreachable", - peer->host, inet_ntop(p.family, &(p.u.prefix), buf, INET6_BUFSIZ), - p.prefixlen); - } - - s = stream_new (BGP_MAX_PACKET_SIZE); - - /* Make BGP update packet. */ - bgp_packet_set_marker (s, BGP_MSG_UPDATE); - - /* Unfeasible Routes Length. */; - cp = stream_get_endp (s); - stream_putw (s, 0); - - /* Withdrawn Routes. */ - if (p.family == AF_INET && safi == SAFI_UNICAST) - { - stream_put_prefix (s, &p); - - unfeasible_len = stream_get_endp (s) - cp - 2; - - /* Set unfeasible len. */ - stream_putw_at (s, cp, unfeasible_len); - - /* Set total path attribute length. */ - stream_putw (s, 0); - } - else - { - attrlen_pos = stream_get_endp (s); - stream_putw (s, 0); - mp_start = stream_get_endp (s); - mplen_pos = bgp_packet_mpunreach_start(s, afi, safi); - bgp_packet_mpunreach_prefix(s, &p, afi, safi, NULL, NULL); - - /* Set the mp_unreach attr's length */ - bgp_packet_mpunreach_end(s, mplen_pos); - - /* Set total path attribute length. */ - total_attr_len = stream_get_endp(s) - mp_start; - stream_putw_at (s, attrlen_pos, total_attr_len); - } - - bgp_packet_set_size (s); - - packet = stream_dup (s); - stream_free (s); - - /* Add packet to the peer. 
*/ - bgp_packet_add (peer, packet); - - BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); -} - /* Get next packet to be written. */ static struct stream * bgp_write_packet (struct peer *peer) { + struct stream *s = NULL; + struct peer_af *paf; + struct bpacket *next_pkt; afi_t afi; safi_t safi; - struct stream *s = NULL; - struct bgp_advertise *adv; s = stream_fifo_head (peer->obuf); if (s) return s; - /* The code beyond this part deals with update packets, check if updates - are on hold as part of the update-delay post processing stages. */ + /* + * The code beyond this part deals with update packets, proceed only + * if peer is Established and updates are not on hold (as part of + * update-delay post processing). + */ + if (peer->status != Established) + return NULL; + if (peer->bgp && (peer->bgp->main_peers_update_hold || peer->bgp->rsclient_peers_update_hold)) return NULL; @@ -658,147 +214,118 @@ bgp_write_packet (struct peer *peer) for (afi = AFI_IP; afi < AFI_MAX; afi++) for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) { - adv = FIFO_HEAD (&peer->sync[afi][safi]->withdraw); - if (adv) + paf = peer_af_find (peer, afi, safi); + if (!paf || !PAF_SUBGRP(paf)) + continue; + next_pkt = paf->next_pkt_to_send; + + /* Try to generate a packet for the peer if we are at the end of + * the list. Always try to push out WITHDRAWs first. 
*/ + if (!next_pkt || !next_pkt->buffer) { - s = bgp_withdraw_packet (peer, afi, safi); - if (s) - return s; + next_pkt = subgroup_withdraw_packet(PAF_SUBGRP(paf)); + if (!next_pkt || !next_pkt->buffer) + subgroup_update_packet (PAF_SUBGRP(paf)); + next_pkt = paf->next_pkt_to_send; } - } - - for (afi = AFI_IP; afi < AFI_MAX; afi++) - for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) - { - adv = FIFO_HEAD (&peer->sync[afi][safi]->update); - if (adv) - { - if (adv->binfo && adv->binfo->uptime <= peer->synctime) + + /* If we still don't have a packet to send to the peer, then + * try to find out if we have to send EOR or, if not, skip to + * the next AFI, SAFI. + * Don't send the EOR prematurely... if the subgroup's coalesce + * timer is running, the adjacency-out structure is not created + * yet. + */ + if (!next_pkt || !next_pkt->buffer) + { + if (CHECK_FLAG (peer->cap, PEER_CAP_RESTART_RCV)) { - if (CHECK_FLAG (adv->binfo->peer->cap, PEER_CAP_RESTART_RCV) - && CHECK_FLAG (adv->binfo->peer->cap, PEER_CAP_RESTART_ADV) - && ! (CHECK_FLAG (adv->binfo->peer->cap, - PEER_CAP_RESTART_BIT_RCV) && - CHECK_FLAG (adv->binfo->peer->cap, - PEER_CAP_RESTART_BIT_ADV)) - && ! CHECK_FLAG (adv->binfo->flags, BGP_INFO_STALE) + if (!(PAF_SUBGRP(paf))->t_coalesce && + peer->afc_nego[afi][safi] && peer->synctime + && ! CHECK_FLAG (peer->af_sflags[afi][safi], + PEER_STATUS_EOR_SEND) && safi != SAFI_MPLS_VPN) { - if (CHECK_FLAG (adv->binfo->peer->af_sflags[afi][safi], - PEER_STATUS_EOR_RECEIVED)) - s = bgp_update_packet (peer, afi, safi); + SET_FLAG (peer->af_sflags[afi][safi], + PEER_STATUS_EOR_SEND); + return bgp_update_packet_eor (peer, afi, safi); } - else - s = bgp_update_packet (peer, afi, safi); - } - - if (s) - return s; - } - if (CHECK_FLAG (peer->cap, PEER_CAP_RESTART_RCV)) - { - if (peer->afc_nego[afi][safi] && peer->synctime - && ! 
CHECK_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_EOR_SEND) - && safi != SAFI_MPLS_VPN) - { - SET_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_EOR_SEND); - return bgp_update_packet_eor (peer, afi, safi); } + continue; } - } - - return NULL; -} -/* Are there prefixes queued for being withdrawn? */ -int -bgp_peer_wd_fifo_exists (struct peer *peer) -{ - afi_t afi; - safi_t safi; - struct bgp_advertise *adv; - for (afi = AFI_IP; afi < AFI_MAX; afi++) - for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) - if (FIFO_HEAD (&peer->sync[afi][safi]->withdraw)) - return 1; + /* + * Found a packet template to send, overwrite packet with appropriate + * attributes from peer and advance peer + */ + s = bpacket_reformat_for_peer (next_pkt, paf); + bpacket_queue_advance_peer (paf); + if (bgp_debug_update(peer, NULL, NULL, 0)) + zlog_debug ("u%llu:s%llu %s send UPDATE len %d ", + PAF_SUBGRP(paf)->update_group->id, PAF_SUBGRP(paf)->id, + peer->host, (stream_get_endp(s) - stream_get_getp(s))); + return s; + } - return 0; + return NULL; } -/* Are there prefixes queued for being advertised? - * Are they recent? - */ -int -bgp_peer_adv_fifo_exists (struct peer *peer, int chk_recent) +/* The next action for the peer from a write perspective */ +static void +bgp_write_proceed_actions (struct peer *peer) { afi_t afi; safi_t safi; - struct bgp_advertise *adv; - - for (afi = AFI_IP; afi < AFI_MAX; afi++) - for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) - if ((adv = FIFO_HEAD (&peer->sync[afi][safi]->update)) != NULL) - { - if (!chk_recent) - return 1; - if (adv->binfo->uptime < peer->synctime) - return 1; - } - - return 0; -} + struct peer_af *paf; + struct bpacket *next_pkt; + int fullq_found = 0; -/* - * Schedule updates for the peer, if needed. 
- */ -void -bgp_peer_schedule_updates(struct peer *peer) -{ - /* If withdraw FIFO exists, immediately schedule write */ - if (bgp_peer_wd_fifo_exists(peer) && !peer->t_write) + if (stream_fifo_head (peer->obuf)) { - if (bgp_debug_update(peer, NULL, 0)) - zlog_debug("%s scheduling write thread", peer->host); BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); + return; } - /* If update FIFO exists, fire MRAI timer */ - if (bgp_peer_adv_fifo_exists(peer, 0) && !peer->radv_adjusted) + for (afi = AFI_IP; afi < AFI_MAX; afi++) + for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) + { + paf = peer_af_find (peer, afi, safi); + if (!paf) + continue; + next_pkt = paf->next_pkt_to_send; + if (next_pkt && next_pkt->buffer) + { + BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); + return; + } + /* No packets readily available for AFI/SAFI, are there subgroup packets + * that need to be generated? */ + if (paf->subgroup && + bpacket_queue_is_full(SUBGRP_INST(paf->subgroup), + SUBGRP_PKTQ(paf->subgroup))) + fullq_found = 1; + else if (subgroup_packets_to_build (paf->subgroup)) + { + BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); + return; + } + } + if (fullq_found) { - if (bgp_debug_update(peer, NULL, 0)) - zlog_debug("%s scheduling MRAI timer", peer->host); - bgp_adjust_routeadv(peer); + BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); + return; } } -/* Is there partially written packet or updates we can send right - now. */ -static int -bgp_write_proceed (struct peer *peer) -{ - /* If queued packet exists, we should try to write it */ - if (stream_fifo_head (peer->obuf)) - return 1; - - /* If there are prefixes to be withdrawn or to be advertised (and - * queued before last MRAI timer expiry), schedule write - */ - if (bgp_peer_wd_fifo_exists(peer) - || bgp_peer_adv_fifo_exists(peer, 1)) - return 1; - - return 0; -} - /* Write packet to the peer. 
*/ int bgp_write (struct thread *thread) { struct peer *peer; u_char type; - struct stream *s; + struct stream *s; int num; unsigned int count = 0; int oc = 0; @@ -816,7 +343,10 @@ bgp_write (struct thread *thread) s = bgp_write_packet (peer); if (!s) - return 0; /* nothing to send */ + { + bgp_write_proceed_actions (peer); + return 0; + } sockopt_cork (peer->fd, 1); @@ -892,8 +422,7 @@ bgp_write (struct thread *thread) while (++count < peer->bgp->wpkt_quanta && (s = bgp_write_packet (peer)) != NULL); - if (bgp_write_proceed (peer)) - BGP_WRITE_ON (peer->t_write, bgp_write, peer->fd); + bgp_write_proceed_actions (peer); done: /* Update the last write if some updates were written. */ @@ -910,7 +439,7 @@ bgp_write_notify (struct peer *peer) { int ret, val; u_char type; - struct stream *s; + struct stream *s; /* There should be at least one packet. */ s = stream_fifo_head (peer->obuf); @@ -1801,8 +1330,6 @@ bgp_update_receive (struct peer *peer, bgp_size_t size) if (ret < 0) return -1; - zlog_err ("%s [Update:RECV] Unfeasible NLRI received", peer->host); - withdraw.afi = AFI_IP; withdraw.safi = SAFI_UNICAST; withdraw.nlri = stream_pnt (s); @@ -1871,7 +1398,7 @@ bgp_update_receive (struct peer *peer, bgp_size_t size) zlog_err ("%s rcvd UPDATE with errors in attr(s)!! 
Withdrawing route.", peer->host); - if (ret && bgp_debug_update(peer, NULL, 1)) + if (ret && bgp_debug_update(peer, NULL, NULL, 1)) { zlog_debug ("%s rcvd UPDATE w/ attr: %s", peer->host, peer->rcvd_attr_str); peer->rcvd_attr_printed = 1; @@ -2052,7 +1579,7 @@ bgp_update_receive (struct peer *peer, bgp_size_t size) if (peer->nsf[AFI_IP6][SAFI_MULTICAST]) bgp_clear_stale_route (peer, AFI_IP6, SAFI_MULTICAST); - if (bgp_debug_update(peer, NULL, 1)) + if (bgp_debug_update(peer, NULL, NULL, 1)) zlog_debug ("rcvd End-of-RIB for IPv6 Multicast from %s", peer->host); } } @@ -2082,7 +1609,7 @@ bgp_update_receive (struct peer *peer, bgp_size_t size) bgp_update_explicit_eors(peer); } - if (bgp_debug_update(peer, NULL, 1)) + if (bgp_debug_update(peer, NULL, NULL, 1)) zlog_debug ("rcvd End-of-RIB for VPNv4 Unicast from %s", peer->host); } } @@ -2227,7 +1754,7 @@ bgp_route_refresh_receive (struct peer *peer, bgp_size_t size) reserved = stream_getc (s); safi = stream_getc (s); - if (bgp_debug_update(peer, NULL, 0)) + if (bgp_debug_update(peer, NULL, NULL, 0)) zlog_debug ("%s rcvd REFRESH_REQ for afi/safi: %d/%d", peer->host, afi, safi); diff --git a/bgpd/bgp_packet.h b/bgpd/bgp_packet.h index bb3903cf7809..34b666fe72b3 100644 --- a/bgpd/bgp_packet.h +++ b/bgpd/bgp_packet.h @@ -26,12 +26,6 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #define BGP_UNFEASIBLE_LEN 2U #define BGP_WRITE_PACKET_MAX 10U -/* Size of FIFOs upon which write thread is triggered. Note that write - * thread is also triggered upon BGP work-queue completion. 
- */ -#define BGP_ADV_FIFO_QUANTA 500 -#define BGP_WD_FIFO_QUANTA 200 - /* When to refresh */ #define REFRESH_IMMEDIATE 1 #define REFRESH_DEFER 2 @@ -63,8 +57,10 @@ extern int bgp_capability_receive (struct peer *, bgp_size_t); extern void bgp_update_restarted_peers (struct peer *); extern void bgp_update_implicit_eors (struct peer *); extern void bgp_check_update_delay (struct bgp *); -extern int bgp_peer_wd_fifo_exists (struct peer *); -extern int bgp_peer_adv_fifo_exists (struct peer *, int); -extern void bgp_peer_schedule_updates(struct peer *peer); extern int bgp_valid_host_address (unsigned long addr); + +extern int bgp_packet_set_marker (struct stream *s, u_char type); +extern int bgp_packet_set_size (struct stream *s); +extern void bgp_packet_add (struct peer *peer, struct stream *s); + #endif /* _QUAGGA_BGP_PACKET_H */ diff --git a/bgpd/bgp_regex.c b/bgpd/bgp_regex.c index 9b65f7cb153a..15c23c9ff965 100644 --- a/bgpd/bgp_regex.c +++ b/bgpd/bgp_regex.c @@ -23,6 +23,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "log.h" #include "command.h" #include "memory.h" +#include "queue.h" #include "bgpd.h" #include "bgp_aspath.h" diff --git a/bgpd/bgp_route.c b/bgpd/bgp_route.c index f0b1e5ddf99e..7094e8ca455d 100644 --- a/bgpd/bgp_route.c +++ b/bgpd/bgp_route.c @@ -34,6 +34,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "plist.h" #include "thread.h" #include "workqueue.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" @@ -56,6 +57,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "bgpd/bgp_vty.h" #include "bgpd/bgp_mpath.h" #include "bgpd/bgp_nht.h" +#include "bgpd/bgp_updgrp.h" /* Extern from bgp_dump.c */ extern const char *bgp_origin_str[]; @@ -986,7 +988,7 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p, { if (IPV4_ADDR_SAME (&peer->remote_id, &riattr->extra->originator_id)) { - if (bgp_debug_update(peer, p, 
0)) + if (bgp_debug_update(peer, p, NULL, 0)) zlog_debug("%s [Update:SEND] %s/%d originator-id is same as remote router-id", peer->host, inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), @@ -1008,7 +1010,7 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p, /* Output filter check. */ if (bgp_output_filter (peer, p, riattr, afi, safi) == FILTER_DENY) { - if (bgp_debug_update(peer, p, 0)) + if (bgp_debug_update(peer, p, NULL, 0)) zlog_debug("%s [Update:SEND] %s/%d is filtered", peer->host, inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), @@ -1020,7 +1022,7 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p, /* AS path loop check. */ if (aspath_loop_check (riattr->aspath, peer->as)) { - if (bgp_debug_update(peer, p, 0)) + if (bgp_debug_update(peer, p, NULL, 0)) zlog_debug("%s [Update:SEND] suppress announcement to peer AS %u is AS path.", peer->host, peer->as); return 0; @@ -1032,7 +1034,7 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p, { if (aspath_loop_check(riattr->aspath, bgp->confed_id)) { - if (bgp_debug_update(peer, p, 0)) + if (bgp_debug_update(peer, p, NULL, 0)) zlog_debug("%s [Update:SEND] suppress announcement to peer AS %u is AS path.", peer->host, bgp->confed_id); @@ -1228,28 +1230,558 @@ bgp_announce_check (struct bgp_info *ri, struct peer *peer, struct prefix *p, return 1; } +static void +subgroup_announce_reset_nhop (u_char family, struct attr *attr) +{ + if (family == AF_INET) + attr->nexthop.s_addr = 0; +#ifdef HAVE_IPV6 + if (family == AF_INET6) + memset (&attr->extra->mp_nexthop_global, 0, IPV6_MAX_BYTELEN); +#endif +} + +int +subgroup_announce_check (struct bgp_info *ri, struct update_subgroup *subgrp, + struct prefix *p, struct attr *attr) +{ + struct bgp_filter *filter; + struct peer *from; + struct peer *peer; + struct peer *onlypeer; + struct bgp *bgp; + struct attr *riattr; + struct peer_af *paf; + char buf[SU_ADDRSTRLEN]; + int ret; + int 
transparent; + int reflect; + afi_t afi; + safi_t safi; + + if (DISABLE_BGP_ANNOUNCE) + return 0; + + afi = SUBGRP_AFI(subgrp); + safi = SUBGRP_SAFI(subgrp); + peer = SUBGRP_PEER(subgrp); + onlypeer = NULL; + if (CHECK_FLAG (peer->flags, PEER_FLAG_LONESOUL)) + onlypeer = SUBGRP_PFIRST(subgrp)->peer; + + from = ri->peer; + filter = &peer->filter[afi][safi]; + bgp = SUBGRP_INST(subgrp); + riattr = bgp_info_mpath_count (ri) ? bgp_info_mpath_attr (ri) : ri->attr; + + /* Aggregate-address suppress check. */ + if (ri->extra && ri->extra->suppress) + if (! UNSUPPRESS_MAP_NAME (filter)) + { + return 0; + } + + /* Do not send announces to RS-clients from the 'normal' bgp_table. */ + if (CHECK_FLAG(peer->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT)) + { + return 0; + } + + /* Do not send back route to sender. */ + if (onlypeer && from == onlypeer) + { + return 0; + } + + /* Transparency check. */ + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT) + && CHECK_FLAG (from->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT)) + transparent = 1; + else + transparent = 0; + + /* If community is not disabled check the no-export and local. */ + if (! transparent && bgp_community_filter (peer, riattr)) + { + if (bgp_debug_update(NULL, p, subgrp->update_group, 0)) + zlog_debug ("subgrpannouncecheck: community filter check fail"); + return 0; + } + + /* If the attribute has originator-id and it is same as remote + peer's id. 
*/ + if (onlypeer && + riattr->flag & ATTR_FLAG_BIT (BGP_ATTR_ORIGINATOR_ID) && + (IPV4_ADDR_SAME (&onlypeer->remote_id, &riattr->extra->originator_id))) + { + if (bgp_debug_update(NULL, p, subgrp->update_group, 0)) + zlog_debug ("%s [Update:SEND] %s/%d originator-id is same as " + "remote router-id", + onlypeer->host, + inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), + p->prefixlen); + return 0; + } + + /* ORF prefix-list filter check */ + if (CHECK_FLAG (peer->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_RM_ADV) + && (CHECK_FLAG (peer->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_SM_RCV) + || CHECK_FLAG (peer->af_cap[afi][safi], + PEER_CAP_ORF_PREFIX_SM_OLD_RCV))) + if (peer->orf_plist[afi][safi]) + { + if (prefix_list_apply (peer->orf_plist[afi][safi], p) == PREFIX_DENY) + { + return 0; + } + } + + /* Output filter check. */ + if (bgp_output_filter (peer, p, riattr, afi, safi) == FILTER_DENY) + { + if (bgp_debug_update(NULL, p, subgrp->update_group, 0)) + zlog_debug ("%s [Update:SEND] %s/%d is filtered", + peer->host, + inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), + p->prefixlen); + return 0; + } + +#ifdef BGP_SEND_ASPATH_CHECK + /* AS path loop check. */ + if (onlypeer && aspath_loop_check (riattr->aspath, onlypeer->as)) + { + if (bgp_debug_update(NULL, p, subgrp->update_group, 0)) + zlog_debug ("%s [Update:SEND] suppress announcement to peer AS %u " + "that is part of AS path.", + onlypeer->host, onlypeer->as); + return 0; + } +#endif /* BGP_SEND_ASPATH_CHECK */ + + /* If we're a CONFED we need to loop check the CONFED ID too */ + if (CHECK_FLAG(bgp->config, BGP_CONFIG_CONFEDERATION)) + { + if (aspath_loop_check(riattr->aspath, bgp->confed_id)) + { + if (bgp_debug_update(NULL, p, subgrp->update_group, 0)) + zlog_debug ("%s [Update:SEND] suppress announcement to peer AS %u" + " is AS path.", + peer->host, + bgp->confed_id); + return 0; + } + } + + /* Route-Reflect check. 
*/ + if (from->sort == BGP_PEER_IBGP && peer->sort == BGP_PEER_IBGP) + reflect = 1; + else + reflect = 0; + + /* IBGP reflection check. */ + if (reflect) + { + /* A route from a Client peer. */ + if (CHECK_FLAG (from->af_flags[afi][safi], PEER_FLAG_REFLECTOR_CLIENT)) + { + /* Reflect to all the Non-Client peers and also to the + Client peers other than the originator. Originator check + is already done. So there is nothing to do. */ + /* no bgp client-to-client reflection check. */ + if (bgp_flag_check (bgp, BGP_FLAG_NO_CLIENT_TO_CLIENT)) + if (CHECK_FLAG (peer->af_flags[afi][safi], + PEER_FLAG_REFLECTOR_CLIENT)) + return 0; + } + else + { + /* A route from a Non-client peer. Reflect to all other + clients. */ + if (! CHECK_FLAG (peer->af_flags[afi][safi], + PEER_FLAG_REFLECTOR_CLIENT)) + return 0; + } + } + + /* For modify attribute, copy it to temporary structure. */ + bgp_attr_dup (attr, riattr); + + /* If local-preference is not set. */ + if ((peer->sort == BGP_PEER_IBGP + || peer->sort == BGP_PEER_CONFED) + && (! (attr->flag & ATTR_FLAG_BIT (BGP_ATTR_LOCAL_PREF)))) + { + attr->flag |= ATTR_FLAG_BIT (BGP_ATTR_LOCAL_PREF); + attr->local_pref = bgp->default_local_pref; + } + + /* If originator-id is not set and the route is to be reflected, + set the originator id; attr->flag holds ATTR_FLAG_BIT() masks, not raw type codes */ + if (reflect && (!(attr->flag & ATTR_FLAG_BIT(BGP_ATTR_ORIGINATOR_ID)))) + { + attr->extra = bgp_attr_extra_get(attr); + IPV4_ADDR_COPY(&(attr->extra->originator_id), &(from->remote_id)); + SET_FLAG(attr->flag, ATTR_FLAG_BIT(BGP_ATTR_ORIGINATOR_ID)); + } + + /* Remove MED if it's an EBGP peer - will get overwritten by route-maps */ + if (peer->sort == BGP_PEER_EBGP + && attr->flag & ATTR_FLAG_BIT (BGP_ATTR_MULTI_EXIT_DISC)) + { + if (ri->peer != bgp->peer_self && ! transparent + && ! 
CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_MED_UNCHANGED)) + attr->flag &= ~(ATTR_FLAG_BIT (BGP_ATTR_MULTI_EXIT_DISC)); + } + + /* Since the nexthop attribute can vary per peer, it is not explicitly set + * in announce check, only certain flags and length (or number of nexthops + * -- for IPv6/MP_REACH) are set here in order to guide the update formation + * code in setting the nexthop(s) on a per peer basis in reformat_peer(). + * Typically, the source nexthop in the attribute is preserved but in the + * scenarios where we know it will always be overwritten, we reset the + * nexthop to "0" in an attempt to achieve better Update packing. An + * example of this is when a prefix from each of 2 IBGP peers needs to be + * announced to an EBGP peer (and they have the same attributes barring + * their nexthop). + */ + if (reflect) + SET_FLAG(attr->rmap_change_flags, BATTR_REFLECTED); + +#ifdef HAVE_IPV6 + /* IPv6/MP starts with 1 nexthop, the link-local address is passed only if + * we're not reflecting the route and the peer (group) to whom we're going + * to announce is on a shared network (directly connected peers) or the + * peer (group) is configured to receive link-local nexthop and it is + * available in the prefix. + * Of course, the operator can always set it through the route-map, if + * so desired. + */ + if (p->family == AF_INET6) + { + attr->extra->mp_nexthop_len = 16; + if (!reflect) + { + if (peer->shared_network || + (CHECK_FLAG (peer->af_flags[afi][safi], + PEER_FLAG_NEXTHOP_LOCAL_UNCHANGED) && + IN6_IS_ADDR_LINKLOCAL (&attr->extra->mp_nexthop_local))) + attr->extra->mp_nexthop_len = 32; + } + + /* Clear off link-local nexthop in source, if not needed. This may help + * more prefixes share the same attribute for announcement. 
+ */ + if (!(CHECK_FLAG (peer->af_flags[afi][safi], + PEER_FLAG_NEXTHOP_LOCAL_UNCHANGED))) + memset (&attr->extra->mp_nexthop_local, 0, IPV6_MAX_BYTELEN); + } +#endif /* HAVE_IPV6 */ + + bgp_peer_remove_private_as(bgp, afi, safi, peer, attr); + bgp_peer_as_override(bgp, afi, safi, peer, attr); + + /* Route map & unsuppress-map apply. */ + if (ROUTE_MAP_OUT_NAME (filter) + || (ri->extra && ri->extra->suppress) ) + { + struct bgp_info info; + struct attr dummy_attr; + struct attr_extra dummy_extra; + + dummy_attr.extra = &dummy_extra; + + info.peer = peer; + info.attr = attr; + + /* + * The route reflector is not allowed to modify the attributes + * of the reflected IBGP routes unless explicitly allowed. + */ + if ((from->sort == BGP_PEER_IBGP && peer->sort == BGP_PEER_IBGP) + && !bgp_flag_check(bgp, BGP_FLAG_RR_ALLOW_OUTBOUND_POLICY)) + { + bgp_attr_dup (&dummy_attr, attr); + info.attr = &dummy_attr; + } + + SET_FLAG (peer->rmap_type, PEER_RMAP_TYPE_OUT); + + if (ri->extra && ri->extra->suppress) + ret = route_map_apply (UNSUPPRESS_MAP (filter), p, RMAP_BGP, &info); + else + ret = route_map_apply (ROUTE_MAP_OUT (filter), p, RMAP_BGP, &info); + + peer->rmap_type = 0; + + if (ret == RMAP_DENYMATCH) + { + bgp_attr_flush (attr); + return 0; + } + } + + /* After route-map has been applied, we check to see if the nexthop to + * be carried in the attribute (that is used for the announcement) can + * be cleared off or not. We do this in all cases where we would be + * setting the nexthop to "ourselves". For IPv6, we only need to consider + * the global nexthop here; the link-local nexthop would have been cleared + * already, and if not, it is required by the update formation code. + * Also see earlier comments in this function. 
+ */ + if (!(CHECK_FLAG(attr->rmap_change_flags, BATTR_RMAP_NEXTHOP_CHANGED) || + transparent || + CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_NEXTHOP_UNCHANGED))) + { + if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_NEXTHOP_SELF)) + { + if (!reflect || + CHECK_FLAG (peer->af_flags[afi][safi], + PEER_FLAG_FORCE_NEXTHOP_SELF)) + subgroup_announce_reset_nhop (p->family, attr); + } + else if (peer->sort == BGP_PEER_EBGP) + { + SUBGRP_FOREACH_PEER (subgrp, paf) + { + if (bgp_multiaccess_check_v4 (riattr->nexthop, paf->peer)) + break; + } + if (!paf) + subgroup_announce_reset_nhop (p->family, attr); + } + } + + return 1; +} + +static int +bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient, + struct prefix *p, struct attr *attr, afi_t afi, safi_t safi) +{ + int ret; + char buf[SU_ADDRSTRLEN]; + struct bgp_filter *filter; + struct bgp_info info; + struct peer *from; + struct attr *riattr; + + from = ri->peer; + filter = &rsclient->filter[afi][safi]; + riattr = bgp_info_mpath_count (ri) ? bgp_info_mpath_attr (ri) : ri->attr; + + if (DISABLE_BGP_ANNOUNCE) + return 0; + + /* Do not send back route to sender. */ + if (from == rsclient) + return 0; + + /* Aggregate-address suppress check. */ + if (ri->extra && ri->extra->suppress) + if (! UNSUPPRESS_MAP_NAME (filter)) + return 0; + + /* Default route check. */ + if (CHECK_FLAG (rsclient->af_sflags[afi][safi], + PEER_STATUS_DEFAULT_ORIGINATE)) + { + if (p->family == AF_INET && p->u.prefix4.s_addr == INADDR_ANY) + return 0; +#ifdef HAVE_IPV6 + else if (p->family == AF_INET6 && p->prefixlen == 0) + return 0; +#endif /* HAVE_IPV6 */ + } + + /* If the attribute has originator-id and it is same as remote + peer's id. 
*/ + if (riattr->flag & ATTR_FLAG_BIT (BGP_ATTR_ORIGINATOR_ID)) + { + if (IPV4_ADDR_SAME (&rsclient->remote_id, + &riattr->extra->originator_id)) + { + if (bgp_debug_update(rsclient, p, NULL, 0)) + zlog_debug ("%s [Update:SEND] %s/%d originator-id is same as remote router-id", + rsclient->host, + inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), + p->prefixlen); + return 0; + } + } + + /* ORF prefix-list filter check */ + if (CHECK_FLAG (rsclient->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_RM_ADV) + && (CHECK_FLAG (rsclient->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_SM_RCV) + || CHECK_FLAG (rsclient->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_SM_OLD_RCV))) + if (rsclient->orf_plist[afi][safi]) + { + if (prefix_list_apply (rsclient->orf_plist[afi][safi], p) == PREFIX_DENY) + return 0; + } + + /* Output filter check. */ + if (bgp_output_filter (rsclient, p, riattr, afi, safi) == FILTER_DENY) + { + if (bgp_debug_update(rsclient, p, NULL, 0)) + zlog_debug ("%s [Update:SEND] %s/%d is filtered", + rsclient->host, + inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), + p->prefixlen); + return 0; + } + +#ifdef BGP_SEND_ASPATH_CHECK + /* AS path loop check. */ + if (aspath_loop_check (riattr->aspath, rsclient->as)) + { + if (bgp_debug_update(rsclient, p, NULL, 0)) + zlog_debug ("%s [Update:SEND] suppress announcement to peer AS %u is AS path.", + rsclient->host, rsclient->as); + return 0; + } +#endif /* BGP_SEND_ASPATH_CHECK */ + + /* For modify attribute, copy it to temporary structure. */ + bgp_attr_dup (attr, riattr); + + /* next-hop-set */ + if ((p->family == AF_INET && attr->nexthop.s_addr == 0) +#ifdef HAVE_IPV6 + || (p->family == AF_INET6 && + IN6_IS_ADDR_UNSPECIFIED(&attr->extra->mp_nexthop_global)) +#endif /* HAVE_IPV6 */ + ) + { + /* Set IPv4 nexthop. 
*/ + if (p->family == AF_INET) + { + if (safi == SAFI_MPLS_VPN) + memcpy (&attr->extra->mp_nexthop_global_in, &rsclient->nexthop.v4, + IPV4_MAX_BYTELEN); + else + memcpy (&attr->nexthop, &rsclient->nexthop.v4, IPV4_MAX_BYTELEN); + } +#ifdef HAVE_IPV6 + /* Set IPv6 nexthop. */ + if (p->family == AF_INET6) + { + /* IPv6 global nexthop must be included. */ + memcpy (&attr->extra->mp_nexthop_global, &rsclient->nexthop.v6_global, + IPV6_MAX_BYTELEN); + attr->extra->mp_nexthop_len = 16; + } +#endif /* HAVE_IPV6 */ + } + +#ifdef HAVE_IPV6 + if (p->family == AF_INET6) + { + struct attr_extra *attre = attr->extra; + + /* Left nexthop_local unchanged if so configured. */ + if ( CHECK_FLAG (rsclient->af_flags[afi][safi], + PEER_FLAG_NEXTHOP_LOCAL_UNCHANGED) ) + { + if ( IN6_IS_ADDR_LINKLOCAL (&attre->mp_nexthop_local) ) + attre->mp_nexthop_len=32; + else + attre->mp_nexthop_len=16; + } + + /* Default nexthop_local treatment for RS-Clients */ + else + { + /* Announcer and RS-Client are both in the same network */ + if (rsclient->shared_network && from->shared_network && + (rsclient->ifindex == from->ifindex)) + { + if ( IN6_IS_ADDR_LINKLOCAL (&attre->mp_nexthop_local) ) + attre->mp_nexthop_len=32; + else + attre->mp_nexthop_len=16; + } + + /* Set link-local address for shared network peer. */ + else if (rsclient->shared_network + && IN6_IS_ADDR_LINKLOCAL (&rsclient->nexthop.v6_local)) + { + memcpy (&attre->mp_nexthop_local, &rsclient->nexthop.v6_local, + IPV6_MAX_BYTELEN); + attre->mp_nexthop_len = 32; + } + + else + attre->mp_nexthop_len = 16; + } + + } +#endif /* HAVE_IPV6 */ + + + /* If this is EBGP peer and remove-private-AS is set. */ + if (rsclient->sort == BGP_PEER_EBGP + && peer_af_flag_check (rsclient, afi, safi, PEER_FLAG_REMOVE_PRIVATE_AS) + && aspath_private_as_check (attr->aspath)) + attr->aspath = aspath_empty_get (); + + /* Route map & unsuppress-map apply. 
*/ + if (ROUTE_MAP_OUT_NAME (filter) || (ri->extra && ri->extra->suppress) ) + { + info.peer = rsclient; + info.attr = attr; + + SET_FLAG (rsclient->rmap_type, PEER_RMAP_TYPE_OUT); + + if (ri->extra && ri->extra->suppress) + ret = route_map_apply (UNSUPPRESS_MAP (filter), p, RMAP_BGP, &info); + else + ret = route_map_apply (ROUTE_MAP_OUT (filter), p, RMAP_BGP, &info); + + rsclient->rmap_type = 0; + + if (ret == RMAP_DENYMATCH) + { + bgp_attr_flush (attr); + return 0; + } + } + + return 1; +} + static int -bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient, - struct prefix *p, struct attr *attr, afi_t afi, safi_t safi) +subgroup_announce_check_rsclient (struct bgp_info *ri, + struct update_subgroup *subgrp, + struct prefix *p, struct attr *attr) { int ret; char buf[SU_ADDRSTRLEN]; struct bgp_filter *filter; struct bgp_info info; struct peer *from; + struct peer *rsclient; + struct peer *onlypeer; struct attr *riattr; struct bgp *bgp; + afi_t afi; + safi_t safi; + + if (DISABLE_BGP_ANNOUNCE) + return 0; + afi = SUBGRP_AFI(subgrp); + safi = SUBGRP_SAFI(subgrp); + rsclient = SUBGRP_PEER(subgrp); + onlypeer = ((SUBGRP_PCOUNT(subgrp) == 1) ? + (SUBGRP_PFIRST(subgrp))->peer : NULL); from = ri->peer; filter = &rsclient->filter[afi][safi]; bgp = rsclient->bgp; riattr = bgp_info_mpath_count (ri) ? bgp_info_mpath_attr (ri) : ri->attr; - if (DISABLE_BGP_ANNOUNCE) - return 0; - /* Do not send back route to sender. */ - if (from == rsclient) + if (onlypeer && (from == onlypeer)) return 0; /* Aggregate-address suppress check. */ @@ -1271,14 +1803,14 @@ bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient, /* If the attribute has originator-id and it is same as remote peer's id. 
*/ - if (riattr->flag & ATTR_FLAG_BIT (BGP_ATTR_ORIGINATOR_ID)) + if (onlypeer && riattr->flag & ATTR_FLAG_BIT (BGP_ATTR_ORIGINATOR_ID)) { - if (IPV4_ADDR_SAME (&rsclient->remote_id, + if (IPV4_ADDR_SAME (&onlypeer->remote_id, &riattr->extra->originator_id)) { - if (bgp_debug_update(rsclient, p, 0)) + if (bgp_debug_update(rsclient, p, subgrp->update_group, 0)) zlog_debug ("%s [Update:SEND] %s/%d originator-id is same as remote router-id", - rsclient->host, + onlypeer->host, inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), p->prefixlen); return 0; @@ -1298,7 +1830,7 @@ bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient, /* Output filter check. */ if (bgp_output_filter (rsclient, p, riattr, afi, safi) == FILTER_DENY) { - if (bgp_debug_update(rsclient, p, 0)) + if (bgp_debug_update(rsclient, p, subgrp->update_group, 0)) zlog_debug ("%s [Update:SEND] %s/%d is filtered", rsclient->host, inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), @@ -1308,11 +1840,11 @@ bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient, #ifdef BGP_SEND_ASPATH_CHECK /* AS path loop check. */ - if (aspath_loop_check (riattr->aspath, rsclient->as)) + if (onlypeer && aspath_loop_check (riattr->aspath, onlypeer->as)) { - if (bgp_debug_update(rsclient, p, 0)) + if (bgp_debug_update(rsclient, p, subgrp->update_group, 0)) zlog_debug ("%s [Update:SEND] suppress announcement to peer AS %u is AS path.", - rsclient->host, rsclient->as); + onlypeer->host, onlypeer->as); return 0; } #endif /* BGP_SEND_ASPATH_CHECK */ @@ -1355,7 +1887,7 @@ bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient, struct attr_extra *attre = attr->extra; /* Left nexthop_local unchanged if so configured. 
*/ - if ( CHECK_FLAG (rsclient->af_flags[afi][safi], + if ( CHECK_FLAG (rsclient->af_flags[afi][safi], PEER_FLAG_NEXTHOP_LOCAL_UNCHANGED) ) { if ( IN6_IS_ADDR_LINKLOCAL (&attre->mp_nexthop_local) ) @@ -1363,11 +1895,11 @@ bgp_announce_check_rsclient (struct bgp_info *ri, struct peer *rsclient, else attre->mp_nexthop_len=16; } - + /* Default nexthop_local treatment for RS-Clients */ - else - { - /* Announcer and RS-Client are both in the same network */ + else + { + /* Announcer and RS-Client are both in the same network */ if (rsclient->shared_network && from->shared_network && (rsclient->ifindex == from->ifindex)) { @@ -1561,27 +2093,32 @@ bgp_best_selection (struct bgp *bgp, struct bgp_node *rn, return; } -static int -bgp_process_announce_selected (struct peer *peer, struct bgp_info *selected, - struct bgp_node *rn, afi_t afi, safi_t safi) +/* + * A new route/change in bestpath of an existing route. Evaluate the path + * for advertisement to the subgroup. + */ +int +subgroup_process_announce_selected (struct update_subgroup *subgrp, + struct bgp_info *selected, + struct bgp_node *rn) { struct prefix *p; + struct peer_af *paf; + struct peer *onlypeer; struct attr attr; struct attr_extra extra; + afi_t afi; + safi_t safi; p = &rn->p; - - /* Announce route to Established peer. */ - if (peer->status != Established) - return 0; - - /* Address family configuration check. */ - if (! peer->afc_nego[afi][safi]) - return 0; + afi = SUBGRP_AFI(subgrp); + safi = SUBGRP_SAFI(subgrp); + onlypeer = ((SUBGRP_PCOUNT(subgrp) == 1) ? 
+ (SUBGRP_PFIRST(subgrp))->peer : NULL); /* First update is deferred until ORF or ROUTE-REFRESH is received */ - if (CHECK_FLAG (peer->af_sflags[afi][safi], - PEER_STATUS_ORF_WAIT_REFRESH)) + if (onlypeer && CHECK_FLAG (onlypeer->af_sflags[afi][safi], + PEER_STATUS_ORF_WAIT_REFRESH)) return 0; /* It's initialized in bgp_announce_[check|check_rsclient]() */ @@ -1590,28 +2127,29 @@ bgp_process_announce_selected (struct peer *peer, struct bgp_info *selected, switch (bgp_node_table (rn)->type) { case BGP_TABLE_MAIN: - /* Announcement to peer->conf. If the route is filtered, + /* Announcement to the subgroup. If the route is filtered, withdraw it. */ - if (selected && bgp_announce_check (selected, peer, p, &attr, afi, safi)) - bgp_adj_out_set (rn, peer, p, &attr, afi, safi, selected); + if (selected && subgroup_announce_check(selected, subgrp, p, &attr)) + bgp_adj_out_set_subgroup(rn, subgrp, &attr, selected); else - bgp_adj_out_unset (rn, peer, p, afi, safi); + bgp_adj_out_unset_subgroup(rn, subgrp); + break; case BGP_TABLE_RSCLIENT: - /* Announcement to peer->conf. If the route is filtered, + /* Announcement to peer->conf. If the route is filtered, withdraw it. */ - if (selected && - bgp_announce_check_rsclient (selected, peer, p, &attr, afi, safi)) - bgp_adj_out_set (rn, peer, p, &attr, afi, safi, selected); + if (selected && + subgroup_announce_check_rsclient (selected, subgrp, p, &attr)) + bgp_adj_out_set_subgroup (rn, subgrp, &attr, selected); else - bgp_adj_out_unset (rn, peer, p, afi, safi); + bgp_adj_out_unset_subgroup(rn, subgrp); break; } return 0; } -struct bgp_process_queue +struct bgp_process_queue { struct bgp *bgp; struct bgp_node *rn; @@ -1632,6 +2170,8 @@ bgp_process_rsclient (struct work_queue *wq, void *data) struct bgp_info_pair old_and_new; struct listnode *node, *nnode; struct peer *rsclient; + struct peer_af *paf; + struct update_subgroup *subgrp; /* Is it end of initial update? 
(after startup) */ if (!rn) @@ -1672,8 +2212,12 @@ bgp_process_rsclient (struct work_queue *wq, void *data) UNSET_FLAG (new_select->flags, BGP_INFO_MULTIPATH_CHG); } - bgp_process_announce_selected (rsclient, new_select, rn, - afi, safi); + paf = peer_af_find(rsclient, afi, safi); + assert(paf); + subgrp = PAF_SUBGRP(paf); + if (!subgrp) /* not an established session */ + continue; + subgroup_process_announce_selected (subgrp, new_select, rn); } } else @@ -1686,12 +2230,14 @@ bgp_process_rsclient (struct work_queue *wq, void *data) bgp_info_unset_flag (rn, new_select, BGP_INFO_ATTR_CHANGED); UNSET_FLAG (new_select->flags, BGP_INFO_MULTIPATH_CHG); } - bgp_process_announce_selected (rsclient, new_select, rn, afi, safi); + paf = peer_af_find(rsclient, afi, safi); + if (paf && (subgrp = PAF_SUBGRP(paf))) /* if an established session */ + subgroup_process_announce_selected (subgrp, new_select, rn); } if (old_select && CHECK_FLAG (old_select->flags, BGP_INFO_REMOVED)) bgp_info_reap (rn, old_select); - + UNSET_FLAG (rn->flags, BGP_NODE_PROCESS_SCHEDULED); return WQ_SUCCESS; } @@ -1752,6 +2298,10 @@ bgp_process_main (struct work_queue *wq, void *data) /* If the user did "clear ip bgp prefix x.x.x.x" this flag will be set */ UNSET_FLAG(rn->flags, BGP_NODE_USER_CLEAR); + /* bestpath has changed; bump version */ + if (old_select || new_select) + bgp_bump_version(rn); + if (old_select) bgp_info_unset_flag (rn, old_select, BGP_INFO_SELECTED); if (new_select) @@ -1761,12 +2311,7 @@ bgp_process_main (struct work_queue *wq, void *data) UNSET_FLAG (new_select->flags, BGP_INFO_MULTIPATH_CHG); } - - /* Check each BGP peer. */ - for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) - { - bgp_process_announce_selected (peer, new_select, rn, afi, safi); - } + group_announce_route(bgp, afi, safi, rn, new_select); /* FIB update. */ if ((safi == SAFI_UNICAST || safi == SAFI_MULTICAST) && (! 
bgp->name && @@ -1810,27 +2355,6 @@ bgp_processq_del (struct work_queue *wq, void *data) XFREE (MTYPE_BGP_PROCESS_QUEUE, pq); } -static void -bgp_process_queue_complete (struct work_queue *wq) -{ - struct bgp *bgp; - struct peer *peer; - struct listnode *node, *nnode; - - /* Schedule write thread either directly or through the MRAI timer - * if needed. - */ - bgp = bgp_get_default (); - if (!bgp) - return; - - if (BGP_ROUTE_ADV_HOLD(bgp)) - return; - - for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) - bgp_peer_schedule_updates(peer); -} - void bgp_process_queue_init (void) { @@ -1847,7 +2371,6 @@ bgp_process_queue_init (void) bm->process_main_queue->spec.workfunc = &bgp_process_main; bm->process_main_queue->spec.del_item_data = &bgp_processq_del; - bm->process_main_queue->spec.completion_func = &bgp_process_queue_complete; bm->process_main_queue->spec.max_retries = 0; bm->process_main_queue->spec.hold = 50; /* Use a higher yield value of 50ms for main queue processing */ @@ -2165,7 +2688,7 @@ bgp_update_rsclient (struct peer *rsclient, afi_t afi, safi_t safi, && attrhash_cmp (ri->attr, attr_new)) { - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) zlog_debug ("%s rcvd %s/%d for RS-client %s...duplicate ignored", peer->host, inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), @@ -2183,7 +2706,7 @@ bgp_update_rsclient (struct peer *rsclient, afi_t afi, safi_t safi, bgp_info_restore (rn, ri); /* Received Logging. */ - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) zlog_debug ("%s rcvd %s/%d for RS-client %s", peer->host, inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), @@ -2210,7 +2733,7 @@ bgp_update_rsclient (struct peer *rsclient, afi_t afi, safi_t safi, } /* Received Logging. 
*/ - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) { zlog_debug ("%s rcvd %s/%d for RS-client %s", peer->host, @@ -2240,7 +2763,7 @@ bgp_update_rsclient (struct peer *rsclient, afi_t afi, safi_t safi, filtered: /* This BGP update is filtered. Log the reason then update BGP entry. */ - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) zlog_debug ("%s rcvd UPDATE about %s/%d -- DENIED for RS-client %s due to: %s", peer->host, inet_ntop (p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), @@ -2276,7 +2799,7 @@ bgp_withdraw_rsclient (struct peer *rsclient, afi_t afi, safi_t safi, /* Withdraw specified route from routing table. */ if (ri && ! CHECK_FLAG (ri->flags, BGP_INFO_HISTORY)) bgp_rib_withdraw (rn, ri, peer, afi, safi); - else if (bgp_debug_update(peer, p, 1)) + else if (bgp_debug_update(peer, p, NULL, 1)) zlog_debug ("%s Can't find the route %s/%d", peer->host, inet_ntop (p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), p->prefixlen); @@ -2408,7 +2931,7 @@ bgp_update_main (struct peer *peer, struct prefix *p, u_int32_t addpath_id, && peer->sort == BGP_PEER_EBGP && CHECK_FLAG (ri->flags, BGP_INFO_HISTORY)) { - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) zlog_debug ("%s rcvd %s/%d", peer->host, inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), @@ -2422,7 +2945,7 @@ bgp_update_main (struct peer *peer, struct prefix *p, u_int32_t addpath_id, } else /* Duplicate - odd */ { - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) { if (!peer->rcvd_attr_printed) { @@ -2453,7 +2976,7 @@ bgp_update_main (struct peer *peer, struct prefix *p, u_int32_t addpath_id, /* Withdraw/Announce before we fully processed the withdraw */ if (CHECK_FLAG(ri->flags, BGP_INFO_REMOVED)) { - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) zlog_debug ("%s rcvd %s/%d, flapped quicker than processing", peer->host, inet_ntop(p->family, &p->u.prefix, buf, 
SU_ADDRSTRLEN), @@ -2462,7 +2985,7 @@ bgp_update_main (struct peer *peer, struct prefix *p, u_int32_t addpath_id, } /* Received Logging. */ - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) zlog_debug ("%s rcvd %s/%d", peer->host, inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), @@ -2546,7 +3069,7 @@ bgp_update_main (struct peer *peer, struct prefix *p, u_int32_t addpath_id, } // End of implicit withdraw /* Received Logging. */ - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) { if (!peer->rcvd_attr_printed) { @@ -2618,7 +3141,7 @@ bgp_update_main (struct peer *peer, struct prefix *p, u_int32_t addpath_id, /* This BGP update is filtered. Log the reason then update BGP entry. */ filtered: - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) { if (!peer->rcvd_attr_printed) { @@ -2689,7 +3212,7 @@ bgp_withdraw (struct peer *peer, struct prefix *p, u_int32_t addpath_id, } /* Logging. */ - if (bgp_debug_update(peer, p, 1)) + if (bgp_debug_update(peer, p, NULL, 1)) zlog_debug ("%s rcvd UPDATE about %s/%d -- withdrawn", peer->host, inet_ntop(p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), @@ -2713,7 +3236,7 @@ bgp_withdraw (struct peer *peer, struct prefix *p, u_int32_t addpath_id, /* Withdraw specified route from routing table. */ if (ri && ! 
CHECK_FLAG (ri->flags, BGP_INFO_HISTORY)) bgp_rib_withdraw (rn, ri, peer, afi, safi); - else if (bgp_debug_update(peer, p, 1)) + else if (bgp_debug_update(peer, p, NULL, 1)) zlog_debug ("%s Can't find the route %s/%d", peer->host, inet_ntop (p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), p->prefixlen); @@ -2727,175 +3250,103 @@ bgp_withdraw (struct peer *peer, struct prefix *p, u_int32_t addpath_id, void bgp_default_originate (struct peer *peer, afi_t afi, safi_t safi, int withdraw) { - struct bgp *bgp; - struct attr attr; - struct aspath *aspath; - struct prefix p; - struct peer *from; - struct bgp_node *rn; - struct bgp_info *ri; - int ret = RMAP_DENYMATCH; - - if (!(afi == AFI_IP || afi == AFI_IP6)) - return; - - bgp = peer->bgp; - from = bgp->peer_self; - - bgp_attr_default_set (&attr, BGP_ORIGIN_IGP); - aspath = attr.aspath; - attr.local_pref = bgp->default_local_pref; - memcpy (&attr.nexthop, &peer->nexthop.v4, IPV4_MAX_BYTELEN); - - if (afi == AFI_IP) - str2prefix ("0.0.0.0/0", &p); -#ifdef HAVE_IPV6 - else if (afi == AFI_IP6) - { - struct attr_extra *ae = attr.extra; + struct update_subgroup *subgrp; + subgrp = peer_subgroup(peer, afi, safi); + subgroup_default_originate(subgrp, withdraw); +} - str2prefix ("::/0", &p); - /* IPv6 global nexthop must be included. */ - memcpy (&ae->mp_nexthop_global, &peer->nexthop.v6_global, - IPV6_MAX_BYTELEN); - ae->mp_nexthop_len = 16; +/* + * bgp_stop_announce_route_timer + */ +void +bgp_stop_announce_route_timer (struct peer_af *paf) +{ + if (!paf->t_announce_route) + return; - /* If the peer is on shared nextwork and we have link-local - nexthop set it. 
*/ - if (peer->shared_network - && !IN6_IS_ADDR_UNSPECIFIED (&peer->nexthop.v6_local)) - { - memcpy (&ae->mp_nexthop_local, &peer->nexthop.v6_local, - IPV6_MAX_BYTELEN); - ae->mp_nexthop_len = 32; - } - } -#endif /* HAVE_IPV6 */ - - if (peer->default_rmap[afi][safi].name) - { - SET_FLAG (bgp->peer_self->rmap_type, PEER_RMAP_TYPE_DEFAULT); - for (rn = bgp_table_top(bgp->rib[afi][safi]); rn; rn = bgp_route_next(rn)) - { - for (ri = rn->info; ri; ri = ri->next) - { - struct attr dummy_attr; - struct attr_extra dummy_extra; - struct bgp_info info; - - /* Provide dummy so the route-map can't modify the attributes */ - dummy_attr.extra = &dummy_extra; - bgp_attr_dup(&dummy_attr, ri->attr); - info.peer = ri->peer; - info.attr = &dummy_attr; - - ret = route_map_apply(peer->default_rmap[afi][safi].map, &rn->p, - RMAP_BGP, &info); - - /* The route map might have set attributes. If we don't flush them - * here, they will be leaked. */ - bgp_attr_flush(&dummy_attr); - if (ret != RMAP_DENYMATCH) - break; - } - if (ret != RMAP_DENYMATCH) - break; - } - bgp->peer_self->rmap_type = 0; - - if (ret == RMAP_DENYMATCH) - withdraw = 1; - } - - if (withdraw) - { - if (CHECK_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_DEFAULT_ORIGINATE)) - bgp_default_withdraw_send (peer, afi, safi); - UNSET_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_DEFAULT_ORIGINATE); - } - else - { - if (! CHECK_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_DEFAULT_ORIGINATE)) - { - SET_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_DEFAULT_ORIGINATE); - bgp_default_update_send (peer, &attr, afi, safi, from); - } - } - - bgp_attr_extra_free (&attr); - aspath_unintern (&aspath); + THREAD_TIMER_OFF (paf->t_announce_route); } -static void -bgp_announce_table (struct peer *peer, afi_t afi, safi_t safi, - struct bgp_table *table, int rsclient) +/* + * bgp_announce_route_timer_expired + * + * Callback that is invoked when the route announcement timer for a + * peer_af expires. 
+ */ +static int +bgp_announce_route_timer_expired (struct thread *t) { - struct bgp_node *rn; - struct bgp_info *ri; - struct attr attr; - struct attr_extra extra; + struct peer_af *paf; + struct peer *peer; - if (! table) - table = (rsclient) ? peer->rib[afi][safi] : peer->bgp->rib[afi][safi]; - if (safi != SAFI_MPLS_VPN - && CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_DEFAULT_ORIGINATE)) - bgp_default_originate (peer, afi, safi, 0); + paf = THREAD_ARG (t); + peer = paf->peer; - /* It's initialized in bgp_announce_[check|check_rsclient]() */ - attr.extra = &extra; + assert (paf->t_announce_route); + paf->t_announce_route = NULL; - for (rn = bgp_table_top (table); rn; rn = bgp_route_next(rn)) - for (ri = rn->info; ri; ri = ri->next) - if (CHECK_FLAG (ri->flags, BGP_INFO_SELECTED) && ri->peer != peer) - { - if ( (rsclient) ? - (bgp_announce_check_rsclient (ri, peer, &rn->p, &attr, afi, safi)) - : (bgp_announce_check (ri, peer, &rn->p, &attr, afi, safi))) - bgp_adj_out_set (rn, peer, &rn->p, &attr, afi, safi, ri); - else - bgp_adj_out_unset (rn, peer, &rn->p, afi, safi); - } + if (peer->status != Established) + return 0; + + if (!peer->afc_nego[paf->afi][paf->safi]) + return 0; + + peer_af_announce_route (paf, 1); + return 0; } +/* + * bgp_announce_route + * + * *Triggers* announcement of routes of a given AFI/SAFI to a peer. + */ void bgp_announce_route (struct peer *peer, afi_t afi, safi_t safi) { - struct bgp_node *rn; - struct bgp_table *table; - - if (peer->status != Established) - return; + struct peer_af *paf; + struct update_subgroup *subgrp; - if (! peer->afc_nego[afi][safi]) + paf = peer_af_find (peer, afi, safi); + if (!paf) return; + subgrp = PAF_SUBGRP(paf); - /* First update is deferred until ORF or ROUTE-REFRESH is received */ - if (CHECK_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_ORF_WAIT_REFRESH)) + /* + * Ignore if subgroup doesn't exist (implies AF is not negotiated) + * or a refresh has already been triggered. 
+ */ + if (!subgrp || paf->t_announce_route) return; - if (safi != SAFI_MPLS_VPN) - bgp_announce_table (peer, afi, safi, NULL, 0); - else - for (rn = bgp_table_top (peer->bgp->rib[afi][safi]); rn; - rn = bgp_route_next(rn)) - if ((table = (rn->info)) != NULL) - bgp_announce_table (peer, afi, safi, table, 0); - - if (CHECK_FLAG(peer->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT)) - bgp_announce_table (peer, afi, safi, NULL, 1); - /* - * The write thread needs to be scheduled since it may not be done as - * part of building adj_out. + * Start a timer to stagger/delay the announce. This serves + * two purposes - announcement can potentially be combined for + * multiple peers and the announcement doesn't happen in the + * vty context. */ - bgp_peer_schedule_updates(peer); -} - + THREAD_TIMER_MSEC_ON (master, paf->t_announce_route, + bgp_announce_route_timer_expired, paf, + (subgrp->peer_count == 1) ? + BGP_ANNOUNCE_ROUTE_SHORT_DELAY_MS : + BGP_ANNOUNCE_ROUTE_DELAY_MS); +} + +/* + * Announce routes from all AF tables to a peer. + * + * This should ONLY be called when there is a need to refresh the + * routes to the peer based on a policy change for this peer alone + * or a route refresh request received from the peer. + * The operation will result in splitting the peer from its existing + * subgroups and putting it in new subgroups. + */ void bgp_announce_route_all (struct peer *peer) { + struct peer_af *paf; + int af; afi_t afi; safi_t safi; @@ -3146,6 +3597,10 @@ bgp_clear_route_table (struct peer *peer, afi_t afi, safi_t safi, bgp_unlock_node (rn); break; } + + /* + * Can't do this anymore. adj-outs are not maintained per peer. 
+ * for (aout = rn->adj_out; aout; aout = aout->next) if (aout->peer == peer || purpose == BGP_CLEAR_ROUTE_MY_RSCLIENT) { @@ -3153,7 +3608,7 @@ bgp_clear_route_table (struct peer *peer, afi_t afi, safi_t safi, bgp_unlock_node (rn); break; } - + */ for (ri = rn->info; ri; ri = ri->next) if (ri->peer == peer || purpose == BGP_CLEAR_ROUTE_MY_RSCLIENT) { @@ -3641,7 +4096,7 @@ bgp_static_update_rsclient (struct peer *rsclient, struct prefix *p, == RMAP_DENY) { /* This BGP update is filtered. Log the reason then update BGP entry. */ - if (bgp_debug_update(rsclient, p, 1)) + if (bgp_debug_update(rsclient, p, NULL, 1)) zlog_debug ("Static UPDATE about %s/%d -- DENIED for RS-client %s due to: import-policy", inet_ntop (p->family, &p->u.prefix, buf, SU_ADDRSTRLEN), p->prefixlen, rsclient->host); @@ -6613,11 +7068,6 @@ route_vty_out_detail (struct vty *vty, struct bgp *bgp, struct prefix *p, vty_out (vty, "%s", VTY_NEWLINE); } -#define BGP_SHOW_SCODE_HEADER "Status codes: s suppressed, d damped, "\ - "h history, * valid, > best, = multipath,%s"\ - " i internal, r RIB-failure, S Stale, R Removed%s" -#define BGP_SHOW_OCODE_HEADER "Origin codes: i - IGP, e - EGP, ? 
- incomplete%s%s" -#define BGP_SHOW_HEADER " Network Next Hop Metric LocPrf Weight Path%s" #define BGP_SHOW_HEADER_CSV "Flags, Network, Next Hop, Metric, LocPrf, Weight, Path%s" #define BGP_SHOW_DAMP_HEADER " Network From Reuse Path%s" #define BGP_SHOW_FLAP_HEADER " Network From Flaps Duration Reuse Path%s" @@ -6826,7 +7276,7 @@ bgp_show_table (struct vty *vty, struct bgp_table *table, struct in_addr *router } else if (header) { - vty_out (vty, "BGP table version is 0, local router ID is %s%s", inet_ntoa (*router_id), VTY_NEWLINE); + vty_out (vty, "BGP table version is %llu, local router ID is %s%s", table->version, inet_ntoa (*router_id), VTY_NEWLINE); vty_out (vty, BGP_SHOW_SCODE_HEADER, VTY_NEWLINE, VTY_NEWLINE); vty_out (vty, BGP_SHOW_OCODE_HEADER, VTY_NEWLINE, VTY_NEWLINE); if (type == bgp_show_type_dampend_paths @@ -10614,7 +11064,7 @@ show_adj_route (struct vty *vty, struct peer *peer, afi_t afi, safi_t safi, if (! in && CHECK_FLAG (peer->af_sflags[afi][safi], PEER_STATUS_DEFAULT_ORIGINATE)) { - vty_out (vty, "BGP table version is 0, local router ID is %s%s", inet_ntoa (bgp->router_id), VTY_NEWLINE); + vty_out (vty, "BGP table version is %llu, local router ID is %s%s", table->version, inet_ntoa (bgp->router_id), VTY_NEWLINE); vty_out (vty, BGP_SHOW_SCODE_HEADER, VTY_NEWLINE, VTY_NEWLINE); vty_out (vty, BGP_SHOW_OCODE_HEADER, VTY_NEWLINE, VTY_NEWLINE); @@ -10661,47 +11111,48 @@ show_adj_route (struct vty *vty, struct peer *peer, afi_t afi, safi_t safi, } else { - for (adj = rn->adj_out; adj; adj = adj->next) - if (adj->peer == peer) - { - if (header1) - { - vty_out (vty, "BGP table version is 0, local router ID is %s%s", inet_ntoa (bgp->router_id), VTY_NEWLINE); - vty_out (vty, BGP_SHOW_SCODE_HEADER, VTY_NEWLINE, VTY_NEWLINE); - vty_out (vty, BGP_SHOW_OCODE_HEADER, VTY_NEWLINE, VTY_NEWLINE); - header1 = 0; - } - if (header2) - { - if (delim) - vty_out (vty, BGP_SHOW_HEADER_CSV, VTY_NEWLINE); - else - vty_out (vty, BGP_SHOW_HEADER, VTY_NEWLINE); - header2 = 0; 
- } - if (adj->attr) - { - if (!CHECK_FLAG(peer->af_flags[afi][safi], - PEER_FLAG_REFLECTOR_CLIENT) - || bgp_flag_check(bgp, BGP_FLAG_RR_ALLOW_OUTBOUND_POLICY)) - { - - bgp_attr_dup(&attr, adj->attr); - ret = bgp_output_modifier(peer, &rn->p, &attr, afi, - safi, rmap_name); - } - else - ret = RMAP_PERMIT; + adj = bgp_adj_peer_lookup(peer, rn); + if (adj) + { + if (header1) + { + vty_out (vty, "BGP table version is %llu, local router ID " + "is %s%s", table->version, + inet_ntoa (bgp->router_id), VTY_NEWLINE); + vty_out (vty, BGP_SHOW_SCODE_HEADER, VTY_NEWLINE, VTY_NEWLINE); + vty_out (vty, BGP_SHOW_OCODE_HEADER, VTY_NEWLINE, VTY_NEWLINE); + header1 = 0; + } + if (header2) + { + if (delim) + vty_out (vty, BGP_SHOW_HEADER_CSV, VTY_NEWLINE); + else + vty_out (vty, BGP_SHOW_HEADER, VTY_NEWLINE); + header2 = 0; + } + if (adj->attr) + { + if (!CHECK_FLAG(peer->af_flags[afi][safi], + PEER_FLAG_REFLECTOR_CLIENT) + || bgp_flag_check(bgp, BGP_FLAG_RR_ALLOW_OUTBOUND_POLICY)) + { + bgp_attr_dup(&attr, adj->attr); + ret = bgp_output_modifier(peer, &rn->p, &attr, afi, + safi, rmap_name); + } + else + ret = RMAP_PERMIT; - if (ret != RMAP_DENY) - { - route_vty_out_tmp (vty, &rn->p, &attr, safi, delim); - output_count++; - } - else - filtered_count++; - } - } + if (ret != RMAP_DENY) + { + route_vty_out_tmp (vty, &rn->p, &attr, safi, delim); + output_count++; + } + else + filtered_count++; + } + } } if (output_count != 0) diff --git a/bgpd/bgp_route.h b/bgpd/bgp_route.h index c35b5f111505..d6af44668746 100644 --- a/bgpd/bgp_route.h +++ b/bgpd/bgp_route.h @@ -26,6 +26,12 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA struct bgp_nexthop_cache; +#define BGP_SHOW_SCODE_HEADER "Status codes: s suppressed, d damped, "\ + "h history, * valid, > best, = multipath,%s"\ + " i internal, r RIB-failure, S Stale, R Removed%s" +#define BGP_SHOW_OCODE_HEADER "Origin codes: i - IGP, e - EGP, ? 
- incomplete%s%s" +#define BGP_SHOW_HEADER " Network Next Hop Metric LocPrf Weight Path%s" + /* Ancillary information to struct bgp_info, * used for uncommonly used data (aggregation, MPLS, etc.) * and lazily allocated to save memory. @@ -199,12 +205,19 @@ enum bgp_path_type BGP_PATH_MULTIPATH }; +static inline void +bgp_bump_version (struct bgp_node *node) +{ + node->version = bgp_table_next_version(bgp_node_table(node)); +} + /* Prototypes. */ extern void bgp_process_queue_init (void); extern void bgp_route_init (void); extern void bgp_route_finish (void); extern void bgp_cleanup_routes (void); extern void bgp_announce_route (struct peer *, afi_t, safi_t); +extern void bgp_stop_announce_route_timer(struct peer_af *paf); extern void bgp_announce_route_all (struct peer *); extern void bgp_default_originate (struct peer *, afi_t, safi_t, int); extern void bgp_soft_reconfig_in (struct peer *, afi_t, safi_t); @@ -280,4 +293,13 @@ extern void route_vty_out (struct vty *, struct prefix *, struct bgp_info *, int extern void route_vty_out_tag (struct vty *, struct prefix *, struct bgp_info *, int, safi_t); extern void route_vty_out_tmp (struct vty *, struct prefix *, struct attr *, safi_t, char *); +extern int +subgroup_process_announce_selected (struct update_subgroup *subgrp, + struct bgp_info *selected, + struct bgp_node *rn); + +extern int subgroup_announce_check(struct bgp_info *ri, + struct update_subgroup *subgrp, + struct prefix *p, struct attr *attr); + #endif /* _QUAGGA_BGP_ROUTE_H */ diff --git a/bgpd/bgp_routemap.c b/bgpd/bgp_routemap.c index 2e68f31abc00..c2dba97a7009 100644 --- a/bgpd/bgp_routemap.c +++ b/bgpd/bgp_routemap.c @@ -40,6 +40,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "buffer.h" #include "sockunion.h" #include "hash.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" @@ -1103,19 +1104,26 @@ route_set_ip_nexthop (void *rule, struct prefix *prefix, bgp_info->attr->nexthop.s_addr = 
sockunion2ip (peer->su_remote); bgp_info->attr->flag |= ATTR_FLAG_BIT (BGP_ATTR_NEXT_HOP); } - else if (CHECK_FLAG (peer->rmap_type, PEER_RMAP_TYPE_OUT) - && peer->su_local - && sockunion_family (peer->su_local) == AF_INET) + else if (CHECK_FLAG (peer->rmap_type, PEER_RMAP_TYPE_OUT)) { - bgp_info->attr->nexthop.s_addr = sockunion2ip (peer->su_local); - bgp_info->attr->flag |= ATTR_FLAG_BIT (BGP_ATTR_NEXT_HOP); + /* The next hop value will be set as part of packet rewrite. + * Set the flags here to indicate that rewrite needs to be done. + * Also, clear the value. + */ + SET_FLAG(bgp_info->attr->rmap_change_flags, + BATTR_RMAP_NEXTHOP_PEER_ADDRESS); + SET_FLAG(bgp_info->attr->rmap_change_flags, + BATTR_RMAP_NEXTHOP_CHANGED); + bgp_info->attr->nexthop.s_addr = 0; } } else { - /* Set next hop value. */ + /* Set next hop value. */ bgp_info->attr->flag |= ATTR_FLAG_BIT (BGP_ATTR_NEXT_HOP); bgp_info->attr->nexthop = *rins->address; + SET_FLAG(bgp_info->attr->rmap_change_flags, + BATTR_RMAP_NEXTHOP_CHANGED); } } @@ -2173,10 +2181,13 @@ route_set_ipv6_nexthop_global (void *rule, struct prefix *prefix, /* Set next hop value. */ (bgp_attr_extra_get (bgp_info->attr))->mp_nexthop_global = *address; - + /* Set nexthop length. */ if (bgp_info->attr->extra->mp_nexthop_len == 0) bgp_info->attr->extra->mp_nexthop_len = 16; + + SET_FLAG(bgp_info->attr->rmap_change_flags, + BATTR_RMAP_NEXTHOP_CHANGED); } return RMAP_OKAY; @@ -2241,6 +2252,9 @@ route_set_ipv6_nexthop_local (void *rule, struct prefix *prefix, /* Set nexthop length. 
*/ if (bgp_info->attr->extra->mp_nexthop_len != 32) bgp_info->attr->extra->mp_nexthop_len = 32; + + SET_FLAG(bgp_info->attr->rmap_change_flags, + BATTR_RMAP_NEXTHOP_CHANGED); } return RMAP_OKAY; @@ -2313,20 +2327,20 @@ route_set_ipv6_nexthop_peer (void *rule, struct prefix *prefix, INET6_ADDRSTRLEN), &peer_address); } - else if (CHECK_FLAG (peer->rmap_type, PEER_RMAP_TYPE_OUT) - && peer->su_local - && sockunion_family (peer->su_local) == AF_INET6) + else if (CHECK_FLAG (peer->rmap_type, PEER_RMAP_TYPE_OUT)) { - inet_pton (AF_INET, sockunion2str (peer->su_local, - peer_addr_buf, - INET6_ADDRSTRLEN), - &peer_address); + SET_FLAG(bgp_info->attr->rmap_change_flags, + BATTR_RMAP_NEXTHOP_PEER_ADDRESS); + SET_FLAG(bgp_info->attr->rmap_change_flags, + BATTR_RMAP_NEXTHOP_CHANGED); + /* clear next hop value. */ + memset (&((bgp_attr_extra_get (bgp_info->attr))->mp_nexthop_global), + 0, sizeof (struct in6_addr)); } if (IN6_IS_ADDR_LINKLOCAL(&peer_address)) { - /* Set next hop value. */ - (bgp_attr_extra_get (bgp_info->attr))->mp_nexthop_local = peer_address; + /* The next hop value will be set as part of packet rewrite. */ /* Set nexthop length. */ if (bgp_info->attr->extra->mp_nexthop_len != 32) @@ -2334,8 +2348,7 @@ route_set_ipv6_nexthop_peer (void *rule, struct prefix *prefix, } else { - /* Set next hop value. */ - (bgp_attr_extra_get (bgp_info->attr))->mp_nexthop_global = peer_address; + /* The next hop value will be set as part of packet rewrite. */ /* Set nexthop length. 
*/ if (bgp_info->attr->extra->mp_nexthop_len == 0) @@ -2652,7 +2665,7 @@ bgp_route_map_process_peer (char *rmap_name, struct peer *peer, if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_SOFT_RECONFIG)) { - if (bgp_debug_update(peer, NULL, 1)) + if (bgp_debug_update(peer, NULL, NULL, 1)) zlog_debug("Processing route_map %s update on " "peer %s (inbound, soft-reconfig)", rmap_name, peer->host); @@ -2663,7 +2676,7 @@ bgp_route_map_process_peer (char *rmap_name, struct peer *peer, || CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_NEW_RCV)) { - if (bgp_debug_update(peer, NULL, 1)) + if (bgp_debug_update(peer, NULL, NULL, 1)) zlog_debug("Processing route_map %s update on " "peer %s (inbound, route-refresh)", rmap_name, peer->host); @@ -2699,7 +2712,7 @@ bgp_route_map_process_peer (char *rmap_name, struct peer *peer, if (CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_SOFT_RECONFIG)) { - if (bgp_debug_update(peer, NULL, 1)) + if (bgp_debug_update(peer, NULL, NULL, 1)) zlog_debug("Processing route_map %s update on " "peer %s (import, soft-reconfig)", rmap_name, peer->host); @@ -2709,7 +2722,7 @@ bgp_route_map_process_peer (char *rmap_name, struct peer *peer, else if (CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_OLD_RCV) || CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_NEW_RCV)) { - if (bgp_debug_update(peer, NULL, 1)) + if (bgp_debug_update(peer, NULL, NULL, 1)) zlog_debug("Processing route_map %s update on " "peer %s (import, route-refresh)", rmap_name, peer->host); @@ -2719,27 +2732,24 @@ bgp_route_map_process_peer (char *rmap_name, struct peer *peer, } } + /* + * For outbound, unsuppress and default-originate map change (content or + * map created), merely update the "config" here, the actual route + * announcement happens at the group level. 
+ */ if (filter->map[RMAP_OUT].name && - (strcmp(rmap_name, filter->map[RMAP_OUT].name) == 0)) - { - filter->map[RMAP_OUT].map = - route_map_lookup_by_name (filter->map[RMAP_OUT].name); - - if (bgp_debug_update(peer, NULL, 0)) - zlog_debug("Processing route_map %s update on peer %s (outbound)", - rmap_name, peer->host); - - if (route_update) - bgp_announce_route_all(peer); - } + (strcmp(rmap_name, filter->map[RMAP_OUT].name) == 0)) + filter->map[RMAP_OUT].map = + route_map_lookup_by_name (filter->map[RMAP_OUT].name); if (filter->usmap.name && (strcmp(rmap_name, filter->usmap.name) == 0)) - { - filter->usmap.map = route_map_lookup_by_name (filter->usmap.name); - if (route_update) - bgp_announce_route_all(peer); - } + filter->usmap.map = route_map_lookup_by_name (filter->usmap.name); + + if (peer->default_rmap[afi][safi].name && + (strcmp (rmap_name, peer->default_rmap[afi][safi].name) == 0)) + peer->default_rmap[afi][safi].map = + route_map_lookup_by_name (peer->default_rmap[afi][safi].name); } static void @@ -2807,26 +2817,16 @@ bgp_route_map_process_update (void *arg, char *rmap_name, int route_update) if (! 
peer->afc[afi][safi]) continue; - /* process in/out/import/export route-maps */ + /* process in/out/import/export/default-orig route-maps */ bgp_route_map_process_peer(rmap_name, peer, afi, safi, route_update); - - /* process default-originate route-map */ - if (peer->default_rmap[afi][safi].name && - (strcmp (rmap_name, peer->default_rmap[afi][safi].name) == 0)) - { - peer->default_rmap[afi][safi].map = - route_map_lookup_by_name (peer->default_rmap[afi][safi].name); - - if (bgp_debug_update(peer, NULL, 0)) - zlog_debug("Processing route_map %s update on " - "default-originate", rmap_name); - - if (route_update) - bgp_default_originate (peer, afi, safi, 0); - } } } + /* for outbound/default-orig route-maps, process for groups */ + update_group_policy_update(bgp, BGP_POLICY_ROUTE_MAP, rmap_name, + route_update, 0); + + /* update peer-group config (template) */ bgp_route_map_update_peer_group(rmap_name, bgp); /* For table route-map updates. */ @@ -2893,7 +2893,7 @@ bgp_route_map_process_update (void *arg, char *rmap_name, int route_update) if (route_update) { - if (bgp_debug_update(peer, NULL, 0)) + if (BGP_DEBUG (zebra, ZEBRA)) zlog_debug("Processing route_map %s update on " "redistributed routes", rmap_name); @@ -2932,13 +2932,18 @@ bgp_route_map_mark_update (char *rmap_name) for (ALL_LIST_ELEMENTS (bm->bgp, node, nnode, bgp)) { + if (bgp->t_rmap_update == NULL) { /* rmap_update_timer of 0 means don't do route updates */ if (bgp->rmap_update_timer) - bgp->t_rmap_update = - thread_add_timer(master, bgp_route_map_update_timer, bgp, - bgp->rmap_update_timer); + { + bgp->t_rmap_update = + thread_add_timer(master, bgp_route_map_update_timer, bgp, + bgp->rmap_update_timer); + /* Signal the groups that a route-map update event has started */ + update_group_policy_update(bgp, BGP_POLICY_ROUTE_MAP, rmap_name, 1, 1); + } else bgp_route_map_process_update((void *)bgp, rmap_name, 0); } diff --git a/bgpd/bgp_table.c b/bgpd/bgp_table.c index 7a6c675dc28c..45d5a9eb7123 100644 
--- a/bgpd/bgp_table.c +++ b/bgpd/bgp_table.c @@ -24,6 +24,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "memory.h" #include "sockunion.h" #include "vty.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" diff --git a/bgpd/bgp_table.h b/bgpd/bgp_table.h index cfda36857040..ce9b9fac0e8d 100644 --- a/bgpd/bgp_table.h +++ b/bgpd/bgp_table.h @@ -43,6 +43,7 @@ struct bgp_table struct peer *owner; struct route_table *route_table; + u_int64_t version; }; struct bgp_node @@ -63,6 +64,7 @@ struct bgp_node struct bgp_node *prn; + u_int64_t version; u_char flags; #define BGP_NODE_PROCESS_SCHEDULED (1 << 0) #define BGP_NODE_USER_CLEAR (1 << 1) @@ -353,4 +355,18 @@ bgp_table_iter_started (bgp_table_iter_t * iter) return route_table_iter_started (&iter->rt_iter); } +/* This would benefit from a real atomic operation... + * until then. */ +static inline u_int64_t +bgp_table_next_version (struct bgp_table *table) +{ + return ++table->version; +} + +static inline u_int64_t +bgp_table_version (struct bgp_table *table) +{ + return table->version; +} + #endif /* _QUAGGA_BGP_TABLE_H */ diff --git a/bgpd/bgp_updgrp.c b/bgpd/bgp_updgrp.c new file mode 100644 index 000000000000..6d18e8346bdc --- /dev/null +++ b/bgpd/bgp_updgrp.c @@ -0,0 +1,1842 @@ +/** + * bgp_updgrp.c: BGP update group structures + * + * @copyright Copyright (C) 2014 Cumulus Networks, Inc. + * + * @author Avneesh Sachdev + * @author Rajesh Varadarajan + * @author Pradosh Mohapatra + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Zebra; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#include + +#include "prefix.h" +#include "thread.h" +#include "buffer.h" +#include "stream.h" +#include "command.h" +#include "sockunion.h" +#include "network.h" +#include "memory.h" +#include "filter.h" +#include "routemap.h" +#include "str.h" +#include "log.h" +#include "plist.h" +#include "linklist.h" +#include "workqueue.h" +#include "hash.h" +#include "jhash.h" +#include "queue.h" + +#include "bgpd/bgpd.h" +#include "bgpd/bgp_table.h" +#include "bgpd/bgp_debug.h" +#include "bgpd/bgp_fsm.h" +#include "bgpd/bgp_advertise.h" +#include "bgpd/bgp_packet.h" +#include "bgpd/bgp_updgrp.h" +#include "bgpd/bgp_route.h" +#include "bgpd/bgp_filter.h" + +/******************** + * PRIVATE FUNCTIONS + ********************/ + +/** + * assign a unique ID to update group and subgroup. Mostly for display/ + * debugging purposes. It's a 64-bit space - used leisurely without a + * worry about its wrapping and about filling gaps. While at it, timestamp + * the creation. 
+ */ +static void +update_group_checkin (struct update_group *updgrp) +{ + updgrp->id = ++bm->updgrp_idspace; + updgrp->uptime = bgp_clock (); +} + +static void +update_subgroup_checkin (struct update_subgroup *subgrp, + struct update_group *updgrp) +{ + subgrp->id = ++bm->subgrp_idspace; + subgrp->uptime = bgp_clock (); +} + +static void +sync_init (struct update_subgroup *subgrp) +{ + subgrp->sync = XCALLOC (MTYPE_BGP_SYNCHRONISE, + sizeof (struct bgp_synchronize)); + BGP_ADV_FIFO_INIT (&subgrp->sync->update); + BGP_ADV_FIFO_INIT (&subgrp->sync->withdraw); + BGP_ADV_FIFO_INIT (&subgrp->sync->withdraw_low); + subgrp->hash = hash_create (baa_hash_key, baa_hash_cmp); + + /* We use a larger buffer for subgrp->work in the event that: + * - We RX a BGP_UPDATE where the attributes alone are just + * under BGP_MAX_PACKET_SIZE + * - The user configures an outbound route-map that does many as-path + * prepends or adds many communities. At most they can have CMD_ARGC_MAX + * args in a route-map so there is a finite limit on how large they can + * make the attributes. + * + * Having a buffer with BGP_MAX_PACKET_SIZE_OVERFLOW allows us to avoid bounds + * checking for every single attribute as we construct an UPDATE. 
+ */ + subgrp->work = stream_new (BGP_MAX_PACKET_SIZE + BGP_MAX_PACKET_SIZE_OVERFLOW); + subgrp->scratch = stream_new (BGP_MAX_PACKET_SIZE); +} + +static void +sync_delete (struct update_subgroup *subgrp) +{ + if (subgrp->sync) + XFREE (MTYPE_BGP_SYNCHRONISE, subgrp->sync); + subgrp->sync = NULL; + if (subgrp->hash) + hash_free (subgrp->hash); + subgrp->hash = NULL; + if (subgrp->work) + stream_free (subgrp->work); + subgrp->work = NULL; + if (subgrp->scratch) + stream_free (subgrp->scratch); + subgrp->scratch = NULL; +} + +/** + * conf_copy + * + * copy only those fields that are relevant to update group match + */ +static void +conf_copy (struct peer *dst, struct peer *src, afi_t afi, safi_t safi) +{ + struct bgp_filter *srcfilter; + struct bgp_filter *dstfilter; + + srcfilter = &src->filter[afi][safi]; + dstfilter = &dst->filter[afi][safi]; + + dst->bgp = src->bgp; + dst->sort = src->sort; + dst->as = src->as; + dst->weight = src->weight; + dst->v_routeadv = src->v_routeadv; + dst->flags = src->flags; + dst->af_flags[afi][safi] = src->af_flags[afi][safi]; + dst->host = strdup (src->host); + dst->cap = src->cap; + dst->af_cap[afi][safi] = src->af_cap[afi][safi]; + dst->afc_nego[afi][safi] = src->afc_nego[afi][safi]; + dst->local_as = src->local_as; + dst->change_local_as = src->change_local_as; + dst->shared_network = src->shared_network; + memcpy (&(dst->nexthop), &(src->nexthop), sizeof (struct bgp_nexthop)); + + dst->group = src->group; + + if (src->default_rmap[afi][safi].name) + { + dst->default_rmap[afi][safi].name = + strdup (src->default_rmap[afi][safi].name); + dst->default_rmap[afi][safi].map = src->default_rmap[afi][safi].map; + } + + if (DISTRIBUTE_OUT_NAME(srcfilter)) + { + DISTRIBUTE_OUT_NAME(dstfilter) = strdup(DISTRIBUTE_OUT_NAME(srcfilter)); + DISTRIBUTE_OUT(dstfilter) = DISTRIBUTE_OUT(srcfilter); + } + + if (PREFIX_LIST_OUT_NAME(srcfilter)) + { + PREFIX_LIST_OUT_NAME(dstfilter) = strdup(PREFIX_LIST_OUT_NAME(srcfilter)); + 
PREFIX_LIST_OUT(dstfilter) = PREFIX_LIST_OUT(srcfilter); + } + + if (FILTER_LIST_OUT_NAME(srcfilter)) + { + FILTER_LIST_OUT_NAME(dstfilter) = strdup(FILTER_LIST_OUT_NAME(srcfilter)); + FILTER_LIST_OUT(dstfilter) = FILTER_LIST_OUT(srcfilter); + } + + if (ROUTE_MAP_OUT_NAME(srcfilter)) + { + ROUTE_MAP_OUT_NAME(dstfilter) = strdup(ROUTE_MAP_OUT_NAME(srcfilter)); + ROUTE_MAP_OUT(dstfilter) = ROUTE_MAP_OUT(srcfilter); + } + + if (UNSUPPRESS_MAP_NAME(srcfilter)) + { + UNSUPPRESS_MAP_NAME(dstfilter) = strdup(UNSUPPRESS_MAP_NAME(srcfilter)); + UNSUPPRESS_MAP(dstfilter) = UNSUPPRESS_MAP(srcfilter); + } +} + +/** + * since we did a bunch of strdup's in conf_copy, time to free them up + */ +static void +conf_release (struct peer *src, afi_t afi, safi_t safi) +{ + struct bgp_filter *srcfilter; + + srcfilter = &src->filter[afi][safi]; + + if (src->default_rmap[afi][safi].name) + free (src->default_rmap[afi][safi].name); + + if (srcfilter->dlist[FILTER_OUT].name) + free (srcfilter->dlist[FILTER_OUT].name); + + if (srcfilter->plist[FILTER_OUT].name) + free (srcfilter->plist[FILTER_OUT].name); + + if (srcfilter->aslist[FILTER_OUT].name) + free (srcfilter->aslist[FILTER_OUT].name); + + if (srcfilter->map[RMAP_OUT].name) + free (srcfilter->map[RMAP_OUT].name); + + if (srcfilter->usmap.name) + free (srcfilter->usmap.name); +} + +static void +peer2_updgrp_copy (struct update_group *updgrp, struct peer_af *paf) +{ + struct peer *src; + struct peer *dst; + + if (!updgrp || !paf) + return; + + src = paf->peer; + dst = updgrp->conf; + if (!src || !dst) + return; + + updgrp->afi = paf->afi; + updgrp->safi = paf->safi; + updgrp->afid = paf->afid; + updgrp->bgp = src->bgp; + + conf_copy (dst, src, paf->afi, paf->safi); +} + +/** + * auxiliary functions to maintain the hash table. + * - updgrp_hash_alloc - to create a new entry, passed to hash_get + * - updgrp_hash_key_make - makes the key for update group search + * - updgrp_hash_cmp - compare two update groups. 
+ */
+static void *
+updgrp_hash_alloc (void *p)
+{
+  struct update_group *tmpl = p;
+  struct update_group *grp;
+
+  /* Start from a member-for-member copy of the lookup template ... */
+  grp = XCALLOC (MTYPE_BGP_UPDGRP, sizeof (struct update_group));
+  *grp = *tmpl;
+
+  /*
+   * ... then give the new group a conf peer of its own, so it does not
+   * keep pointing at the template's conf.
+   */
+  grp->conf = XCALLOC (MTYPE_BGP_PEER, sizeof (struct peer));
+  conf_copy (grp->conf, tmpl->conf, tmpl->afi, tmpl->safi);
+
+  return grp;
+}
+
+/**
+ * The hash value for a peer is computed from the following variables:
+ * v = f(
+ *       1. IBGP (1) or EBGP (2)
+ *       2. FLAGS based on configuration:
+ *             LOCAL_AS_NO_PREPEND
+ *             LOCAL_AS_REPLACE_AS
+ *       3. AF_FLAGS based on configuration:
+ *             Refer to definition in bgp_updgrp.h
+ *       4. (AF-independent) Capability flags:
+ *             AS4_RCV capability
+ *       5. (AF-dependent) Capability flags:
+ *             ORF_PREFIX_SM_RCV (peer can send prefix ORF)
+ *       6. MRAI
+ *       7. peer-group name
+ *       8. Outbound route-map name (neighbor route-map <> out)
+ *       9. Outbound distribute-list name (neighbor distribute-list <> out)
+ *       10. Outbound prefix-list name (neighbor prefix-list <> out)
+ *       11. Outbound as-list name (neighbor filter-list <> out)
+ *       12. Unsuppress map name (neighbor unsuppress-map <>)
+ *       13. default rmap name (neighbor default-originate route-map <>)
+ *       14. encoding both global and link-local nexthop?
+ *       15. If peer is configured to be a lonesoul, peer ip address
+ *       16. Local-as should match, if configured.
+ *      )
+ */
+static unsigned int
+updgrp_hash_key_make (void *p)
+{
+  const struct update_group *updgrp;
+  const struct peer *peer;
+  const struct bgp_filter *filter;
+  uint32_t flags;
+  uint32_t key;
+  afi_t afi;
+  safi_t safi;
+
+#define SEED1 999331
+#define SEED2 2147483647
+
+  updgrp = p;
+  peer = updgrp->conf;
+  afi = updgrp->afi;
+  safi = updgrp->safi;
+  flags = peer->af_flags[afi][safi];
+  filter = &peer->filter[afi][safi];
+
+  key = 0;
+
+  key = jhash_1word (peer->sort, key); /* EBGP or IBGP */
+  key = jhash_1word ((peer->flags & PEER_UPDGRP_FLAGS), key);
+  key = jhash_1word ((flags & PEER_UPDGRP_AF_FLAGS), key);
+  key = jhash_1word ((peer->cap & PEER_UPDGRP_CAP_FLAGS), key);
+  key = jhash_1word ((peer->af_cap[afi][safi] &
+                      PEER_UPDGRP_AF_CAP_FLAGS), key);
+  key = jhash_1word (peer->v_routeadv, key);
+  key = jhash_1word (peer->change_local_as, key);
+
+  /* Names are hashed with SEED1 and the result folded into the key. */
+  if (peer->group)
+    key = jhash_1word (jhash (peer->group->name,
+                              strlen (peer->group->name), SEED1), key);
+
+  if (filter->map[RMAP_OUT].name)
+    key = jhash_1word (jhash (filter->map[RMAP_OUT].name,
+                              strlen (filter->map[RMAP_OUT].name), SEED1),
+                       key);
+
+  if (filter->dlist[FILTER_OUT].name)
+    key = jhash_1word (jhash (filter->dlist[FILTER_OUT].name,
+                              strlen (filter->dlist[FILTER_OUT].name), SEED1),
+                       key);
+
+  if (filter->plist[FILTER_OUT].name)
+    key = jhash_1word (jhash (filter->plist[FILTER_OUT].name,
+                              strlen (filter->plist[FILTER_OUT].name), SEED1),
+                       key);
+
+  if (filter->aslist[FILTER_OUT].name)
+    key = jhash_1word (jhash (filter->aslist[FILTER_OUT].name,
+                              strlen (filter->aslist[FILTER_OUT].name),
+                              SEED1), key);
+
+  if (filter->usmap.name)
+    key = jhash_1word (jhash (filter->usmap.name,
+                              strlen (filter->usmap.name), SEED1), key);
+
+  if (peer->default_rmap[afi][safi].name)
+    key = jhash_1word (jhash (peer->default_rmap[afi][safi].name,
+                              strlen (peer->default_rmap[afi][safi].name),
+                              SEED1), key);
+
+  /* If peer is on a shared network and is exchanging IPv6 prefixes,
+   * it needs to include link-local address. That's different from
+   * non-shared-network peers (nexthop encoded with 32 bytes vs 16
+   * bytes). We create different update groups to take care of that.
+   *
+   * NOTE(review): updgrp_hash_cmp() tests (afi == AFI_IP6) rather than
+   * peer_afi_active_nego (peer, AFI_IP6) for the same criterion - confirm
+   * the two are meant to differ.
+   */
+  key = jhash_1word ((peer->shared_network &&
+                      peer_afi_active_nego (peer, AFI_IP6)),
+                     key);
+
+  /*
+   * Every peer configured to be a lonesoul gets its own update group.
+   *
+   * Every route server client gets its own update group as well. Optimize
+   * later.
+   */
+  if (CHECK_FLAG (peer->flags, PEER_FLAG_LONESOUL) ||
+      CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT))
+    key = jhash_1word (jhash (peer->host, strlen (peer->host), SEED2), key);
+
+  return key;
+}
+
+/*
+ * Hash comparison callback: returns 1 when the two update groups match on
+ * every criterion checked below, 0 otherwise (including when either
+ * pointer is NULL). The afi/safi used for the per-AF fields are taken
+ * from the first group only.
+ */
+static int
+updgrp_hash_cmp (const void *p1, const void *p2)
+{
+  const struct update_group *grp1;
+  const struct update_group *grp2;
+  const struct peer *pe1;
+  const struct peer *pe2;
+  uint32_t flags1;
+  uint32_t flags2;
+  const struct bgp_filter *fl1;
+  const struct bgp_filter *fl2;
+  afi_t afi;
+  safi_t safi;
+
+  if (!p1 || !p2)
+    return 0;
+
+  grp1 = p1;
+  grp2 = p2;
+  pe1 = grp1->conf;
+  pe2 = grp2->conf;
+  afi = grp1->afi;
+  safi = grp1->safi;
+  flags1 = pe1->af_flags[afi][safi];
+  flags2 = pe2->af_flags[afi][safi];
+  fl1 = &pe1->filter[afi][safi];
+  fl2 = &pe2->filter[afi][safi];
+
+  /* put EBGP and IBGP peers in different update groups */
+  if (pe1->sort != pe2->sort)
+    return 0;
+
+  /* check peer flags */
+  if ((pe1->flags & PEER_UPDGRP_FLAGS) !=
+      (pe2->flags & PEER_UPDGRP_FLAGS))
+    return 0;
+
+  /* If there is 'local-as' configured, it should match. */
+  if (pe1->change_local_as != pe2->change_local_as)
+    return 0;
+
+  /* flags like route reflector client */
+  if ((flags1 & PEER_UPDGRP_AF_FLAGS) != (flags2 & PEER_UPDGRP_AF_FLAGS))
+    return 0;
+
+  if ((pe1->cap & PEER_UPDGRP_CAP_FLAGS) !=
+      (pe2->cap & PEER_UPDGRP_CAP_FLAGS))
+    return 0;
+
+  if ((pe1->af_cap[afi][safi] & PEER_UPDGRP_AF_CAP_FLAGS) !=
+      (pe2->af_cap[afi][safi] & PEER_UPDGRP_AF_CAP_FLAGS))
+    return 0;
+
+  if (pe1->v_routeadv != pe2->v_routeadv)
+    return 0;
+
+  /* peer-groups are compared by pointer, not by name */
+  if (pe1->group != pe2->group)
+    return 0;
+
+  /* route-map names should be the same */
+  if ((fl1->map[RMAP_OUT].name && !fl2->map[RMAP_OUT].name) ||
+      (!fl1->map[RMAP_OUT].name && fl2->map[RMAP_OUT].name) ||
+      (fl1->map[RMAP_OUT].name && fl2->map[RMAP_OUT].name &&
+       strcmp (fl1->map[RMAP_OUT].name, fl2->map[RMAP_OUT].name)))
+    return 0;
+
+  if ((fl1->dlist[FILTER_OUT].name && !fl2->dlist[FILTER_OUT].name) ||
+      (!fl1->dlist[FILTER_OUT].name && fl2->dlist[FILTER_OUT].name) ||
+      (fl1->dlist[FILTER_OUT].name && fl2->dlist[FILTER_OUT].name &&
+       strcmp (fl1->dlist[FILTER_OUT].name, fl2->dlist[FILTER_OUT].name)))
+    return 0;
+
+  if ((fl1->plist[FILTER_OUT].name && !fl2->plist[FILTER_OUT].name) ||
+      (!fl1->plist[FILTER_OUT].name && fl2->plist[FILTER_OUT].name) ||
+      (fl1->plist[FILTER_OUT].name && fl2->plist[FILTER_OUT].name &&
+       strcmp (fl1->plist[FILTER_OUT].name, fl2->plist[FILTER_OUT].name)))
+    return 0;
+
+  if ((fl1->aslist[FILTER_OUT].name && !fl2->aslist[FILTER_OUT].name) ||
+      (!fl1->aslist[FILTER_OUT].name && fl2->aslist[FILTER_OUT].name) ||
+      (fl1->aslist[FILTER_OUT].name && fl2->aslist[FILTER_OUT].name &&
+       strcmp (fl1->aslist[FILTER_OUT].name, fl2->aslist[FILTER_OUT].name)))
+    return 0;
+
+  if ((fl1->usmap.name && !fl2->usmap.name) ||
+      (!fl1->usmap.name && fl2->usmap.name) ||
+      (fl1->usmap.name && fl2->usmap.name &&
+       strcmp (fl1->usmap.name, fl2->usmap.name)))
+    return 0;
+
+  if ((pe1->default_rmap[afi][safi].name &&
+       !pe2->default_rmap[afi][safi].name) ||
+      (!pe1->default_rmap[afi][safi].name &&
+       pe2->default_rmap[afi][safi].name) ||
+      (pe1->default_rmap[afi][safi].name &&
+       pe2->default_rmap[afi][safi].name &&
+       strcmp (pe1->default_rmap[afi][safi].name,
+               pe2->default_rmap[afi][safi].name)))
+    return 0;
+
+  if ((afi == AFI_IP6) && (pe1->shared_network != pe2->shared_network))
+    return 0;
+
+  /*
+   * Lonesoul / route-server-client peers must not share a group with a
+   * different peer. Only pe1's flags are consulted here.
+   *
+   * NOTE(review): conf_copy() does not copy peer->su, so both operands may
+   * be zero-filled when this runs, while the key function hashes
+   * peer->host instead - confirm this comparison is effective.
+   */
+  if ((CHECK_FLAG (pe1->flags, PEER_FLAG_LONESOUL) ||
+       CHECK_FLAG (pe1->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT)) &&
+      !sockunion_same (&pe1->su, &pe2->su))
+    return 0;
+
+  return 1;
+}
+
+/*
+ * Set (set != 0) or clear (set == 0) PEER_FLAG_LONESOUL on a peer. When the
+ * flag actually changes, update_group_adjust_peer_afs() is called for the
+ * peer.
+ */
+static void
+peer_lonesoul_or_not (struct peer *peer, int set)
+{
+  /* no change in status? */
+  if (set == (CHECK_FLAG (peer->flags, PEER_FLAG_LONESOUL) > 0))
+    return;
+
+  if (set)
+    SET_FLAG (peer->flags, PEER_FLAG_LONESOUL);
+  else
+    UNSET_FLAG (peer->flags, PEER_FLAG_LONESOUL);
+
+  update_group_adjust_peer_afs (peer);
+}
+
+/*
+ * subgroup_total_packets_enqueued
+ *
+ * Returns the total number of packets enqueued to a subgroup.
+ *
+ * Computed as the version of the last packet in the queue minus one.
+ * NOTE(review): assumes bpacket_queue_last() never returns NULL - confirm.
+ */
+static unsigned int
+subgroup_total_packets_enqueued (struct update_subgroup *subgrp)
+{
+  struct bpacket *pkt;
+
+  pkt = bpacket_queue_last (SUBGRP_PKTQ (subgrp));
+
+  return pkt->ver - 1;
+}
+
+/*
+ * Update-group walk callback that prints one update group, and each of
+ * its subgroups with their counters and member peers, to the vty passed
+ * in arg. Always returns UPDWALK_CONTINUE.
+ *
+ * 64-bit ids and versions are cast to unsigned long long so the argument
+ * type matches the %llu conversion on every platform.
+ */
+static int
+update_group_show_walkcb (struct update_group *updgrp, void *arg)
+{
+  struct vty *vty = arg;
+  struct update_subgroup *subgrp;
+  struct peer_af *paf;
+  struct bgp_filter *filter;
+
+  vty_out (vty, "Update-group %llu:%s",
+           (unsigned long long) updgrp->id, VTY_NEWLINE);
+  vty_out (vty, " Created: %s", timestamp_string (updgrp->uptime));
+  filter = &updgrp->conf->filter[updgrp->afi][updgrp->safi];
+  if (filter->map[RMAP_OUT].name)
+    vty_out (vty, " Outgoing route map: %s%s%s",
+             filter->map[RMAP_OUT].map ? "X" : "",
+             filter->map[RMAP_OUT].name, VTY_NEWLINE);
+  vty_out (vty, " MRAI value (seconds): %d%s",
+           updgrp->conf->v_routeadv, VTY_NEWLINE);
+  if (updgrp->conf->change_local_as)
+    vty_out (vty, " Local AS %u%s%s%s",
+             updgrp->conf->change_local_as,
+             CHECK_FLAG (updgrp->conf->flags,
+                         PEER_FLAG_LOCAL_AS_NO_PREPEND) ? " no-prepend" : "",
+             CHECK_FLAG (updgrp->conf->flags,
+                         PEER_FLAG_LOCAL_AS_REPLACE_AS) ? " replace-as" : "",
+             VTY_NEWLINE);
+
+  UPDGRP_FOREACH_SUBGRP (updgrp, subgrp)
+  {
+    vty_out (vty, "%s", VTY_NEWLINE);
+    vty_out (vty, " Update-subgroup %llu:%s",
+             (unsigned long long) subgrp->id, VTY_NEWLINE);
+    vty_out (vty, " Created: %s", timestamp_string (subgrp->uptime));
+
+    /* Only shown for subgroups that record a split origin. */
+    if (subgrp->split_from.update_group_id || subgrp->split_from.subgroup_id)
+      {
+        vty_out (vty, " Split from group id: %llu%s",
+                 (unsigned long long) subgrp->split_from.update_group_id,
+                 VTY_NEWLINE);
+        vty_out (vty, " Split from subgroup id: %llu%s",
+                 (unsigned long long) subgrp->split_from.subgroup_id,
+                 VTY_NEWLINE);
+      }
+
+    vty_out (vty, " Join events: %u%s", subgrp->join_events, VTY_NEWLINE);
+    vty_out (vty, " Prune events: %u%s",
+             subgrp->prune_events, VTY_NEWLINE);
+    vty_out (vty, " Merge events: %u%s",
+             subgrp->merge_events, VTY_NEWLINE);
+    vty_out (vty, " Split events: %u%s",
+             subgrp->split_events, VTY_NEWLINE);
+    vty_out (vty, " Update group switch events: %u%s",
+             subgrp->updgrp_switch_events, VTY_NEWLINE);
+    vty_out (vty, " Peer refreshes combined: %u%s",
+             subgrp->peer_refreshes_combined, VTY_NEWLINE);
+    vty_out (vty, " Merge checks triggered: %u%s",
+             subgrp->merge_checks_triggered, VTY_NEWLINE);
+    vty_out (vty, " Version: %llu%s",
+             (unsigned long long) subgrp->version, VTY_NEWLINE);
+    vty_out (vty, " Packet queue length: %d%s",
+             bpacket_queue_length (SUBGRP_PKTQ (subgrp)), VTY_NEWLINE);
+    vty_out (vty, " Total packets enqueued: %u%s",
+             subgroup_total_packets_enqueued (subgrp), VTY_NEWLINE);
+    vty_out (vty, " Packet queue high watermark: %d%s",
+             bpacket_queue_hwm_length (SUBGRP_PKTQ (subgrp)), VTY_NEWLINE);
+    vty_out (vty, " Adj-out list count: %u%s",
+             subgrp->adj_count, VTY_NEWLINE);
+    vty_out (vty, " Advertise list: %s%s",
+             advertise_list_is_empty (subgrp) ? "empty" : "not empty",
+             VTY_NEWLINE);
+    vty_out (vty, " Flags: %s%s",
+             CHECK_FLAG (subgrp->flags,
+                         SUBGRP_FLAG_NEEDS_REFRESH) ? "R" : "", VTY_NEWLINE);
+    if (subgrp->peer_count > 0)
+      {
+        vty_out (vty, " Peers:%s", VTY_NEWLINE);
+        SUBGRP_FOREACH_PEER (subgrp, paf)
+          vty_out (vty, " - %s%s", paf->peer->host, VTY_NEWLINE);
+      }
+  }
+  return UPDWALK_CONTINUE;
+}
+
+/*
+ * Helper function to show the packet queue for each subgroup of update group.
+ * Will be constrained to a particular subgroup id if id !=0
+ *
+ * arg is a struct updwalk_context carrying the vty and the subgroup id
+ * filter. Always returns UPDWALK_CONTINUE.
+ */
+static int
+updgrp_show_packet_queue_walkcb (struct update_group *updgrp, void *arg)
+{
+  struct updwalk_context *ctx = arg;
+  struct update_subgroup *subgrp;
+  struct vty *vty;
+
+  vty = ctx->vty;
+  UPDGRP_FOREACH_SUBGRP (updgrp, subgrp)
+  {
+    if (ctx->subgrp_id && (ctx->subgrp_id != subgrp->id))
+      continue;
+    vty_out (vty, "update group %llu, subgroup %llu%s",
+             (unsigned long long) updgrp->id,
+             (unsigned long long) subgrp->id, VTY_NEWLINE);
+    bpacket_queue_show_vty (SUBGRP_PKTQ (subgrp), vty);
+  }
+  return UPDWALK_CONTINUE;
+}
+
+/*
+ * Show the packet queue for each subgroup of update group. Will be
+ * constrained to a particular subgroup id if id !=0
+ */
+void
+update_group_show_packet_queue (struct bgp *bgp, afi_t afi, safi_t safi,
+                                struct vty *vty, u_int64_t id)
+{
+  struct updwalk_context ctx;
+
+  memset (&ctx, 0, sizeof (ctx));
+  ctx.vty = vty;
+  ctx.subgrp_id = id;
+  ctx.flags = 0;
+  update_group_af_walk (bgp, afi, safi, updgrp_show_packet_queue_walkcb,
+                        &ctx);
+}
+
+/*
+ * Look up the existing update group that matches a peer_af.
+ *
+ * Returns NULL when the peer is not established or hash_lookup() finds no
+ * match. The lookup key is a temporary update_group/peer pair on the
+ * stack, filled in by peer2_updgrp_copy() and released with
+ * conf_release() before returning.
+ */
+static struct update_group *
+update_group_find (struct peer_af *paf)
+{
+  struct update_group *updgrp;
+  struct update_group tmp;
+  struct peer tmp_conf;
+
+  if (!peer_established (PAF_PEER (paf)))
+    return NULL;
+
+  memset (&tmp, 0, sizeof (tmp));
+  memset (&tmp_conf, 0, sizeof (tmp_conf));
+  tmp.conf = &tmp_conf;
+  peer2_updgrp_copy (&tmp, paf);
+
+  updgrp = hash_lookup (paf->peer->bgp->update_groups[paf->afid], &tmp);
+  conf_release (&tmp_conf, paf->afi, paf->safi);
+  return updgrp;
+}
+
+static struct update_group *
+update_group_create (struct peer_af *paf)
+{
+  struct update_group *updgrp;
+  struct update_group tmp;
+  struct peer tmp_conf;
+
memset (&tmp, 0, sizeof (tmp)); + memset (&tmp_conf, 0, sizeof (tmp_conf)); + tmp.conf = &tmp_conf; + peer2_updgrp_copy (&tmp, paf); + + updgrp = hash_get (paf->peer->bgp->update_groups[paf->afid], &tmp, + updgrp_hash_alloc); + if (!updgrp) + return NULL; + update_group_checkin (updgrp); + + if (BGP_DEBUG (update_groups, UPDATE_GROUPS)) + zlog_debug ("create update group %llu", updgrp->id); + + UPDGRP_GLOBAL_STAT (updgrp, updgrps_created) += 1; + + return updgrp; +} + +static void +update_group_delete (struct update_group *updgrp) +{ + if (BGP_DEBUG (update_groups, UPDATE_GROUPS)) + zlog_debug ("delete update group %llu", updgrp->id); + + UPDGRP_GLOBAL_STAT (updgrp, updgrps_deleted) += 1; + + hash_release (updgrp->bgp->update_groups[updgrp->afid], updgrp); + conf_release (updgrp->conf, updgrp->afi, updgrp->safi); + XFREE (MTYPE_BGP_PEER, updgrp->conf); + XFREE (MTYPE_BGP_UPDGRP, updgrp); +} + +static void +update_group_add_subgroup (struct update_group *updgrp, + struct update_subgroup *subgrp) +{ + if (!updgrp || !subgrp) + return; + + LIST_INSERT_HEAD (&(updgrp->subgrps), subgrp, updgrp_train); + subgrp->update_group = updgrp; +} + +static void +update_group_remove_subgroup (struct update_group *updgrp, + struct update_subgroup *subgrp) +{ + if (!updgrp || !subgrp) + return; + + LIST_REMOVE (subgrp, updgrp_train); + subgrp->update_group = NULL; + if (LIST_EMPTY (&(updgrp->subgrps))) + update_group_delete (updgrp); +} + +static struct update_subgroup * +update_subgroup_create (struct update_group *updgrp) +{ + struct update_subgroup *subgrp; + + subgrp = XCALLOC (MTYPE_BGP_UPD_SUBGRP, sizeof (struct update_subgroup)); + update_subgroup_checkin (subgrp, updgrp); + subgrp->v_coalesce = (UPDGRP_INST (updgrp))->coalesce_time; + sync_init (subgrp); + bpacket_queue_init (SUBGRP_PKTQ (subgrp)); + bpacket_queue_add (SUBGRP_PKTQ (subgrp), NULL, NULL); + TAILQ_INIT (&(subgrp->adjq)); + if (BGP_DEBUG (update_groups, UPDATE_GROUPS)) + zlog_debug ("create subgroup 
u%llu:s%llu", + updgrp->id, subgrp->id); + + update_group_add_subgroup (updgrp, subgrp); + + UPDGRP_INCR_STAT (updgrp, subgrps_created); + + return subgrp; +} + +static void +update_subgroup_delete (struct update_subgroup *subgrp) +{ + if (!subgrp) + return; + + if (subgrp->update_group) + UPDGRP_INCR_STAT (subgrp->update_group, subgrps_deleted); + + if (subgrp->t_merge_check) + THREAD_OFF (subgrp->t_merge_check); + + if (subgrp->t_coalesce) + THREAD_TIMER_OFF (subgrp->t_coalesce); + + bpacket_queue_cleanup (SUBGRP_PKTQ (subgrp)); + subgroup_clear_table (subgrp); + + if (subgrp->t_coalesce) + THREAD_TIMER_OFF (subgrp->t_coalesce); + sync_delete (subgrp); + + if (BGP_DEBUG (update_groups, UPDATE_GROUPS)) + zlog_debug ("delete subgroup u%llu:s%llu", + subgrp->update_group->id, subgrp->id); + + update_group_remove_subgroup (subgrp->update_group, subgrp); + + XFREE (MTYPE_BGP_UPD_SUBGRP, subgrp); +} + +void +update_subgroup_inherit_info (struct update_subgroup *to, + struct update_subgroup *from) +{ + if (!to || !from) + return; + + to->sflags = from->sflags; +} + +/* + * update_subgroup_check_delete + * + * Delete a subgroup if it is ready to be deleted. + * + * Returns TRUE if the subgroup was deleted. + */ +static int +update_subgroup_check_delete (struct update_subgroup *subgrp) +{ + if (!subgrp) + return 0; + + if (!LIST_EMPTY (&(subgrp->peers))) + return 0; + + update_subgroup_delete (subgrp); + + return 1; +} + +/* + * update_subgroup_add_peer + * + * @param send_enqueued_packets If true all currently enqueued packets will + * also be sent to the peer. 
+ */ +static void +update_subgroup_add_peer (struct update_subgroup *subgrp, struct peer_af *paf, + int send_enqueued_pkts) +{ + struct bpacket *pkt; + + if (!subgrp || !paf) + return; + + LIST_INSERT_HEAD (&(subgrp->peers), paf, subgrp_train); + paf->subgroup = subgrp; + subgrp->peer_count++; + + if (bgp_debug_peer_updout_enabled(paf->peer)) + { + UPDGRP_PEER_DBG_EN(subgrp->update_group); + } + + SUBGRP_INCR_STAT (subgrp, join_events); + + if (send_enqueued_pkts) + { + pkt = bpacket_queue_first (SUBGRP_PKTQ (subgrp)); + } + else + { + + /* + * Hang the peer off of the last, placeholder, packet in the + * queue. This means it won't see any of the packets that are + * currently the queue. + */ + pkt = bpacket_queue_last (SUBGRP_PKTQ (subgrp)); + assert (pkt->buffer == NULL); + } + + bpacket_add_peer (pkt, paf); + + bpacket_queue_sanity_check (SUBGRP_PKTQ (subgrp)); +} + +/* + * update_subgroup_remove_peer_internal + * + * Internal function that removes a peer from a subgroup, but does not + * delete the subgroup. A call to this function must almost always be + * followed by a call to update_subgroup_check_delete(). + * + * @see update_subgroup_remove_peer + */ +static void +update_subgroup_remove_peer_internal (struct update_subgroup *subgrp, + struct peer_af *paf) +{ + assert (subgrp && paf); + + if (bgp_debug_peer_updout_enabled(paf->peer)) + { + UPDGRP_PEER_DBG_DIS(subgrp->update_group); + } + + bpacket_queue_remove_peer (paf); + LIST_REMOVE (paf, subgrp_train); + paf->subgroup = NULL; + subgrp->peer_count--; + + SUBGRP_INCR_STAT (subgrp, prune_events); +} + +/* + * update_subgroup_remove_peer + */ +void +update_subgroup_remove_peer (struct update_subgroup *subgrp, + struct peer_af *paf) +{ + if (!subgrp || !paf) + return; + + update_subgroup_remove_peer_internal (subgrp, paf); + + if (update_subgroup_check_delete (subgrp)) + return; + + /* + * The deletion of the peer may have caused some packets to be + * deleted from the subgroup packet queue. 
Check if the subgroup can + * be merged now. + */ + update_subgroup_check_merge (subgrp, "removed peer from subgroup"); +} + +static struct update_subgroup * +update_subgroup_find (struct update_group *updgrp, struct peer_af *paf) +{ + struct update_subgroup *subgrp = NULL; + uint64_t version; + + if (paf->subgroup) + { + assert (0); + return NULL; + } + else + version = 0; + + if (!peer_established (PAF_PEER (paf))) + return NULL; + + UPDGRP_FOREACH_SUBGRP (updgrp, subgrp) + { + if (subgrp->version != version) + continue; + + /* + * The version number is not meaningful on a subgroup that needs + * a refresh. + */ + if (update_subgroup_needs_refresh (subgrp)) + continue; + + break; + } + + return subgrp; +} + +/* + * update_subgroup_ready_for_merge + * + * Returns TRUE if this subgroup is in a state that allows it to be + * merged into another subgroup. + */ +static inline int +update_subgroup_ready_for_merge (struct update_subgroup *subgrp) +{ + + /* + * Not ready if there are any encoded packets waiting to be written + * out to peers. + */ + if (!bpacket_queue_is_empty (SUBGRP_PKTQ (subgrp))) + return 0; + + /* + * Not ready if there enqueued updates waiting to be encoded. + */ + if (!advertise_list_is_empty (subgrp)) + return 0; + + /* + * Don't attempt to merge a subgroup that needs a refresh. For one, + * we can't determine if the adj_out of such a group matches that of + * another group. + */ + if (update_subgroup_needs_refresh (subgrp)) + return 0; + + return 1; +} + +/* + * update_subgrp_can_merge_into + * + * Returns TRUE if the first subgroup can merge into the second + * subgroup. + */ +static inline int +update_subgroup_can_merge_into (struct update_subgroup *subgrp, + struct update_subgroup *target) +{ + + if (subgrp == target) + return 0; + + /* + * Both must have processed the BRIB to the same point in order to + * be merged. 
+ */ + if (subgrp->version != target->version) + return 0; + + /* + * If there are any adv entries on the target, then its adj-out (the + * set of advertised routes) does not match that of the other + * subgrp, and we cannot merge the two. + * + * The adj-out is used when generating a route refresh to a peer in + * a subgroup. If it is not accurate, say it is missing an entry, we + * may miss sending a withdraw for an entry as part of a refresh. + */ + if (!advertise_list_is_empty (target)) + return 0; + + if (update_subgroup_needs_refresh (target)) + return 0; + + return 1; +} + +/* + * update_subgroup_merge + * + * Merge the first subgroup into the second one. + */ +static void +update_subgroup_merge (struct update_subgroup *subgrp, + struct update_subgroup *target, const char *reason) +{ + struct peer_af *paf; + int result; + int peer_count; + + assert (subgrp->adj_count == target->adj_count); + + peer_count = subgrp->peer_count; + + while (1) + { + paf = LIST_FIRST (&subgrp->peers); + if (!paf) + break; + + update_subgroup_remove_peer_internal (subgrp, paf); + + /* + * Add the peer to the target subgroup, while making sure that + * any currently enqueued packets won't be sent to it. Enqueued + * packets could, for example, result in an unnecessary withdraw + * followed by an advertise. + */ + update_subgroup_add_peer (target, paf, 0); + } + + SUBGRP_INCR_STAT (target, merge_events); + + if (BGP_DEBUG (update_groups, UPDATE_GROUPS)) + zlog_debug ("u%llu:s%llu (%d peers) merged into u%llu:s%llu, " + "trigger: %s", subgrp->update_group->id, subgrp->id, peer_count, + target->update_group->id, target->id, reason ? reason : "unknown"); + + result = update_subgroup_check_delete (subgrp); + assert (result); +} + +/* + * update_subgroup_check_merge + * + * Merge this subgroup into another subgroup if possible. + * + * Returns TRUE if the subgroup has been merged. The subgroup pointer + * should not be accessed in this case. 
+ */ +int +update_subgroup_check_merge (struct update_subgroup *subgrp, + const char *reason) +{ + struct update_subgroup *target; + + if (!update_subgroup_ready_for_merge (subgrp)) + return 0; + + /* + * Look for a subgroup to merge into. + */ + UPDGRP_FOREACH_SUBGRP (subgrp->update_group, target) + { + if (update_subgroup_can_merge_into (subgrp, target)) + break; + } + + if (!target) + return 0; + + update_subgroup_merge (subgrp, target, reason); + return 1; +} + + /* + * update_subgroup_merge_check_thread_cb + */ +static int +update_subgroup_merge_check_thread_cb (struct thread *thread) +{ + struct update_subgroup *subgrp; + + subgrp = THREAD_ARG (thread); + + subgrp->t_merge_check = NULL; + + update_subgroup_check_merge (subgrp, "triggered merge check"); + return 0; +} + +/* + * update_subgroup_trigger_merge_check + * + * Triggers a call to update_subgroup_check_merge() on a clean context. + * + * @param force If true, the merge check will be triggered even if the + * subgroup doesn't currently look ready for a merge. + * + * Returns TRUE if a merge check will be performed shortly. + */ +int +update_subgroup_trigger_merge_check (struct update_subgroup *subgrp, + int force) +{ + if (subgrp->t_merge_check) + return 1; + + if (!force && !update_subgroup_ready_for_merge (subgrp)) + return 0; + + subgrp->t_merge_check = + thread_add_background (master, + update_subgroup_merge_check_thread_cb, + subgrp, 0); + + SUBGRP_INCR_STAT (subgrp, merge_checks_triggered); + + return 1; +} + +/* + * update_subgroup_copy_adj_out + * + * Helper function that clones the adj out (state about advertised + * routes) from one subgroup to another. It assumes that the adj out + * of the target subgroup is empty. + */ +static void +update_subgroup_copy_adj_out (struct update_subgroup *source, + struct update_subgroup *dest) +{ + struct bgp_adj_out *aout, *aout_copy; + + SUBGRP_FOREACH_ADJ (source, aout) + { + /* + * Copy the adj out. 
+ */ + aout_copy = bgp_adj_out_alloc (dest, aout->rn); + aout_copy->attr = aout->attr ? bgp_attr_refcount (aout->attr) : NULL; + } +} + +/* + * update_subgroup_copy_packets + * + * Copy packets after and including the given packet to the subgroup + * 'dest'. + * + * Returns the number of packets copied. + */ +static int +update_subgroup_copy_packets (struct update_subgroup *dest, + struct bpacket *pkt) +{ + int count; + + count = 0; + while (pkt && pkt->buffer) + { + bpacket_queue_add (SUBGRP_PKTQ (dest), stream_dup (pkt->buffer), + &pkt->arr); + count++; + pkt = bpacket_next (pkt); + } + + bpacket_queue_sanity_check (SUBGRP_PKTQ (dest)); + + return count; +} + +static int +updgrp_prefix_list_update (struct update_group *updgrp, char *name) +{ + struct peer *peer; + struct bgp_filter *filter; + + peer = UPDGRP_PEER (updgrp); + filter = &peer->filter[UPDGRP_AFI(updgrp)][UPDGRP_SAFI(updgrp)]; + + if (PREFIX_LIST_OUT_NAME(filter) && + (strcmp (name, PREFIX_LIST_OUT_NAME(filter)) == 0)) + { + PREFIX_LIST_OUT(filter) = + prefix_list_lookup (UPDGRP_AFI(updgrp), PREFIX_LIST_OUT_NAME(filter)); + return 1; + } + return 0; +} + +static int +updgrp_filter_list_update (struct update_group *updgrp, char *name) +{ + struct peer *peer; + struct bgp_filter *filter; + + peer = UPDGRP_PEER (updgrp); + filter = &peer->filter[UPDGRP_AFI(updgrp)][UPDGRP_SAFI(updgrp)]; + + if (FILTER_LIST_OUT_NAME(filter) && + (strcmp (name, FILTER_LIST_OUT_NAME(filter)) == 0)) + { + FILTER_LIST_OUT(filter) = as_list_lookup (FILTER_LIST_OUT_NAME(filter)); + return 1; + } + return 0; +} + +static int +updgrp_distribute_list_update (struct update_group *updgrp, char *name) +{ + struct peer *peer; + struct bgp_filter *filter; + + peer = UPDGRP_PEER(updgrp); + filter = &peer->filter[UPDGRP_AFI(updgrp)][UPDGRP_SAFI(updgrp)]; + + if (DISTRIBUTE_OUT_NAME(filter) && + (strcmp (name, DISTRIBUTE_OUT_NAME(filter)) == 0)) + { + DISTRIBUTE_OUT(filter) = access_list_lookup(UPDGRP_AFI(updgrp), + 
DISTRIBUTE_OUT_NAME(filter)); + return 1; + } + return 0; +} + +static int +updgrp_route_map_update (struct update_group *updgrp, char *name, + int *def_rmap_changed) +{ + struct peer *peer; + struct bgp_filter *filter; + int changed = 0; + afi_t afi; + safi_t safi; + + peer = UPDGRP_PEER (updgrp); + afi = UPDGRP_AFI (updgrp); + safi = UPDGRP_SAFI (updgrp); + filter = &peer->filter[afi][safi]; + + if (ROUTE_MAP_OUT_NAME(filter) && + (strcmp (name, ROUTE_MAP_OUT_NAME(filter)) == 0)) + { + ROUTE_MAP_OUT(filter) = route_map_lookup_by_name (name); + + changed = 1; + } + + if (UNSUPPRESS_MAP_NAME(filter) && + (strcmp (name, UNSUPPRESS_MAP_NAME(filter)) == 0)) + { + UNSUPPRESS_MAP(filter) = route_map_lookup_by_name (name); + changed = 1; + } + + /* process default-originate route-map */ + if (peer->default_rmap[afi][safi].name && + (strcmp (name, peer->default_rmap[afi][safi].name) == 0)) + { + peer->default_rmap[afi][safi].map = route_map_lookup_by_name (name); + if (def_rmap_changed) + *def_rmap_changed = 1; + } + return changed; +} + +/* + * hash iteration callback function to process a policy change for an + * update group. Check if the changed policy matches the updgrp's + * outbound route-map or unsuppress-map or default-originate map or + * filter-list or prefix-list or distribute-list. + * Trigger update generation accordingly. 
+ */ +static int +updgrp_policy_update_walkcb (struct update_group *updgrp, void *arg) +{ + struct updwalk_context *ctx = arg; + struct update_subgroup *subgrp; + int changed = 0; + int def_changed = 0; + + if (!updgrp || !ctx || !ctx->policy_name) + return UPDWALK_CONTINUE; + + switch (ctx->policy_type) { + case BGP_POLICY_ROUTE_MAP: + changed = updgrp_route_map_update(updgrp, ctx->policy_name, &def_changed); + break; + case BGP_POLICY_FILTER_LIST: + changed = updgrp_filter_list_update(updgrp, ctx->policy_name); + break; + case BGP_POLICY_PREFIX_LIST: + changed = updgrp_prefix_list_update(updgrp, ctx->policy_name); + break; + case BGP_POLICY_DISTRIBUTE_LIST: + changed = updgrp_distribute_list_update(updgrp, ctx->policy_name); + break; + default: + break; + } + + /* If not doing route update, return after updating "config" */ + if (!ctx->policy_route_update) + return UPDWALK_CONTINUE; + + /* If nothing has changed, return after updating "config" */ + if (!changed && !def_changed) + return UPDWALK_CONTINUE; + + /* + * If something has changed, at the beginning of a route-map modification + * event, mark each subgroup's needs-refresh bit. For one, it signals to + * whoever that the subgroup needs a refresh. Second, it prevents premature + * merge of this subgroup with another before a complete (outbound) refresh. 
+ */ + if (ctx->policy_event_start_flag) + { + UPDGRP_FOREACH_SUBGRP(updgrp, subgrp) + { + update_subgroup_set_needs_refresh(subgrp, 1); + } + return UPDWALK_CONTINUE; + } + + UPDGRP_FOREACH_SUBGRP (updgrp, subgrp) + { + if (changed) + { + if (bgp_debug_update(NULL, NULL, updgrp, 0)) + zlog_debug ("u%llu:s%llu announcing routes upon policy %s (type %d) change", + updgrp->id, subgrp->id, ctx->policy_name, ctx->policy_type); + subgroup_announce_route (subgrp); + } + if (def_changed) + { + if (bgp_debug_update(NULL, NULL, updgrp, 0)) + zlog_debug ("u%llu:s%llu announcing default upon default routemap %s change", + updgrp->id, subgrp->id, ctx->policy_name); + subgroup_default_originate (subgrp, 0); + } + update_subgroup_set_needs_refresh(subgrp, 0); + } + return UPDWALK_CONTINUE; +} + +static int +update_group_walkcb (struct hash_backet *backet, void *arg) +{ + struct update_group *updgrp = backet->data; + struct updwalk_context *wctx = arg; + int ret = (*wctx->cb) (updgrp, wctx->context); + return ret; +} + +static int +update_group_periodic_merge_walkcb (struct update_group *updgrp, void *arg) +{ + struct update_subgroup *subgrp; + struct update_subgroup *tmp_subgrp; + const char *reason = arg; + + UPDGRP_FOREACH_SUBGRP_SAFE (updgrp, subgrp, tmp_subgrp) + update_subgroup_check_merge (subgrp, reason); + return UPDWALK_CONTINUE; +} + +/******************** + * PUBLIC FUNCTIONS + ********************/ + +/* + * trigger function when a policy (route-map/filter-list/prefix-list/ + * distribute-list etc.) content changes. Go through all the + * update groups and process the change. + * + * bgp: the bgp instance + * ptype: the type of policy that got modified, see bgpd.h + * pname: name of the policy + * route_update: flag to control if an automatic update generation should + * occur + * start_event: flag that indicates if it's the beginning of the change. + * Esp. when the user is changing the content interactively + * over multiple statements. 
Useful to set dirty flag on + * update groups. + */ +void +update_group_policy_update (struct bgp *bgp, bgp_policy_type_e ptype, + char *pname, int route_update, int start_event) +{ + struct updwalk_context ctx; + + memset (&ctx, 0, sizeof (ctx)); + ctx.policy_type = ptype; + ctx.policy_name = pname; + ctx.policy_route_update = route_update; + ctx.policy_event_start_flag = start_event; + ctx.flags = 0; + + update_group_walk (bgp, updgrp_policy_update_walkcb, &ctx); +} + +/* + * update_subgroup_split_peer + * + * Ensure that the given peer is in a subgroup of its own in the + * specified update group. + */ +void +update_subgroup_split_peer (struct peer_af *paf, struct update_group *updgrp) +{ + struct update_subgroup *old_subgrp, *subgrp; + uint64_t old_id; + + + old_subgrp = paf->subgroup; + + if (!updgrp) + updgrp = old_subgrp->update_group; + + /* + * If the peer is alone in its subgroup, reuse the existing + * subgroup. + */ + if (old_subgrp->peer_count == 1) + { + if (updgrp == old_subgrp->update_group) + return; + + subgrp = old_subgrp; + old_id = old_subgrp->update_group->id; + + if (bgp_debug_peer_updout_enabled(paf->peer)) + { + UPDGRP_PEER_DBG_DIS(old_subgrp->update_group); + } + + update_group_remove_subgroup (old_subgrp->update_group, old_subgrp); + update_group_add_subgroup (updgrp, subgrp); + + if (bgp_debug_peer_updout_enabled(paf->peer)) + { + UPDGRP_PEER_DBG_EN(updgrp); + } + if (BGP_DEBUG (update_groups, UPDATE_GROUPS)) + zlog_debug ("u%llu:s%llu peer %s moved to u%llu:s%llu", + old_id, subgrp->id, paf->peer->host, updgrp->id, subgrp->id); + + /* + * The state of the subgroup (adj_out, advs, packet queue etc) + * is consistent internally, but may not be identical to other + * subgroups in the new update group even if the version number + * matches up. Make sure a full refresh is done before the + * subgroup is merged with another. 
+ */ + update_subgroup_set_needs_refresh (subgrp, 1); + + SUBGRP_INCR_STAT (subgrp, updgrp_switch_events); + return; + } + + /* + * Create a new subgroup under the specified update group, and copy + * over relevant state to it. + */ + subgrp = update_subgroup_create (updgrp); + update_subgroup_inherit_info (subgrp, old_subgrp); + + subgrp->split_from.update_group_id = old_subgrp->update_group->id; + subgrp->split_from.subgroup_id = old_subgrp->id; + + /* + * Copy out relevant state from the old subgroup. + */ + update_subgroup_copy_adj_out (paf->subgroup, subgrp); + update_subgroup_copy_packets (subgrp, paf->next_pkt_to_send); + + if (BGP_DEBUG (update_groups, UPDATE_GROUPS)) + zlog_debug ("u%llu:s%llu peer %s split and moved into u%llu:s%llu", + paf->subgroup->update_group->id, paf->subgroup->id, + paf->peer->host, updgrp->id, subgrp->id); + + SUBGRP_INCR_STAT (paf->subgroup, split_events); + + /* + * Since queued advs were left behind, this new subgroup needs a + * refresh. + */ + update_subgroup_set_needs_refresh (subgrp, 1); + + /* + * Remove peer from old subgroup, and add it to the new one. + */ + update_subgroup_remove_peer (paf->subgroup, paf); + + update_subgroup_add_peer (subgrp, paf, 1); +} + +void +update_group_init (struct bgp *bgp) +{ + int afid; + + AF_FOREACH (afid) + bgp->update_groups[afid] = hash_create (updgrp_hash_key_make, + updgrp_hash_cmp); +} + +void +update_group_show (struct bgp *bgp, afi_t afi, safi_t safi, struct vty *vty) +{ + update_group_af_walk (bgp, afi, safi, update_group_show_walkcb, vty); +} + +/* + * update_group_show_stats + * + * Show global statistics about update groups. 
+ */ +void +update_group_show_stats (struct bgp *bgp, struct vty *vty) +{ + vty_out (vty, "Update groups created: %u%s", + bgp->update_group_stats.updgrps_created, VTY_NEWLINE); + vty_out (vty, "Update groups deleted: %u%s", + bgp->update_group_stats.updgrps_deleted, VTY_NEWLINE); + vty_out (vty, "Update subgroups created: %u%s", + bgp->update_group_stats.subgrps_created, VTY_NEWLINE); + vty_out (vty, "Update subgroups deleted: %u%s", + bgp->update_group_stats.subgrps_deleted, VTY_NEWLINE); + vty_out (vty, "Join events: %u%s", + bgp->update_group_stats.join_events, VTY_NEWLINE); + vty_out (vty, "Prune events: %u%s", + bgp->update_group_stats.prune_events, VTY_NEWLINE); + vty_out (vty, "Merge events: %u%s", + bgp->update_group_stats.merge_events, VTY_NEWLINE); + vty_out (vty, "Split events: %u%s", + bgp->update_group_stats.split_events, VTY_NEWLINE); + vty_out (vty, "Update group switch events: %u%s", + bgp->update_group_stats.updgrp_switch_events, VTY_NEWLINE); + vty_out (vty, "Peer route refreshes combined: %u%s", + bgp->update_group_stats.peer_refreshes_combined, VTY_NEWLINE); + vty_out (vty, "Merge checks triggered: %u%s", + bgp->update_group_stats.merge_checks_triggered, VTY_NEWLINE); +} + +/* + * update_group_adjust_peer + */ +void +update_group_adjust_peer (struct peer_af *paf) +{ + struct update_group *updgrp; + struct update_subgroup *subgrp, *old_subgrp; + struct peer *peer; + + if (!paf) + return; + + peer = PAF_PEER (paf); + if (!peer_established (peer)) + { + return; + } + + if (!CHECK_FLAG (peer->flags, PEER_FLAG_CONFIG_NODE)) + { + return; + } + + if (!peer->afc_nego[paf->afi][paf->safi]) + { + return; + } + + updgrp = update_group_find (paf); + if (!updgrp) + { + updgrp = update_group_create (paf); + if (!updgrp) + { + zlog_err ("couldn't create update group for peer %s", + paf->peer->host); + return; + } + } + + old_subgrp = paf->subgroup; + + if (old_subgrp) + { + + /* + * If the update group of the peer is unchanged, the peer can stay + * in its 
existing subgroup and we're done. + */ + if (old_subgrp->update_group == updgrp) + return; + + /* + * The peer is switching between update groups. Put it in its + * own subgroup under the new update group. + */ + update_subgroup_split_peer (paf, updgrp); + return; + } + + subgrp = update_subgroup_find (updgrp, paf); + if (!subgrp) + { + subgrp = update_subgroup_create (updgrp); + if (!subgrp) + return; + } + + update_subgroup_add_peer (subgrp, paf, 1); + if (BGP_DEBUG (update_groups, UPDATE_GROUPS)) + zlog_debug ("u%llu:s%llu add peer %s", + updgrp->id, subgrp->id, paf->peer->host); + + return; +} + +int +update_group_adjust_soloness (struct peer *peer, int set) +{ + struct peer_group *group; + struct listnode *node, *nnode; + + if (peer_group_active (peer)) + return BGP_ERR_INVALID_FOR_PEER_GROUP_MEMBER; + + if (!CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + { + peer_lonesoul_or_not (peer, set); + if (peer->status == Established) + bgp_announce_route_all (peer); + } + else + { + group = peer->group; + for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) + { + peer_lonesoul_or_not (peer, set); + if (peer->status == Established) + bgp_announce_route_all (peer); + } + } + return 0; +} + +/* + * update_subgroup_rib + */ +struct bgp_table * +update_subgroup_rib (struct update_subgroup *subgrp) +{ + struct bgp *bgp; + + bgp = SUBGRP_INST (subgrp); + if (!bgp) + return NULL; + + return bgp->rib[SUBGRP_AFI (subgrp)][SUBGRP_SAFI (subgrp)]; +} + +void +update_group_af_walk (struct bgp *bgp, afi_t afi, safi_t safi, + updgrp_walkcb cb, void *ctx) +{ + struct updwalk_context wctx; + int afid; + + if (!bgp) + return; + afid = afindex (afi, safi); + if (afid >= BGP_AF_MAX) + return; + + memset (&wctx, 0, sizeof (wctx)); + wctx.cb = cb; + wctx.context = ctx; + hash_walk (bgp->update_groups[afid], update_group_walkcb, &wctx); +} + +void +update_group_walk (struct bgp *bgp, updgrp_walkcb cb, void *ctx) +{ + afi_t afi; + safi_t safi; + + FOREACH_AFI_SAFI (afi, safi) + { + 
update_group_af_walk (bgp, afi, safi, cb, ctx); + } +} + +void +update_group_periodic_merge (struct bgp *bgp) +{ + char reason[] = "periodic merge check"; + + update_group_walk (bgp, update_group_periodic_merge_walkcb, + (void *) reason); +} + +/* + * peer_af_announce_route + * + * Refreshes routes out to a peer_af immediately. + * + * If the combine parameter is TRUE, then this function will try to + * gather other peers in the subgroup for which a route announcement + * is pending and efficently announce routes to all of them. + * + * For now, the 'combine' option has an effect only if all peers in + * the subgroup have a route announcement pending. + */ +void +peer_af_announce_route (struct peer_af *paf, int combine) +{ + struct update_subgroup *subgrp; + struct peer_af *cur_paf; + int all_pending; + + subgrp = paf->subgroup; + all_pending = 0; + + if (combine) + { + struct peer_af *temp_paf; + + /* + * If there are other peers in the old subgroup that also need + * routes to be announced, pull them into the peer's new + * subgroup. + * Combine route announcement with other peers if possible. + * + * For now, we combine only if all peers in the subgroup have an + * announcement pending. + */ + all_pending = 1; + + SUBGRP_FOREACH_PEER (subgrp, cur_paf) + { + if (cur_paf == paf) + continue; + + if (cur_paf->t_announce_route) + continue; + + all_pending = 0; + break; + } + } + /* + * Announce to the peer alone if we were not asked to combine peers, + * or if some peers don't have a route annoucement pending. + */ + if (!combine || !all_pending) + { + update_subgroup_split_peer (paf, NULL); + if (!paf->subgroup) + return; + + if (bgp_debug_update(paf->peer, NULL, subgrp->update_group, 0)) + zlog_debug ("u%llu:s%llu %s announcing routes", + subgrp->update_group->id, subgrp->id, paf->peer->host); + + subgroup_announce_route (paf->subgroup); + return; + } + + /* + * We will announce routes the entire subgroup. + * + * First stop refresh timers on all the other peers. 
+ */ + SUBGRP_FOREACH_PEER (subgrp, cur_paf) + { + if (cur_paf == paf) + continue; + + bgp_stop_announce_route_timer (cur_paf); + } + + if (bgp_debug_update(paf->peer, NULL, subgrp->update_group, 0)) + zlog_debug ("u%llu:s%llu announcing routes to %s, combined into %d peers", + subgrp->update_group->id, subgrp->id, + paf->peer->host, subgrp->peer_count); + + subgroup_announce_route (subgrp); + + SUBGRP_INCR_STAT_BY (subgrp, peer_refreshes_combined, + subgrp->peer_count - 1); +} + +void +subgroup_trigger_write (struct update_subgroup *subgrp) +{ + struct peer_af *paf; + +#if 0 + if (bgp_debug_update(NULL, NULL, subgrp->update_group, 0)) + zlog_debug("u%llu:s%llu scheduling write thread for peers", + subgrp->update_group->id, subgrp->id); +#endif + SUBGRP_FOREACH_PEER (subgrp, paf) + { + if (paf->peer->status == Established) + { + BGP_PEER_WRITE_ON (paf->peer->t_write, bgp_write, paf->peer->fd, + paf->peer); + } + } +} + +int +update_group_clear_update_dbg (struct update_group *updgrp, void *arg) +{ + UPDGRP_PEER_DBG_OFF(updgrp); + return UPDWALK_CONTINUE; +} diff --git a/bgpd/bgp_updgrp.h b/bgpd/bgp_updgrp.h new file mode 100644 index 000000000000..aa2ef5036247 --- /dev/null +++ b/bgpd/bgp_updgrp.h @@ -0,0 +1,594 @@ +/** + * bgp_updgrp.c: BGP update group structures + * + * @copyright Copyright (C) 2014 Cumulus Networks, Inc. + * + * @author Avneesh Sachdev + * @author Rajesh Varadarajan + * @author Pradosh Mohapatra + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with GNU Zebra; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. + */ + +#ifndef _QUAGGA_BGP_UPDGRP_H +#define _QUAGGA_BGP_UPDGRP_H + +#include "bgp_advertise.h" + +#define BGP_DEFAULT_SUBGROUP_COALESCE_TIME 200 + +#define PEER_UPDGRP_FLAGS (PEER_FLAG_LOCAL_AS_NO_PREPEND | \ + PEER_FLAG_LOCAL_AS_REPLACE_AS) + +#define PEER_UPDGRP_AF_FLAGS (PEER_FLAG_SEND_COMMUNITY | \ + PEER_FLAG_SEND_EXT_COMMUNITY | \ + PEER_FLAG_DEFAULT_ORIGINATE | \ + PEER_FLAG_REFLECTOR_CLIENT | \ + PEER_FLAG_NEXTHOP_SELF | \ + PEER_FLAG_NEXTHOP_UNCHANGED | \ + PEER_FLAG_AS_PATH_UNCHANGED | \ + PEER_FLAG_MED_UNCHANGED | \ + PEER_FLAG_NEXTHOP_LOCAL_UNCHANGED | \ + PEER_FLAG_REMOVE_PRIVATE_AS | \ + PEER_FLAG_REMOVE_PRIVATE_AS_ALL | \ + PEER_FLAG_REMOVE_PRIVATE_AS_REPLACE | \ + PEER_FLAG_AS_OVERRIDE) + +#define PEER_UPDGRP_CAP_FLAGS (PEER_CAP_AS4_RCV) + +#define PEER_UPDGRP_AF_CAP_FLAGS (PEER_CAP_ORF_PREFIX_SM_RCV | \ + PEER_CAP_ORF_PREFIX_SM_OLD_RCV) + +typedef enum +{ + BGP_ATTR_VEC_NH = 0, + BGP_ATTR_VEC_MAX +} bpacket_attr_vec_type; + +typedef struct +{ + u_int32_t flags; + unsigned long offset; +} bpacket_attr_vec; + +#define BPACKET_ATTRVEC_FLAGS_UPDATED (1 << 0) +#define BPACKET_ATTRVEC_FLAGS_RMAP_CHANGED (1 << 1) +#define BPACKET_ATTRVEC_FLAGS_RMAP_NH_PEER_ADDRESS (1 << 2) +#define BPACKET_ATTRVEC_FLAGS_REFLECTED (1 << 3) + +typedef struct bpacket_attr_vec_arr +{ + bpacket_attr_vec entries[BGP_ATTR_VEC_MAX]; +} bpacket_attr_vec_arr; + +struct bpacket +{ + /* for being part of an update subgroup's message list */ + TAILQ_ENTRY (bpacket) pkt_train; + + /* list of peers (well, peer_afs) that the packet needs to be sent to */ + LIST_HEAD (pkt_peer_list, peer_af) peers; + + struct stream *buffer; + bpacket_attr_vec_arr arr; + + unsigned int ver; +}; + +struct bpacket_queue +{ + TAILQ_HEAD (pkt_queue, bpacket) pkts; + +#if 
0 + /* A dummy packet that is used to thread all peers that have + completed their work */ + struct bpacket sentinel; +#endif + + unsigned int conf_max_count; + unsigned int curr_count; + unsigned int hwm_count; + unsigned int max_count_reached_count; +}; + +struct update_group +{ + /* back pointer to the BGP instance */ + struct bgp *bgp; + + /* list of subgroups that belong to the update group */ + LIST_HEAD (subgrp_list, update_subgroup) subgrps; + + /* lazy way to store configuration common to all peers + hash function will compute from this data */ + struct peer *conf; + + afi_t afi; + safi_t safi; + int afid; + + u_int64_t id; + time_t uptime; + + u_int32_t join_events; + u_int32_t prune_events; + u_int32_t merge_events; + u_int32_t updgrp_switch_events; + u_int32_t peer_refreshes_combined; + u_int32_t adj_count; + u_int32_t split_events; + u_int32_t merge_checks_triggered; + + u_int32_t subgrps_created; + u_int32_t subgrps_deleted; + + u_int32_t num_dbg_en_peers; +}; + +/* + * Shorthand for a global statistics counter. + */ +#define UPDGRP_GLOBAL_STAT(updgrp, stat) \ + ((updgrp)->bgp->update_group_stats.stat) + +/* + * Add the given value to a counter on an update group and the bgp + * instance. + */ +#define UPDGRP_INCR_STAT_BY(updgrp, stat, value) \ + do { \ + (updgrp)->stat += (value); \ + UPDGRP_GLOBAL_STAT(updgrp, stat) += (value); \ + } while (0) + +/* + * Increment a counter on a update group and its parent structures. + */ +#define UPDGRP_INCR_STAT(subgrp, stat) \ + UPDGRP_INCR_STAT_BY(subgrp, stat, 1) + +struct update_subgroup +{ + /* back pointer to the parent update group */ + struct update_group *update_group; + + /* list of peers that belong to the subgroup */ + LIST_HEAD (peer_list, peer_af) peers; + int peer_count; + + /* for being part of an update group's subgroup list */ + LIST_ENTRY (update_subgroup) updgrp_train; + + struct bpacket_queue pkt_queue; + + /* + * List of adj-out structures for this subgroup. 
+ * It essentially represents the snapshot of every prefix that + * has been advertised to the members of the subgroup + */ + TAILQ_HEAD (adjout_queue, bgp_adj_out) adjq; + + /* packet buffer for update generation */ + struct stream *work; + + /* We use a separate stream to encode MP_REACH_NLRI for efficient + * NLRI packing. peer->work stores all the other attributes. The + * actual packet is then constructed by concatenating the two. + */ + struct stream *scratch; + + /* synchronization list and time */ + struct bgp_synchronize *sync; + + /* send prefix count */ + unsigned long scount; + + /* announcement attribute hash */ + struct hash *hash; + + struct thread *t_coalesce; + u_int32_t v_coalesce; + + struct thread *t_merge_check; + + /* table version that the subgroup has caught up to. */ + uint64_t version; + + /* version maintained to record adj changes */ + uint64_t adj_version; + + time_t uptime; + + /* + * Identifying information about the subgroup that this subgroup was split + * from, if any. + */ + struct + { + u_int64_t update_group_id; + u_int64_t subgroup_id; + } split_from; + + u_int32_t join_events; + u_int32_t prune_events; + + /* + * This is bumped up when another subgroup merges into this one. + */ + u_int32_t merge_events; + u_int32_t updgrp_switch_events; + u_int32_t peer_refreshes_combined; + u_int32_t adj_count; + u_int32_t split_events; + u_int32_t merge_checks_triggered; + + u_int64_t id; + struct zlog *log; + + u_int16_t sflags; + + /* Subgroup flags, see below */ + u_int16_t flags; +}; + +/* + * We need to do an outbound refresh to get this subgroup into a + * consistent state. + */ +#define SUBGRP_FLAG_NEEDS_REFRESH (1 << 0) + +#define SUBGRP_STATUS_DEFAULT_ORIGINATE (1 << 0) + +/* + * Add the given value to the specified counter on a subgroup and its + * parent structures. 
+ */ +#define SUBGRP_INCR_STAT_BY(subgrp, stat, value) \ + do { \ + (subgrp)->stat += (value); \ + if ((subgrp)->update_group) \ + UPDGRP_INCR_STAT_BY((subgrp)->update_group, stat, value); \ + } while (0) + +/* + * Increment a counter on a subgroup and its parent structures. + */ +#define SUBGRP_INCR_STAT(subgrp, stat) \ + SUBGRP_INCR_STAT_BY(subgrp, stat, 1) + +/* + * Decrement a counter on a subgroup and its parent structures. + */ +#define SUBGRP_DECR_STAT(subgrp, stat) \ + SUBGRP_INCR_STAT_BY(subgrp, stat, -1) + + +typedef int (*updgrp_walkcb) (struct update_group * updgrp, void *ctx); + +/* really a private structure */ +struct updwalk_context +{ + struct vty *vty; + struct bgp_node *rn; + struct bgp_info *ri; + u_int64_t updgrp_id; + u_int64_t subgrp_id; + bgp_policy_type_e policy_type; + char *policy_name; + int policy_event_start_flag; + int policy_route_update; + updgrp_walkcb cb; + void *context; + u_int8_t flags; + +#define UPDWALK_FLAGS_ADVQUEUE (1 << 0) +#define UPDWALK_FLAGS_ADVERTISED (1 << 1) +}; + +#define UPDWALK_CONTINUE HASHWALK_CONTINUE +#define UPDWALK_ABORT HASHWALK_ABORT + +#define PAF_PEER(p) ((p)->peer) +#define PAF_SUBGRP(p) ((p)->subgroup) +#define PAF_UPDGRP(p) ((p)->subgroup->update_group) +#define PAF_PKTQ(f) SUBGRP_PKTQ((f)->subgroup) + +#define UPDGRP_PEER(u) ((u)->conf) +#define UPDGRP_AFI(u) ((u)->afi) +#define UPDGRP_SAFI(u) ((u)->safi) +#define UPDGRP_INST(u) ((u)->bgp) +#define UPDGRP_AFFLAGS(u) \ + ((u)->conf->af_flags[UPDGRP_AFI(u)][UPDGRP_SAFI(u)]) +#define UPDGRP_DBG_ON(u) ((u)->num_dbg_en_peers) +#define UPDGRP_PEER_DBG_EN(u) (((u)->num_dbg_en_peers)++) +#define UPDGRP_PEER_DBG_DIS(u) (((u)->num_dbg_en_peers)--) +#define UPDGRP_PEER_DBG_OFF(u) (u)->num_dbg_en_peers = 0 + +#define SUBGRP_AFI(s) UPDGRP_AFI((s)->update_group) +#define SUBGRP_SAFI(s) UPDGRP_SAFI((s)->update_group) +#define SUBGRP_PEER(s) UPDGRP_PEER((s)->update_group) +#define SUBGRP_PCOUNT(s) ((s)->peer_count) +#define SUBGRP_PFIRST(s) 
/*
 * Iteration helpers.
 *
 * Every macro argument that is dereferenced is parenthesized so that
 * callers may pass arbitrary pointer expressions (e.g. "base + 1" or
 * "cond ? a : b"), not just plain identifiers.
 */

/*
 * Walk all subgroups in an update group.
 */
#define UPDGRP_FOREACH_SUBGRP(updgrp, subgrp)                           \
  LIST_FOREACH(subgrp, &((updgrp)->subgrps), updgrp_train)

/*
 * Walk all subgroups in an update group; the current subgroup may be
 * unlinked inside the loop body.
 */
#define UPDGRP_FOREACH_SUBGRP_SAFE(updgrp, subgrp, tmp_subgrp)          \
  LIST_FOREACH_SAFE(subgrp, &((updgrp)->subgrps), updgrp_train, tmp_subgrp)

/*
 * Walk all peer_af structures that are members of a subgroup.
 */
#define SUBGRP_FOREACH_PEER(subgrp, paf)                                \
  LIST_FOREACH(paf, &((subgrp)->peers), subgrp_train)

#define SUBGRP_FOREACH_PEER_SAFE(subgrp, paf, temp_paf)                 \
  LIST_FOREACH_SAFE(paf, &((subgrp)->peers), subgrp_train, temp_paf)

/*
 * Walk all adj-out structures queued on a subgroup.
 */
#define SUBGRP_FOREACH_ADJ(subgrp, adj)                                 \
  TAILQ_FOREACH(adj, &((subgrp)->adjq), subgrp_adj_train)

#define SUBGRP_FOREACH_ADJ_SAFE(subgrp, adj, adj_temp)                  \
  TAILQ_FOREACH_SAFE(adj, &((subgrp)->adjq), subgrp_adj_train, adj_temp)
*/ +/* bgp_updgrp.c */ +extern void update_group_init (struct bgp *); +extern void +update_group_show (struct bgp *bgp, afi_t afi, safi_t safi, struct vty *vty); +extern void update_group_show_stats (struct bgp *bgp, struct vty *vty); +extern void update_group_adjust_peer (struct peer_af *paf); +extern int update_group_adjust_soloness (struct peer *peer, int set); + +extern void +update_subgroup_remove_peer (struct update_subgroup *, struct peer_af *); +extern struct bgp_table *update_subgroup_rib (struct update_subgroup *); +extern void +update_subgroup_split_peer (struct peer_af *, struct update_group *); +extern int +update_subgroup_check_merge (struct update_subgroup *, const char *); +extern int +update_subgroup_trigger_merge_check (struct update_subgroup *, + int force); +extern void update_group_policy_update (struct bgp *bgp, + bgp_policy_type_e ptype, char *pname, + int route_update, int start_event); +extern void update_group_af_walk (struct bgp *bgp, afi_t afi, safi_t safi, + updgrp_walkcb cb, void *ctx); +extern void update_group_walk (struct bgp *bgp, updgrp_walkcb cb, void *ctx); +extern void update_group_periodic_merge (struct bgp *bgp); +extern void update_group_start_advtimer (struct bgp *bgp); + +extern void update_subgroup_inherit_info (struct update_subgroup *to, + struct update_subgroup *from); + +/* bgp_updgrp_packet.c */ +extern struct bpacket *bpacket_alloc (void); +extern void bpacket_free (struct bpacket *pkt); +extern void bpacket_queue_init (struct bpacket_queue *q); +extern void bpacket_queue_cleanup (struct bpacket_queue *q); +extern void bpacket_queue_sanity_check (struct bpacket_queue *q); +extern struct bpacket *bpacket_queue_add (struct bpacket_queue *q, + struct stream *s, + struct bpacket_attr_vec_arr + *vecarr); +struct bpacket *bpacket_queue_remove (struct bpacket_queue *q); +extern struct bpacket *bpacket_queue_first (struct bpacket_queue *q); +struct bpacket *bpacket_queue_last (struct bpacket_queue *q); +unsigned int 
bpacket_queue_length (struct bpacket_queue *q); +unsigned int bpacket_queue_hwm_length (struct bpacket_queue *q); +int bpacket_queue_is_full (struct bgp *bgp, struct bpacket_queue *q); +extern void bpacket_queue_advance_peer (struct peer_af *paf); +extern void bpacket_queue_remove_peer (struct peer_af *paf); +extern void bpacket_add_peer (struct bpacket *pkt, struct peer_af *paf); +unsigned int bpacket_queue_virtual_length (struct peer_af *paf); +extern void bpacket_queue_show_vty (struct bpacket_queue *q, struct vty *vty); +int subgroup_packets_to_build (struct update_subgroup *subgrp); +extern struct bpacket *subgroup_update_packet (struct update_subgroup *s); +extern struct bpacket *subgroup_withdraw_packet (struct update_subgroup *s); +extern struct stream *bpacket_reformat_for_peer (struct bpacket *pkt, + struct peer_af *paf); +extern void bpacket_attr_vec_arr_reset (struct bpacket_attr_vec_arr *vecarr); +extern void bpacket_attr_vec_arr_set_vec (struct bpacket_attr_vec_arr *vecarr, + bpacket_attr_vec_type type, + struct stream *s, + struct attr *attr); +extern void +subgroup_default_update_packet (struct update_subgroup *subgrp, + struct attr *attr, struct peer *from); +extern void subgroup_default_withdraw_packet (struct update_subgroup *subgrp); + +/* bgp_updgrp_adv.c */ +extern struct bgp_advertise *bgp_advertise_clean_subgroup (struct + update_subgroup + *subgrp, + struct bgp_adj_out + *adj); +extern void update_group_show_adj_queue (struct bgp *bgp, afi_t afi, + safi_t safi, struct vty *vty, + u_int64_t id); +extern void update_group_show_advertised (struct bgp *bgp, afi_t afi, + safi_t safi, struct vty *vty, + u_int64_t id); +extern void update_group_show_packet_queue (struct bgp *bgp, afi_t afi, + safi_t safi, struct vty *vty, + u_int64_t id); +extern void subgroup_announce_route (struct update_subgroup *subgrp); +extern void subgroup_announce_all (struct update_subgroup *subgrp); + +extern void +subgroup_default_originate (struct update_subgroup 
*subgrp, int withdraw); +extern void +group_announce_route (struct bgp *bgp, afi_t afi, safi_t safi, + struct bgp_node *rn, struct bgp_info *ri); +extern void subgroup_clear_table (struct update_subgroup *subgrp); +extern void update_group_announce (struct bgp *bgp); +extern void update_group_announce_rrclients (struct bgp *bgp); +extern void peer_af_announce_route (struct peer_af *paf, int combine); +extern struct bgp_adj_out *bgp_adj_out_alloc (struct update_subgroup *subgrp, + struct bgp_node *rn); +extern void bgp_adj_out_remove_subgroup (struct bgp_node *rn, + struct bgp_adj_out *adj, + struct update_subgroup *subgrp); +extern void +bgp_adj_out_set_subgroup (struct bgp_node *rn, + struct update_subgroup *subgrp, + struct attr *attr, struct bgp_info *binfo); +extern void +bgp_adj_out_unset_subgroup (struct bgp_node *rn, + struct update_subgroup *subgrp); +void +subgroup_announce_table (struct update_subgroup *subgrp, + struct bgp_table *table, int rsclient); +extern void +subgroup_trigger_write (struct update_subgroup *subgrp); + +extern int +update_group_clear_update_dbg (struct update_group *updgrp, void *arg); + +/* + * Inline functions + */ + +/* + * bpacket_queue_is_empty + */ +static inline int +bpacket_queue_is_empty (struct bpacket_queue *queue) +{ + + /* + * The packet queue is empty if it only contains a sentinel. + */ + if (queue->curr_count != 1) + return 0; + + assert (bpacket_queue_first (queue)->buffer == NULL); + return 1; +} + +/* + * bpacket_next + * + * Returns the packet after the given packet in a bpacket queue. + */ +static inline struct bpacket * +bpacket_next (struct bpacket *pkt) +{ + return TAILQ_NEXT (pkt, pkt_train); +} + +/* + * update_group_adjust_peer_afs + * + * Adjust all peer_af structures for the given peer. 
+ */ +static inline void +update_group_adjust_peer_afs (struct peer *peer) +{ + struct peer_af *paf; + afi_t afi; + + PEERAF_FOREACH (peer, paf, afi) update_group_adjust_peer (paf); +} + +/* + * update_group_remove_peer_afs + * + * Remove all peer_af structures for the given peer from their subgroups. + */ +static inline void +update_group_remove_peer_afs (struct peer *peer) +{ + struct peer_af *paf; + afi_t afi; + + PEERAF_FOREACH (peer, paf, afi) + update_subgroup_remove_peer (PAF_SUBGRP (paf), paf); +} + +/* + * update_subgroup_needs_refresh + */ +static inline int +update_subgroup_needs_refresh (const struct update_subgroup *subgrp) +{ + if (CHECK_FLAG (subgrp->flags, SUBGRP_FLAG_NEEDS_REFRESH)) + return 1; + else + return 0; +} + +/* + * update_subgroup_set_needs_refresh + */ +static inline void +update_subgroup_set_needs_refresh (struct update_subgroup *subgrp, int value) +{ + if (value) + SET_FLAG (subgrp->flags, SUBGRP_FLAG_NEEDS_REFRESH); + else + UNSET_FLAG (subgrp->flags, SUBGRP_FLAG_NEEDS_REFRESH); +} + +static inline struct update_subgroup * +peer_subgroup (struct peer *peer, afi_t afi, safi_t safi) +{ + struct peer_af *paf; + + paf = peer_af_find (peer, afi, safi); + if (paf) + return PAF_SUBGRP (paf); + return NULL; +} + +/* + * update_group_adjust_peer_afs + * + * Adjust all peer_af structures for the given peer. 
+ */ +static inline void +bgp_announce_peer (struct peer *peer) +{ + struct peer_af *paf; + int af; + + PEERAF_FOREACH (peer, paf, af) subgroup_announce_all (PAF_SUBGRP (paf)); +} + +/** + * advertise_list_is_empty + */ +static inline int +advertise_list_is_empty (struct update_subgroup *subgrp) +{ + if (!BGP_ADV_FIFO_EMPTY (&subgrp->sync->update) || + !BGP_ADV_FIFO_EMPTY (&subgrp->sync->withdraw) || + !BGP_ADV_FIFO_EMPTY (&subgrp->sync->withdraw_low)) + { + return 0; + } + + return 1; +} + +#endif /* _QUAGGA_BGP_UPDGRP_H */ diff --git a/bgpd/bgp_updgrp_adv.c b/bgpd/bgp_updgrp_adv.c new file mode 100644 index 000000000000..54da37c48c0f --- /dev/null +++ b/bgpd/bgp_updgrp_adv.c @@ -0,0 +1,765 @@ +/** + * bgp_updgrp_adv.c: BGP update group advertisement and adjacency + * maintenance + * + * + * @copyright Copyright (C) 2014 Cumulus Networks, Inc. + * + * @author Avneesh Sachdev + * @author Rajesh Varadarajan + * @author Pradosh Mohapatra + * + * This file is part of GNU Zebra. + * + * GNU Zebra is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * GNU Zebra is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Zebra; see the file COPYING. If not, write to the Free + * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. 
+ */ + +#include + +#include "command.h" +#include "memory.h" +#include "prefix.h" +#include "hash.h" +#include "thread.h" +#include "queue.h" +#include "routemap.h" + +#include "bgpd/bgpd.h" +#include "bgpd/bgp_table.h" +#include "bgpd/bgp_debug.h" +#include "bgpd/bgp_route.h" +#include "bgpd/bgp_advertise.h" +#include "bgpd/bgp_attr.h" +#include "bgpd/bgp_aspath.h" +#include "bgpd/bgp_packet.h" +#include "bgpd/bgp_fsm.h" +#include "bgpd/bgp_mplsvpn.h" +#include "bgpd/bgp_updgrp.h" +#include "bgpd/bgp_advertise.h" + + +/******************** + * PRIVATE FUNCTIONS + ********************/ + +static inline struct bgp_adj_out * +adj_lookup (struct bgp_node *rn, struct update_subgroup *subgrp) +{ + struct bgp_adj_out *adj; + + if (!rn || !subgrp) + return NULL; + for (adj = rn->adj_out; adj; adj = adj->next) + if (adj->subgroup == subgrp) + break; + return adj; +} + +static void +adj_free (struct bgp_adj_out *adj) +{ + TAILQ_REMOVE (&(adj->subgroup->adjq), adj, subgrp_adj_train); + SUBGRP_DECR_STAT (adj->subgroup, adj_count); + XFREE (MTYPE_BGP_ADJ_OUT, adj); +} + +static int +group_announce_route_walkcb (struct update_group *updgrp, void *arg) +{ + struct updwalk_context *ctx = arg; + struct update_subgroup *subgrp; + + UPDGRP_FOREACH_SUBGRP (updgrp, subgrp) + { + + /* + * Skip the subgroups that have coalesce timer running. We will + * walk the entire prefix table for those subgroups when the + * coalesce timer fires. 
+ */ + if (!subgrp->t_coalesce) + subgroup_process_announce_selected (subgrp, ctx->ri, ctx->rn); + } + + return UPDWALK_CONTINUE; +} + +static void +subgrp_show_adjq_vty (struct update_subgroup *subgrp, struct vty *vty, + u_int8_t flags) +{ + struct bgp_table *table; + struct bgp_adj_out *adj; + unsigned long output_count; + struct bgp_node *rn; + int header1 = 1; + struct bgp *bgp; + int header2 = 1; + + bgp = SUBGRP_INST (subgrp); + if (!bgp) + return; + + table = bgp->rib[SUBGRP_AFI (subgrp)][SUBGRP_SAFI (subgrp)]; + + output_count = 0; + + for (rn = bgp_table_top (table); rn; rn = bgp_route_next (rn)) + for (adj = rn->adj_out; adj; adj = adj->next) + if (adj->subgroup == subgrp) + { + if (header1) + { + vty_out (vty, + "BGP table version is %llu, local router ID is %s%s", + table->version, inet_ntoa (bgp->router_id), + VTY_NEWLINE); + vty_out (vty, BGP_SHOW_SCODE_HEADER, VTY_NEWLINE, VTY_NEWLINE); + vty_out (vty, BGP_SHOW_OCODE_HEADER, VTY_NEWLINE, VTY_NEWLINE); + header1 = 0; + } + if (header2) + { + vty_out (vty, BGP_SHOW_HEADER, VTY_NEWLINE); + header2 = 0; + } + if ((flags & UPDWALK_FLAGS_ADVQUEUE) && adj->adv && adj->adv->baa) + { + route_vty_out_tmp (vty, &rn->p, adj->adv->baa->attr, + SUBGRP_SAFI (subgrp), NULL); + output_count++; + } + if ((flags & UPDWALK_FLAGS_ADVERTISED) && adj->attr) + { + route_vty_out_tmp (vty, &rn->p, adj->attr, SUBGRP_SAFI (subgrp), + NULL); + output_count++; + } + } + if (output_count != 0) + vty_out (vty, "%sTotal number of prefixes %ld%s", + VTY_NEWLINE, output_count, VTY_NEWLINE); +} + +static int +updgrp_show_adj_walkcb (struct update_group *updgrp, void *arg) +{ + struct updwalk_context *ctx = arg; + struct update_subgroup *subgrp; + struct vty *vty; + + vty = ctx->vty; + UPDGRP_FOREACH_SUBGRP (updgrp, subgrp) + { + if (ctx->subgrp_id && (ctx->subgrp_id != subgrp->id)) + continue; + vty_out (vty, "update group %llu, subgroup %llu%s", updgrp->id, + subgrp->id, VTY_NEWLINE); + subgrp_show_adjq_vty (subgrp, vty, ctx->flags); 
+ } + return UPDWALK_CONTINUE; +} + +static void +updgrp_show_adj (struct bgp *bgp, afi_t afi, safi_t safi, + struct vty *vty, u_int64_t id, u_int8_t flags) +{ + struct updwalk_context ctx; + memset (&ctx, 0, sizeof (ctx)); + ctx.vty = vty; + ctx.subgrp_id = id; + ctx.flags = flags; + + update_group_af_walk (bgp, afi, safi, updgrp_show_adj_walkcb, &ctx); +} + +static int +subgroup_coalesce_timer (struct thread *thread) +{ + struct update_subgroup *subgrp; + + subgrp = THREAD_ARG (thread); + if (bgp_debug_update(NULL, NULL, subgrp->update_group, 0)) + zlog_debug ("u%llu:s%llu announcing routes upon coalesce timer expiry", + (SUBGRP_UPDGRP (subgrp))->id, subgrp->id); + subgrp->t_coalesce = NULL; + subgrp->v_coalesce = 0; + subgroup_announce_route (subgrp); + + + /* While the announce_route() may kick off the route advertisement timer for + * the members of the subgroup, we'd like to send the initial updates much + * faster (i.e., without enforcing MRAI). Also, if there were no routes to + * announce, this is the method currently employed to trigger the EOR. 
+ */ + if (!bgp_update_delay_active(SUBGRP_INST(subgrp))) + { + struct peer_af *paf; + struct peer *peer; + + SUBGRP_FOREACH_PEER (subgrp, paf) + { + peer = PAF_PEER(paf); + BGP_TIMER_OFF(peer->t_routeadv); + BGP_TIMER_ON (peer->t_routeadv, bgp_routeadv_timer, 0); + } + } + + return 0; +} + +static int +update_group_announce_walkcb (struct update_group *updgrp, void *arg) +{ + struct update_subgroup *subgrp; + + UPDGRP_FOREACH_SUBGRP (updgrp, subgrp) + { + subgroup_announce_all (subgrp); + } + + return UPDWALK_CONTINUE; +} + +static int +update_group_announce_rrc_walkcb (struct update_group *updgrp, void *arg) +{ + struct update_subgroup *subgrp; + afi_t afi; + safi_t safi; + struct peer *peer; + + afi = UPDGRP_AFI (updgrp); + safi = UPDGRP_SAFI (updgrp); + peer = UPDGRP_PEER (updgrp); + + /* Only announce if this is a group of route-reflector-clients */ + if (CHECK_FLAG(peer->af_flags[afi][safi], PEER_FLAG_REFLECTOR_CLIENT)) + { + UPDGRP_FOREACH_SUBGRP (updgrp, subgrp) + { + subgroup_announce_all (subgrp); + } + } + + return UPDWALK_CONTINUE; +} + +/******************** + * PUBLIC FUNCTIONS + ********************/ + +/** + * Allocate an adj-out object. Do proper initialization of its fields, + * primarily its association with the subgroup and the prefix. 
+ */ +struct bgp_adj_out * +bgp_adj_out_alloc (struct update_subgroup *subgrp, struct bgp_node *rn) +{ + struct bgp_adj_out *adj; + + adj = XCALLOC (MTYPE_BGP_ADJ_OUT, sizeof (struct bgp_adj_out)); + adj->subgroup = subgrp; + if (rn) + { + BGP_ADJ_OUT_ADD (rn, adj); + bgp_lock_node (rn); + adj->rn = rn; + } + TAILQ_INSERT_TAIL (&(subgrp->adjq), adj, subgrp_adj_train); + SUBGRP_INCR_STAT (subgrp, adj_count); + return adj; +} + + +struct bgp_advertise * +bgp_advertise_clean_subgroup (struct update_subgroup *subgrp, + struct bgp_adj_out *adj) +{ + struct bgp_advertise *adv; + struct bgp_advertise_attr *baa; + struct bgp_advertise *next; + struct bgp_advertise_fifo *fhead; + + adv = adj->adv; + baa = adv->baa; + next = NULL; + + if (baa) + { + fhead = &subgrp->sync->update; + + /* Unlink myself from advertise attribute FIFO. */ + bgp_advertise_delete (baa, adv); + + /* Fetch next advertise candidate. */ + next = baa->adv; + + /* Unintern BGP advertise attribute. */ + bgp_advertise_unintern (subgrp->hash, baa); + } + else + fhead = &subgrp->sync->withdraw; + + + /* Unlink myself from advertisement FIFO. */ + BGP_ADV_FIFO_DEL (fhead, adv); + + /* Free memory. */ + bgp_advertise_free (adj->adv); + adj->adv = NULL; + + return next; +} + +void +bgp_adj_out_set_subgroup (struct bgp_node *rn, + struct update_subgroup *subgrp, + struct attr *attr, struct bgp_info *binfo) +{ + struct bgp_adj_out *adj = NULL; + struct bgp_advertise *adv; + + if (DISABLE_BGP_ANNOUNCE) + return; + + /* Look for adjacency information. 
*/ + adj = adj_lookup (rn, subgrp); + + if (!adj) + { + adj = bgp_adj_out_alloc (subgrp, rn); + if (!adj) + return; + } + + if (adj->adv) + bgp_advertise_clean_subgroup (subgrp, adj); + adj->adv = bgp_advertise_new (); + + adv = adj->adv; + adv->rn = rn; + assert (adv->binfo == NULL); + adv->binfo = bgp_info_lock (binfo); /* bgp_info adj_out reference */ + + if (attr) + adv->baa = bgp_advertise_intern (subgrp->hash, attr); + else + adv->baa = baa_new (); + adv->adj = adj; + + /* Add new advertisement to advertisement attribute list. */ + bgp_advertise_add (adv->baa, adv); + + /* + * If the update adv list is empty, trigger the member peers' + * mrai timers so the socket writes can happen. + */ + if (BGP_ADV_FIFO_EMPTY (&subgrp->sync->update)) + { + struct peer_af *paf; + + SUBGRP_FOREACH_PEER (subgrp, paf) + { + bgp_adjust_routeadv (PAF_PEER (paf)); + } + } + + BGP_ADV_FIFO_ADD (&subgrp->sync->update, &adv->fifo); + + subgrp->version = max (subgrp->version, rn->version); +} + +void +bgp_adj_out_unset_subgroup (struct bgp_node *rn, + struct update_subgroup *subgrp) +{ + struct bgp_adj_out *adj; + struct bgp_advertise *adv; + + if (DISABLE_BGP_ANNOUNCE) + return; + + /* Lookup existing adjacency, if it is not there return immediately. */ + adj = adj_lookup (rn, subgrp); + + if (!adj) + goto done; + + /* Clearn up previous advertisement. */ + if (adj->adv) + bgp_advertise_clean_subgroup (subgrp, adj); + + if (adj->attr) + { + /* We need advertisement structure. */ + adj->adv = bgp_advertise_new (); + adv = adj->adv; + adv->rn = rn; + adv->adj = adj; + + /* Schedule packet write, if FIFO is getting its first entry. */ + if (BGP_ADV_FIFO_EMPTY (&subgrp->sync->withdraw)) + subgroup_trigger_write(subgrp); + + /* Add to synchronization entry for withdraw announcement. */ + BGP_ADV_FIFO_ADD (&subgrp->sync->withdraw, &adv->fifo); + } + else + { + /* Remove myself from adjacency. */ + BGP_ADJ_OUT_DEL (rn, adj); + + /* Free allocated information. 
*/ + adj_free (adj); + + bgp_unlock_node (rn); + } + + /* + * Fall through. + */ + +done: + subgrp->version = max (subgrp->version, rn->version); +} + +void +bgp_adj_out_remove_subgroup (struct bgp_node *rn, struct bgp_adj_out *adj, + struct update_subgroup *subgrp) +{ + if (adj->attr) + bgp_attr_unintern (&adj->attr); + + if (adj->adv) + bgp_advertise_clean_subgroup (subgrp, adj); + + BGP_ADJ_OUT_DEL (rn, adj); + adj_free (adj); +} + +/* + * Go through all the routes and clean up the adj/adv structures corresponding + * to the subgroup. + */ +void +subgroup_clear_table (struct update_subgroup *subgrp) +{ + struct bgp_adj_out *aout, *taout; + + SUBGRP_FOREACH_ADJ_SAFE (subgrp, aout, taout) + { + bgp_unlock_node (aout->rn); + bgp_adj_out_remove_subgroup (aout->rn, aout, subgrp); + } +} + +/* + * subgroup_announce_table + */ +void +subgroup_announce_table (struct update_subgroup *subgrp, + struct bgp_table *table, int rsclient) +{ + struct bgp_node *rn; + struct bgp_info *ri; + struct attr attr; + struct attr_extra extra; + struct peer *peer; + struct peer *onlypeer; + afi_t afi; + safi_t safi; + + peer = SUBGRP_PEER (subgrp); + afi = SUBGRP_AFI (subgrp); + safi = SUBGRP_SAFI (subgrp); + + onlypeer = ((SUBGRP_PCOUNT (subgrp) == 1) ? + (SUBGRP_PFIRST (subgrp))->peer : NULL); + if (rsclient) + assert(onlypeer); + + if (!table) + table = (rsclient) ? 
onlypeer->rib[afi][safi] : peer->bgp->rib[afi][safi]; + + if (safi != SAFI_MPLS_VPN + && CHECK_FLAG (peer->af_flags[afi][safi], PEER_FLAG_DEFAULT_ORIGINATE)) + subgroup_default_originate (subgrp, 0); + + /* It's initialized in bgp_announce_[check|check_rsclient]() */ + attr.extra = &extra; + + for (rn = bgp_table_top (table); rn; rn = bgp_route_next (rn)) + for (ri = rn->info; ri; ri = ri->next) + + if (CHECK_FLAG (ri->flags, BGP_INFO_SELECTED)) + { + if (!rsclient + && subgroup_announce_check (ri, subgrp, &rn->p, &attr)) + bgp_adj_out_set_subgroup (rn, subgrp, &attr, ri); + else + bgp_adj_out_unset_subgroup (rn, subgrp); + } + + /* + * We walked through the whole table -- make sure our version number + * is consistent with the one on the table. This should allow + * subgroups to merge sooner if a peer comes up when the route node + * with the largest version is no longer in the table. This also + * covers the pathological case where all routes in the table have + * now been deleted. + */ + subgrp->version = max (subgrp->version, table->version); + + /* + * Start a task to merge the subgroup if necessary. + */ + update_subgroup_trigger_merge_check (subgrp, 0); +} + +/* + * subgroup_announce_route + * + * Refresh all routes out to a subgroup. + */ +void +subgroup_announce_route (struct update_subgroup *subgrp) +{ + struct bgp_node *rn; + struct bgp_table *table; + struct peer *onlypeer; + struct peer *peer; + + if (update_subgroup_needs_refresh (subgrp)) + { + update_subgroup_set_needs_refresh (subgrp, 0); + } + + /* + * First update is deferred until ORF or ROUTE-REFRESH is received + */ + onlypeer = ((SUBGRP_PCOUNT (subgrp) == 1) ? 
+ (SUBGRP_PFIRST (subgrp))->peer : NULL); + if (onlypeer && + CHECK_FLAG (onlypeer-> + af_sflags[SUBGRP_AFI (subgrp)][SUBGRP_SAFI (subgrp)], + PEER_STATUS_ORF_WAIT_REFRESH)) + return; + + if (SUBGRP_SAFI (subgrp) != SAFI_MPLS_VPN) + subgroup_announce_table (subgrp, NULL, 0); + else + for (rn = bgp_table_top (update_subgroup_rib (subgrp)); rn; + rn = bgp_route_next (rn)) + if ((table = (rn->info)) != NULL) + subgroup_announce_table (subgrp, table, 0); + + peer = SUBGRP_PEER(subgrp); + if (CHECK_FLAG(peer->af_flags[SUBGRP_AFI(subgrp)][SUBGRP_SAFI(subgrp)], + PEER_FLAG_RSERVER_CLIENT)) + subgroup_announce_table (subgrp, NULL, 1); +} + +void +subgroup_default_originate (struct update_subgroup *subgrp, int withdraw) +{ + struct bgp *bgp; + struct attr attr; + struct aspath *aspath; + struct prefix p; + struct peer *from; + struct bgp_node *rn; + struct bgp_info *ri; + struct peer *peer; + int ret = RMAP_DENYMATCH; + afi_t afi; + safi_t safi; + + if (!subgrp) + return; + + peer = SUBGRP_PEER (subgrp); + afi = SUBGRP_AFI (subgrp); + safi = SUBGRP_SAFI (subgrp); + + if (!(afi == AFI_IP || afi == AFI_IP6)) + return; + + bgp = peer->bgp; + from = bgp->peer_self; + + bgp_attr_default_set (&attr, BGP_ORIGIN_IGP); + aspath = attr.aspath; + attr.local_pref = bgp->default_local_pref; + memcpy (&attr.nexthop, &peer->nexthop.v4, IPV4_MAX_BYTELEN); + + if (afi == AFI_IP) + str2prefix ("0.0.0.0/0", &p); +#ifdef HAVE_IPV6 + else if (afi == AFI_IP6) + { + struct attr_extra *ae = attr.extra; + + str2prefix ("::/0", &p); + + /* IPv6 global nexthop must be included. */ + memcpy (&ae->mp_nexthop_global, &peer->nexthop.v6_global, + IPV6_MAX_BYTELEN); + ae->mp_nexthop_len = 16; + + /* If the peer is on shared nextwork and we have link-local + nexthop set it. 
*/ + if (peer->shared_network + && !IN6_IS_ADDR_UNSPECIFIED (&peer->nexthop.v6_local)) + { + memcpy (&ae->mp_nexthop_local, &peer->nexthop.v6_local, + IPV6_MAX_BYTELEN); + ae->mp_nexthop_len = 32; + } + } +#endif /* HAVE_IPV6 */ + + if (peer->default_rmap[afi][safi].name) + { + SET_FLAG (bgp->peer_self->rmap_type, PEER_RMAP_TYPE_DEFAULT); + for (rn = bgp_table_top (bgp->rib[afi][safi]); rn; + rn = bgp_route_next (rn)) + { + for (ri = rn->info; ri; ri = ri->next) + { + struct attr dummy_attr; + struct attr_extra dummy_extra; + struct bgp_info info; + + /* Provide dummy so the route-map can't modify the attributes */ + dummy_attr.extra = &dummy_extra; + bgp_attr_dup (&dummy_attr, ri->attr); + info.peer = ri->peer; + info.attr = &dummy_attr; + + ret = + route_map_apply (peer->default_rmap[afi][safi].map, &rn->p, + RMAP_BGP, &info); + + /* The route map might have set attributes. If we don't flush them + * here, they will be leaked. */ + bgp_attr_flush (&dummy_attr); + if (ret != RMAP_DENYMATCH) + break; + } + if (ret != RMAP_DENYMATCH) + break; + } + bgp->peer_self->rmap_type = 0; + + if (ret == RMAP_DENYMATCH) + withdraw = 1; + } + + if (withdraw) + { + if (CHECK_FLAG (subgrp->sflags, SUBGRP_STATUS_DEFAULT_ORIGINATE)) + subgroup_default_withdraw_packet (subgrp); + UNSET_FLAG (subgrp->sflags, SUBGRP_STATUS_DEFAULT_ORIGINATE); + } + else + { + if (!CHECK_FLAG (subgrp->sflags, SUBGRP_STATUS_DEFAULT_ORIGINATE)) + { + SET_FLAG (subgrp->sflags, SUBGRP_STATUS_DEFAULT_ORIGINATE); + subgroup_default_update_packet (subgrp, &attr, from); + } + } + + bgp_attr_extra_free (&attr); + aspath_unintern (&aspath); +} + +/* + * Announce the BGP table to a subgroup. + * + * At startup, we try to optimize route announcement by coalescing the + * peer-up events. This is done only the first time - from then on, + * subgrp->v_coalesce will be set to zero and the normal logic + * prevails. 
+ */
+void
+subgroup_announce_all (struct update_subgroup *subgrp)
+{
+  if (!subgrp)
+    return;
+
+  /*
+   * If coalesce timer value is not set, announce routes immediately.
+   */
+  if (!subgrp->v_coalesce)
+    {
+      if (bgp_debug_update(NULL, NULL, subgrp->update_group, 0))
+        zlog_debug ("u%llu:s%llu announcing all routes",
+                    subgrp->update_group->id, subgrp->id);
+      subgroup_announce_route (subgrp);
+      return;
+    }
+
+  /*
+   * We should wait for the coalesce timer. Arm the timer if not done.
+   */
+  if (!subgrp->t_coalesce)
+    {
+      THREAD_TIMER_MSEC_ON (master, subgrp->t_coalesce, subgroup_coalesce_timer,
+                            subgrp, subgrp->v_coalesce);
+    }
+}
+
+/*
+ * Go through all update subgroups and set up the adv queue for the
+ * input route.
+ */
+void
+group_announce_route (struct bgp *bgp, afi_t afi, safi_t safi,
+                      struct bgp_node *rn, struct bgp_info *ri)
+{
+  struct updwalk_context ctx;
+
+  /*
+   * Only 'ri' and 'rn' are filled in here; zero the rest so the walk
+   * callback never sees indeterminate values in the other fields.
+   */
+  memset (&ctx, 0, sizeof (ctx));
+  ctx.ri = ri;
+  ctx.rn = rn;
+  update_group_af_walk (bgp, afi, safi, group_announce_route_walkcb, &ctx);
+}
+
+/*
+ * Show the advertisement queue of update group 'id' on the vty, by
+ * calling updgrp_show_adj() with UPDWALK_FLAGS_ADVQUEUE.
+ */
+void
+update_group_show_adj_queue (struct bgp *bgp, afi_t afi, safi_t safi,
+                             struct vty *vty, u_int64_t id)
+{
+  updgrp_show_adj (bgp, afi, safi, vty, id, UPDWALK_FLAGS_ADVQUEUE);
+}
+
+/*
+ * Show the advertised routes of update group 'id' on the vty, by
+ * calling updgrp_show_adj() with UPDWALK_FLAGS_ADVERTISED.
+ */
+void
+update_group_show_advertised (struct bgp *bgp, afi_t afi, safi_t safi,
+                              struct vty *vty, u_int64_t id)
+{
+  updgrp_show_adj (bgp, afi, safi, vty, id, UPDWALK_FLAGS_ADVERTISED);
+}
+
+/*
+ * Walk all update groups of 'bgp' with update_group_announce_walkcb.
+ */
+void
+update_group_announce (struct bgp *bgp)
+{
+  update_group_walk (bgp, update_group_announce_walkcb, NULL);
+}
+
+/*
+ * Walk all update groups of 'bgp' with update_group_announce_rrc_walkcb.
+ */
+void
+update_group_announce_rrclients (struct bgp *bgp)
+{
+  update_group_walk (bgp, update_group_announce_rrc_walkcb, NULL);
+}
diff --git a/bgpd/bgp_updgrp_packet.c b/bgpd/bgp_updgrp_packet.c
new file mode 100644
index 000000000000..8ca4e7931956
--- /dev/null
+++ b/bgpd/bgp_updgrp_packet.c
@@ -0,0 +1,1136 @@
+/**
+ * bgp_updgrp_packet.c: BGP update group packet handling routines
+ *
+ * @copyright Copyright (C) 2014 Cumulus Networks, Inc.
+ *
+ * @author Avneesh Sachdev
+ * @author Rajesh Varadarajan
+ * @author Pradosh Mohapatra
+ *
+ * This file is part of GNU Zebra.
+ *
+ * GNU Zebra is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * GNU Zebra is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Zebra; see the file COPYING.  If not, write to the Free
+ * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <zebra.h>
+
+#include "prefix.h"
+#include "thread.h"
+#include "buffer.h"
+#include "stream.h"
+#include "command.h"
+#include "sockunion.h"
+#include "network.h"
+#include "memory.h"
+#include "filter.h"
+#include "routemap.h"
+#include "str.h"
+#include "log.h"
+#include "plist.h"
+#include "linklist.h"
+#include "workqueue.h"
+#include "hash.h"
+#include "queue.h"
+
+#include "bgpd/bgpd.h"
+#include "bgpd/bgp_debug.h"
+#include "bgpd/bgp_fsm.h"
+#include "bgpd/bgp_route.h"
+#include "bgpd/bgp_packet.h"
+#include "bgpd/bgp_advertise.h"
+#include "bgpd/bgp_updgrp.h"
+#include "bgpd/bgp_nexthop.h"
+#include "bgpd/bgp_nht.h"
+
+/********************
+ * PRIVATE FUNCTIONS
+ ********************/
+
+/********************
+ * PUBLIC FUNCTIONS
+ ********************/
+
+/*
+ * Allocate a zero-filled packet entry.
+ */
+struct bpacket *
+bpacket_alloc (void)
+{
+  struct bpacket *pkt;
+
+  pkt =
+    (struct bpacket *) XCALLOC (MTYPE_BGP_PACKET, sizeof (struct bpacket));
+
+  return pkt;
+}
+
+/*
+ * Free a packet entry together with the stream it owns, if any.
+ * A NULL 'pkt' is ignored.
+ */
+void
+bpacket_free (struct bpacket *pkt)
+{
+  if (!pkt)
+    return;
+
+  if (pkt->buffer)
+    stream_free (pkt->buffer);
+  pkt->buffer = NULL;
+  XFREE (MTYPE_BGP_PACKET, pkt);
+}
+
+void
+bpacket_queue_init (struct bpacket_queue *q)
+{
+  TAILQ_INIT (&(q->pkts));
+}
+
+/*
+ * bpacket_queue_sanity_check
+ *
+ * Consistency checks on the queue; currently compiled out.
+ */
+void
+bpacket_queue_sanity_check (struct bpacket_queue __attribute__ ((__unused__)) *q)
+{
+#if 0
+  struct bpacket *pkt;
+
+  pkt = bpacket_queue_last (q);
+  assert (pkt);
+  assert (!pkt->buffer);
+
+  /*
+   * Make sure the count of packets is correct.
+   */
+  int num_pkts = 0;
+
+  pkt = bpacket_queue_first (q);
+  while (pkt)
+    {
+      num_pkts++;
+
+      if (num_pkts > q->curr_count)
+        assert (0);
+
+      pkt = TAILQ_NEXT (pkt, pkt_train);
+    }
+
+  assert (num_pkts == q->curr_count);
+#endif
+}
+
+/*
+ * bpacket_queue_add_packet
+ *
+ * Internal function of bpacket_queue: appends a packet entry to the
+ * end of the list and maintains the current and high-water-mark counts.
+ *
+ * Users of bpacket_queue should use bpacket_queue_add instead.
+ */
+static void
+bpacket_queue_add_packet (struct bpacket_queue *q, struct bpacket *pkt)
+{
+  /* Appending at the tail covers both the empty and non-empty case. */
+  TAILQ_INSERT_TAIL (&(q->pkts), pkt, pkt_train);
+
+  q->curr_count++;
+  if (q->hwm_count < q->curr_count)
+    q->hwm_count = q->curr_count;
+}
+
+/*
+ * Adds a packet to the bpacket_queue.
+ *
+ * The stream passed is consumed by this function. So, the caller should
+ * not free or use the stream after
+ * invoking this function.
+ *
+ * The last entry of a non-empty queue is a sentinel with no buffer: the
+ * stream is stored in that sentinel, a fresh sentinel carrying the next
+ * version number is appended, and the former sentinel is returned.
+ */
+struct bpacket *
+bpacket_queue_add (struct bpacket_queue *q, struct stream *s,
+                   struct bpacket_attr_vec_arr *vecarrp)
+{
+  struct bpacket *pkt;
+  struct bpacket *last_pkt;
+
+
+  pkt = bpacket_alloc ();
+  if (TAILQ_EMPTY (&(q->pkts)))
+    {
+      pkt->ver = 1;
+      pkt->buffer = s;
+      if (vecarrp)
+        memcpy (&pkt->arr, vecarrp, sizeof (struct bpacket_attr_vec_arr));
+      else
+        bpacket_attr_vec_arr_reset (&pkt->arr);
+      bpacket_queue_add_packet (q, pkt);
+      bpacket_queue_sanity_check (q);
+      return pkt;
+    }
+
+  /*
+   * Fill in the new information into the current sentinel and create a
+   * new sentinel.
+   */
+  bpacket_queue_sanity_check (q);
+  last_pkt = bpacket_queue_last (q);
+  assert (last_pkt->buffer == NULL);
+  last_pkt->buffer = s;
+  if (vecarrp)
+    memcpy (&last_pkt->arr, vecarrp, sizeof (struct bpacket_attr_vec_arr));
+  else
+    bpacket_attr_vec_arr_reset (&last_pkt->arr);
+
+  pkt->ver = last_pkt->ver;
+  pkt->ver++;
+  bpacket_queue_add_packet (q, pkt);
+
+  bpacket_queue_sanity_check (q);
+  return last_pkt;
+}
+
+struct bpacket *
+bpacket_queue_first (struct bpacket_queue *q)
+{
+  return (TAILQ_FIRST (&(q->pkts)));
+}
+
+struct bpacket *
+bpacket_queue_last (struct bpacket_queue *q)
+{
+  return TAILQ_LAST (&(q->pkts), pkt_queue);
+}
+
+/*
+ * Unlink and return the first entry of the queue, or NULL if the queue
+ * is empty.  The caller owns the returned entry.
+ */
+struct bpacket *
+bpacket_queue_remove (struct bpacket_queue *q)
+{
+  struct bpacket *first;
+
+  first = bpacket_queue_first (q);
+  if (first)
+    {
+      TAILQ_REMOVE (&(q->pkts), first, pkt_train);
+      q->curr_count--;
+    }
+  return first;
+}
+
+/*
+ * Number of entries in the queue, not counting the sentinel.  Returns 0
+ * rather than wrapping around when the queue holds no entry at all.
+ */
+unsigned int
+bpacket_queue_length (struct bpacket_queue *q)
+{
+  if (!q->curr_count)
+    return 0;
+  return q->curr_count - 1;
+}
+
+/*
+ * High-water mark of bpacket_queue_length(), with the same guard
+ * against wrap-around.
+ */
+unsigned int
+bpacket_queue_hwm_length (struct bpacket_queue *q)
+{
+  if (!q->hwm_count)
+    return 0;
+  return q->hwm_count - 1;
+}
+
+int
+bpacket_queue_is_full (struct bgp *bgp, struct bpacket_queue *q)
+{
+  if (q->curr_count >= bgp->default_subgroup_pkt_queue_max)
+    return 1;
+  return 0;
+}
+
+/*
+ * Put 'paf' on the peer list of 'pkt' and make 'pkt' the next packet
+ * to send to that peer.  Does nothing if either argument is NULL.
+ */
+void
+bpacket_add_peer (struct bpacket *pkt, struct peer_af *paf)
+{
+  if (!pkt || !paf)
+    return;
+
+  LIST_INSERT_HEAD (&(pkt->peers), paf, pkt_train);
+  paf->next_pkt_to_send = pkt;
+}
+
+/*
+ * bpacket_queue_cleanup
+ *
+ * Remove and free every entry of the queue.
+ */
+void
+bpacket_queue_cleanup (struct bpacket_queue *q)
+{
+  struct bpacket *pkt;
+
+  while ((pkt = bpacket_queue_remove (q)))
+    {
+      bpacket_free (pkt);
+    }
+}
+
+/*
+ * bpacket_queue_compact
+ *
+ * Delete packets that do not need to be transmitted to any peer from
+ * the queue.
+ *
+ * @return the number of packets deleted.
+ */
+static int
+bpacket_queue_compact (struct bpacket_queue *q)
+{
+  int num_deleted;
+  struct bpacket *pkt, *removed_pkt;
+
+  num_deleted = 0;
+
+  while (1)
+    {
+      pkt = bpacket_queue_first (q);
+      if (!pkt)
+        break;
+
+      /*
+       * Don't delete the sentinel.
+       */
+      if (!pkt->buffer)
+        break;
+
+      /* Stop at the first packet some peer still has to send. */
+      if (!LIST_EMPTY (&(pkt->peers)))
+        break;
+
+      removed_pkt = bpacket_queue_remove (q);
+      assert (pkt == removed_pkt);
+      bpacket_free (removed_pkt);
+
+      num_deleted++;
+    }
+
+  bpacket_queue_sanity_check (q);
+  return num_deleted;
+}
+
+/*
+ * bpacket_queue_advance_peer
+ *
+ * Move the peer from the packet it was due to send onto the next packet
+ * in the queue, then compact the queue.  If compaction deleted anything,
+ * check whether the peer's subgroup can now be merged into another one.
+ *
+ * Does nothing when the peer is not on the queue (next_pkt_to_send is
+ * NULL, as left by bpacket_queue_remove_peer) or already sits on the
+ * sentinel.
+ */
+void
+bpacket_queue_advance_peer (struct peer_af *paf)
+{
+  struct bpacket *pkt;
+  struct bpacket *old_pkt;
+
+  if (!paf)
+    return;
+
+  old_pkt = paf->next_pkt_to_send;
+  if (!old_pkt || old_pkt->buffer == NULL)
+    /* Not queued, or already at end of list */
+    return;
+
+  LIST_REMOVE (paf, pkt_train);
+  pkt = TAILQ_NEXT (old_pkt, pkt_train);
+  bpacket_add_peer (pkt, paf);
+
+  if (!bpacket_queue_compact (PAF_PKTQ (paf)))
+    return;
+
+  /*
+   * Deleted one or more packets. Check if we can now merge this
+   * peer's subgroup into another subgroup.
+   */
+  update_subgroup_check_merge (paf->subgroup, "advanced peer in queue");
+}
+
+/*
+ * bpacket_queue_remove_peer
+ *
+ * Remove the peer from the packet queue of the subgroup it belongs
+ * to.
+ */
+void
+bpacket_queue_remove_peer (struct peer_af *paf)
+{
+  struct bpacket_queue *q;
+
+  q = PAF_PKTQ (paf);
+  assert (q);
+  if (!q)
+    return;
+
+  LIST_REMOVE (paf, pkt_train);
+  paf->next_pkt_to_send = NULL;
+
+  bpacket_queue_compact (q);
+}
+
+/*
+ * Number of packets between the one this peer is due to send next and
+ * the end of its subgroup's queue, computed from the packet version
+ * numbers.  Returns 0 when the peer has nothing pending.
+ */
+unsigned int
+bpacket_queue_virtual_length (struct peer_af *paf)
+{
+  struct bpacket *pkt;
+  struct bpacket *last;
+  struct bpacket_queue *q;
+
+  pkt = paf->next_pkt_to_send;
+  if (!pkt || (pkt->buffer == NULL))
+    /* Already at end of list */
+    return 0;
+
+  q = PAF_PKTQ (paf);
+  if (TAILQ_EMPTY (&(q->pkts)))
+    return 0;
+
+  last = TAILQ_LAST (&(q->pkts), pkt_queue);
+  if (last->ver >= pkt->ver)
+    return last->ver - pkt->ver;
+
+  /* sequence # rolled over */
+  return (UINT_MAX - pkt->ver + 1) + last->ver;
+}
+
+/*
+ * Dump the bpacket queue
+ */
+void
+bpacket_queue_show_vty (struct bpacket_queue *q, struct vty *vty)
+{
+  struct bpacket *pkt;
+  struct peer_af *paf;
+
+  pkt = bpacket_queue_first (q);
+  while (pkt)
+    {
+      /* %p requires a 'void *' argument. */
+      vty_out (vty, "  Packet %p ver %u buffer %p%s", (void *) pkt, pkt->ver,
+               (void *) pkt->buffer, VTY_NEWLINE);
+
+      LIST_FOREACH (paf, &(pkt->peers), pkt_train)
+      {
+        vty_out (vty, "      - %s%s", paf->peer->host, VTY_NEWLINE);
+      }
+      pkt = bpacket_next (pkt);
+    }
+  return;
+}
+
+/*
+ * Make a per-peer copy of a subgroup packet.
+ *
+ * The packet buffer is duplicated and, if a next-hop position was
+ * recorded for it (BGP_ATTR_VEC_NH entry flagged as updated), the
+ * next-hop bytes in the copy are overwritten with this peer's own
+ * next-hop where the rules below call for it.  The copy is queued on
+ * the peer with bgp_packet_add() and also returned.
+ */
+struct stream *
+bpacket_reformat_for_peer (struct bpacket *pkt, struct peer_af *paf)
+{
+  struct stream *s = NULL;
+  bpacket_attr_vec *vec;
+
+  s = stream_dup (pkt->buffer);
+
+  vec = &pkt->arr.entries[BGP_ATTR_VEC_NH];
+  if (CHECK_FLAG (vec->flags, BPACKET_ATTRVEC_FLAGS_UPDATED))
+    {
+      u_int8_t nhlen;
+      int route_map_sets_nh;
+
+      /* vec->offset points at the next-hop length octet; the address
+       * bytes follow it. */
+      nhlen = stream_getc_from (s, vec->offset);
+
+      route_map_sets_nh = CHECK_FLAG (vec->flags,
+                                      BPACKET_ATTRVEC_FLAGS_RMAP_CHANGED);
+
+      if (paf->afi == AFI_IP)
+        {
+          struct in_addr v4nh;
+
+          stream_get_from (&v4nh, s, vec->offset + 1, 4);
+
+          /* If NH unavailable from attribute or the route-map has set it to
+           * be the peering address, use peer's NH. The "NH unavailable" case
+           * also covers next-hop-self and some other scenarios -- see
+           * subgroup_announce_check(). The only other case where we use the
+           * peer's NH is if it is an EBGP multiaccess scenario and there is
+           * no next-hop-unchanged setting.
+           */
+          if (!v4nh.s_addr ||
+              (route_map_sets_nh &&
+               CHECK_FLAG(vec->flags,
+                          BPACKET_ATTRVEC_FLAGS_RMAP_NH_PEER_ADDRESS)))
+            stream_put_in_addr_at (s, vec->offset + 1, &paf->peer->nexthop.v4);
+          else if (paf->peer->sort == BGP_PEER_EBGP &&
+                   !peer_af_flag_check (paf->peer, paf->afi, paf->safi,
+                                        PEER_FLAG_NEXTHOP_UNCHANGED))
+            {
+              if (bgp_multiaccess_check_v4 (v4nh, paf->peer) == 0)
+                stream_put_in_addr_at (s, vec->offset + 1,
+                                       &paf->peer->nexthop.v4);
+            }
+        }
+      else if (paf->afi == AFI_IP6)
+        {
+          struct in6_addr v6nhglobal;
+          struct in6_addr v6nhlocal;
+
+          /*
+           * The logic here is rather similar to that for IPv4, the
+           * additional work being to handle 1 or 2 nexthops.
+           */
+          stream_get_from (&v6nhglobal, s, vec->offset + 1, 16);
+          if (IN6_IS_ADDR_UNSPECIFIED (&v6nhglobal) ||
+              (route_map_sets_nh &&
+               CHECK_FLAG(vec->flags,
+                          BPACKET_ATTRVEC_FLAGS_RMAP_NH_PEER_ADDRESS)))
+            stream_put_in6_addr_at (s, vec->offset + 1,
+                                    &paf->peer->nexthop.v6_global);
+          else if (paf->peer->sort == BGP_PEER_EBGP &&
+                   !peer_af_flag_check (paf->peer, paf->afi, paf->safi,
+                                        PEER_FLAG_NEXTHOP_UNCHANGED))
+            {
+              stream_put_in6_addr_at (s, vec->offset + 1,
+                                      &paf->peer->nexthop.v6_global);
+            }
+
+          /* A 32-byte next-hop also carries a link-local address. */
+          if (nhlen == 32)
+            {
+              stream_get_from (&v6nhlocal, s, vec->offset + 1 + 16, 16);
+              if (IN6_IS_ADDR_UNSPECIFIED (&v6nhlocal))
+                stream_put_in6_addr_at (s, vec->offset + 1 + 16,
+                                        &paf->peer->nexthop.v6_local);
+            }
+        }
+    }
+
+  bgp_packet_add (paf->peer, s);
+  return s;
+}
+
+/*
+ * Update the vecarr offsets to go beyond 'pos' bytes, i.e. add 'pos'
+ * to each offset.
+ */
+static void
+bpacket_attr_vec_arr_update (struct bpacket_attr_vec_arr *vecarr, size_t pos)
+{
+  int i;
+
+  if (!vecarr)
+    return;
+
+  for (i = 0; i < BGP_ATTR_VEC_MAX; i++)
+    vecarr->entries[i].offset += pos;
+}
+
+/*
+ * Return if there are packets to build for this subgroup.
+ */
+int
+subgroup_packets_to_build (struct update_subgroup *subgrp)
+{
+  struct bgp_advertise *adv;
+
+  if (!subgrp)
+    return 0;
+
+  adv = BGP_ADV_FIFO_HEAD (&subgrp->sync->withdraw);
+  if (adv)
+    return 1;
+
+  adv = BGP_ADV_FIFO_HEAD (&subgrp->sync->update);
+  if (adv)
+    return 1;
+
+  return 0;
+}
+
+/*
+ * Make BGP update packet.
+ *
+ * Drains advertisements from the subgroup's update FIFO into one UPDATE
+ * message for as long as the prefixes fit, then appends that message to
+ * the subgroup's packet queue.
+ *
+ * Returns the queued packet, or NULL if the subgroup is NULL, its
+ * packet queue is full, there was nothing to send, or the attributes
+ * alone left no room for a prefix (in which case the update FIFO is
+ * flushed).
+ */
+struct bpacket *
+subgroup_update_packet (struct update_subgroup *subgrp)
+{
+  struct bpacket_attr_vec_arr vecarr;
+  struct bpacket *pkt;
+  struct peer *peer;
+  struct stream *s;
+  struct stream *snlri;
+  struct stream *packet;
+  struct bgp_adj_out *adj;
+  struct bgp_advertise *adv;
+  struct bgp_node *rn = NULL;
+  struct bgp_info *binfo = NULL;
+  bgp_size_t total_attr_len = 0;
+  unsigned long attrlen_pos = 0;
+  size_t mpattrlen_pos = 0;
+  size_t mpattr_pos = 0;
+  afi_t afi;
+  safi_t safi;
+  int space_remaining = 0;
+  int space_needed = 0;
+  char send_attr_str[BUFSIZ];
+  int send_attr_printed = 0;
+  int num_pfx = 0;
+
+
+  if (!subgrp)
+    return NULL;
+
+  if (bpacket_queue_is_full (SUBGRP_INST (subgrp), SUBGRP_PKTQ (subgrp)))
+    return NULL;
+
+  /*
+   * The attribute string is filled in under BGP_DEBUG() below but
+   * printed under bgp_debug_update(); keep it a valid empty string in
+   * case only the latter is enabled.
+   */
+  send_attr_str[0] = '\0';
+
+  peer = SUBGRP_PEER (subgrp);
+  afi = SUBGRP_AFI (subgrp);
+  safi = SUBGRP_SAFI (subgrp);
+  s = subgrp->work;
+  stream_reset (s);
+  snlri = subgrp->scratch;
+  stream_reset (snlri);
+
+  bpacket_attr_vec_arr_reset (&vecarr);
+
+  adv = BGP_ADV_FIFO_HEAD (&subgrp->sync->update);
+  while (adv)
+    {
+      assert (adv->rn);
+      rn = adv->rn;
+      adj = adv->adj;
+      /* Always take this advertisement's own path info, so a NULL one
+       * does not inherit the tag of a previously packed prefix. */
+      binfo = adv->binfo;
+
+      space_remaining = STREAM_CONCAT_REMAIN (s, snlri, STREAM_SIZE(s)) -
+                        BGP_MAX_PACKET_SIZE_OVERFLOW;
+      space_needed = BGP_NLRI_LENGTH + PSIZE (rn->p.prefixlen);
+
+      /* When remaining space can't include NLRI and it's length.  */
+      if (space_remaining < space_needed)
+        break;
+
+      /* If packet is empty, set attribute. */
+      if (stream_empty (s))
+        {
+          struct peer *from = NULL;
+
+          if (binfo)
+            from = binfo->peer;
+
+          /* 1: Write the BGP message header - 16 bytes marker, 2 bytes length,
+           * one byte message type.
+           */
+          bgp_packet_set_marker (s, BGP_MSG_UPDATE);
+
+          /* 2: withdrawn routes length */
+          stream_putw (s, 0);
+
+          /* 3: total attributes length - attrlen_pos stores the position */
+          attrlen_pos = stream_get_endp (s);
+          stream_putw (s, 0);
+
+          /* 4: if there is MP_REACH_NLRI attribute, that should be the first
+           * attribute, according to draft-ietf-idr-error-handling. Save the
+           * position.
+           */
+          mpattr_pos = stream_get_endp (s);
+
+          /* 5: Encode all the attributes, except MP_REACH_NLRI attr. */
+          total_attr_len = bgp_packet_attribute (NULL, peer, s,
+                                                 adv->baa->attr, &vecarr,
+                                                 NULL, afi, safi,
+                                                 from, NULL, NULL);
+
+          space_remaining = STREAM_CONCAT_REMAIN (s, snlri, STREAM_SIZE(s)) -
+                            BGP_MAX_PACKET_SIZE_OVERFLOW;
+          space_needed = BGP_NLRI_LENGTH + PSIZE (rn->p.prefixlen);
+
+          /* If the attributes alone do not leave any room for NLRI then
+           * return */
+          if (space_remaining < space_needed)
+            {
+              zlog_err ("u%llu:s%llu attributes too long, cannot send UPDATE",
+                        subgrp->update_group->id, subgrp->id);
+
+              /* Flush the FIFO update queue; each advertisement must be
+               * cleaned through its own adj-out entry. */
+              while (adv)
+                adv = bgp_advertise_clean_subgroup (subgrp, adv->adj);
+              return NULL;
+            }
+
+          if (BGP_DEBUG (update, UPDATE_OUT) ||
+              BGP_DEBUG (update, UPDATE_PREFIX))
+            {
+              memset (send_attr_str, 0, BUFSIZ);
+              send_attr_printed = 0;
+              bgp_dump_attr (peer, adv->baa->attr, send_attr_str, BUFSIZ);
+            }
+        }
+
+      if (afi == AFI_IP && safi == SAFI_UNICAST)
+        stream_put_prefix (s, &rn->p);
+      else
+        {
+          /* Encode the prefix in MP_REACH_NLRI attribute */
+          struct prefix_rd *prd = NULL;
+          u_char *tag = NULL;
+
+          if (rn->prn)
+            prd = (struct prefix_rd *) &rn->prn->p;
+          if (binfo && binfo->extra)
+            tag = binfo->extra->tag;
+
+          if (stream_empty (snlri))
+            mpattrlen_pos = bgp_packet_mpattr_start (snlri, afi, safi,
+                                                     &vecarr, adv->baa->attr);
+          bgp_packet_mpattr_prefix (snlri, afi, safi, &rn->p, prd, tag);
+        }
+
+      num_pfx++;
+
+      if (bgp_debug_update(NULL, &rn->p, subgrp->update_group, 0))
+        {
+          char buf[INET6_BUFSIZ];
+
+          if (!send_attr_printed)
+            {
+              zlog_debug ("u%llu:s%llu send UPDATE w/ attr: %s",
+                          subgrp->update_group->id, subgrp->id, send_attr_str);
+              send_attr_printed = 1;
+            }
+
+          zlog_debug ("u%llu:s%llu send UPDATE %s/%d",
+                      subgrp->update_group->id, subgrp->id,
+                      inet_ntop (rn->p.family, &(rn->p.u.prefix), buf,
+                                 INET6_BUFSIZ), rn->p.prefixlen);
+        }
+
+      /* Synchronize attribute: a prefix that had no attribute before is
+       * newly sent, so it bumps the subgroup's sent count. */
+      if (adj->attr)
+        bgp_attr_unintern (&adj->attr);
+      else
+        subgrp->scount++;
+
+      adj->attr = bgp_attr_intern (adv->baa->attr);
+
+      adv = bgp_advertise_clean_subgroup (subgrp, adj);
+    }
+
+  if (!stream_empty (s))
+    {
+      if (!stream_empty (snlri))
+        {
+          bgp_packet_mpattr_end (snlri, mpattrlen_pos);
+          total_attr_len += stream_get_endp (snlri);
+        }
+
+      /* set the total attribute length correctly */
+      stream_putw_at (s, attrlen_pos, total_attr_len);
+
+      if (!stream_empty (snlri))
+        {
+          /* MP_REACH_NLRI goes in at mpattr_pos, ahead of the other
+           * attributes, so recorded offsets shift accordingly. */
+          packet = stream_dupcat (s, snlri, mpattr_pos);
+          bpacket_attr_vec_arr_update (&vecarr, mpattr_pos);
+        }
+      else
+        packet = stream_dup (s);
+      bgp_packet_set_size (packet);
+      if (bgp_debug_update(NULL, NULL, subgrp->update_group, 0))
+        zlog_debug ("u%llu:s%llu UPDATE len %d numpfx %d",
+                    subgrp->update_group->id, subgrp->id,
+                    (int) (stream_get_endp(packet) - stream_get_getp(packet)),
+                    num_pfx);
+      pkt = bpacket_queue_add (SUBGRP_PKTQ (subgrp), packet, &vecarr);
+      stream_reset (s);
+      stream_reset (snlri);
+      return pkt;
+    }
+  return NULL;
+}
+
+/* Make BGP withdraw packet.
*/
+/* For ipv4 unicast:
+   16-octet marker | 2-octet length | 1-octet type |
+    2-octet withdrawn route length | withdrawn prefixes | 2-octet attrlen (=0)
+*/
+/* For other afi/safis:
+   16-octet marker | 2-octet length | 1-octet type |
+    2-octet withdrawn route length (=0) | 2-octet attrlen |
+     mp_unreach attr type | attr len | afi | safi | withdrawn prefixes
+*/
+struct bpacket *
+subgroup_withdraw_packet (struct update_subgroup *subgrp)
+{
+  struct bpacket *pkt;
+  struct stream *s;
+  struct bgp_adj_out *adj;
+  struct bgp_advertise *adv;
+  struct peer *peer;
+  struct bgp_node *rn;
+  bgp_size_t unfeasible_len;
+  bgp_size_t total_attr_len;
+  size_t mp_start = 0;
+  size_t attrlen_pos = 0;
+  size_t mplen_pos = 0;
+  u_char first_time = 1;
+  afi_t afi;
+  safi_t safi;
+  int space_remaining = 0;
+  int space_needed = 0;
+  int num_pfx = 0;
+
+  if (!subgrp)
+    return NULL;
+
+  if (bpacket_queue_is_full (SUBGRP_INST (subgrp), SUBGRP_PKTQ (subgrp)))
+    return NULL;
+
+
+  peer = SUBGRP_PEER (subgrp);
+  afi = SUBGRP_AFI (subgrp);
+  safi = SUBGRP_SAFI (subgrp);
+  s = subgrp->work;
+  stream_reset (s);
+
+  while ((adv = BGP_ADV_FIFO_HEAD (&subgrp->sync->withdraw)) != NULL)
+    {
+      assert (adv->rn);
+      adj = adv->adj;
+      rn = adv->rn;
+
+      space_remaining = STREAM_REMAIN (s) -
+                        BGP_MAX_PACKET_SIZE_OVERFLOW;
+      space_needed = (BGP_NLRI_LENGTH + BGP_TOTAL_ATTR_LEN +
+                      PSIZE (rn->p.prefixlen));
+
+      if (space_remaining < space_needed)
+        break;
+
+      if (stream_empty (s))
+        {
+          bgp_packet_set_marker (s, BGP_MSG_UPDATE);
+          stream_putw (s, 0);   /* unfeasible routes length */
+        }
+      else
+        first_time = 0;
+
+      if (afi == AFI_IP && safi == SAFI_UNICAST)
+        stream_put_prefix (s, &rn->p);
+      else
+        {
+          struct prefix_rd *prd = NULL;
+
+          if (rn->prn)
+            prd = (struct prefix_rd *) &rn->prn->p;
+
+          /* If first time, format the MP_UNREACH header */
+          if (first_time)
+            {
+              attrlen_pos = stream_get_endp (s);
+              /* total attr length = 0 for now. reevaluate later */
+              stream_putw (s, 0);
+              mp_start = stream_get_endp (s);
+              mplen_pos = bgp_packet_mpunreach_start (s, afi, safi);
+            }
+
+          bgp_packet_mpunreach_prefix (s, &rn->p, afi, safi, prd, NULL);
+        }
+
+      num_pfx++;
+
+      if (bgp_debug_update(NULL, &rn->p, subgrp->update_group, 0))
+        {
+          char buf[INET6_BUFSIZ];
+
+          zlog_debug ("u%llu:s%llu send UPDATE %s/%d -- unreachable",
+                      subgrp->update_group->id, subgrp->id,
+                      inet_ntop (rn->p.family, &(rn->p.u.prefix), buf,
+                                 INET6_BUFSIZ), rn->p.prefixlen);
+        }
+
+      subgrp->scount--;
+
+      bgp_adj_out_remove_subgroup (rn, adj, subgrp);
+      bgp_unlock_node (rn);
+    }
+
+  if (!stream_empty (s))
+    {
+      if (afi == AFI_IP && safi == SAFI_UNICAST)
+        {
+          unfeasible_len
+            = stream_get_endp (s) - BGP_HEADER_SIZE - BGP_UNFEASIBLE_LEN;
+          stream_putw_at (s, BGP_HEADER_SIZE, unfeasible_len);
+          stream_putw (s, 0);
+        }
+      else
+        {
+          /* Set the mp_unreach attr's length */
+          bgp_packet_mpunreach_end (s, mplen_pos);
+
+          /* Set total path attribute length. */
+          total_attr_len = stream_get_endp (s) - mp_start;
+          stream_putw_at (s, attrlen_pos, total_attr_len);
+        }
+      bgp_packet_set_size (s);
+      if (bgp_debug_update(NULL, NULL, subgrp->update_group, 0))
+        zlog_debug ("u%llu:s%llu UPDATE (withdraw) len %d numpfx %d",
+                    subgrp->update_group->id, subgrp->id,
+                    (int) (stream_get_endp(s) - stream_get_getp(s)), num_pfx);
+      pkt = bpacket_queue_add (SUBGRP_PKTQ (subgrp), stream_dup (s), NULL);
+      stream_reset (s);
+      return pkt;
+    }
+
+  return NULL;
+}
+
+/*
+ * Build an UPDATE announcing the default route (0.0.0.0/0 or ::/0) with
+ * the given attributes, queue it on the subgroup and trigger a write.
+ */
+void
+subgroup_default_update_packet (struct update_subgroup *subgrp,
+                                struct attr *attr, struct peer *from)
+{
+  struct stream *s;
+  struct stream *packet;
+  struct peer *peer;
+  struct prefix p;
+  unsigned long pos;
+  bgp_size_t total_attr_len;
+  afi_t afi;
+  safi_t safi;
+  struct bpacket_attr_vec_arr vecarr;
+
+  if (DISABLE_BGP_ANNOUNCE)
+    return;
+
+  if (!subgrp)
+    return;
+
+  peer = SUBGRP_PEER (subgrp);
+  afi = SUBGRP_AFI (subgrp);
+  safi = SUBGRP_SAFI (subgrp);
+  bpacket_attr_vec_arr_reset (&vecarr);
+
+  if (afi == AFI_IP)
+    str2prefix ("0.0.0.0/0", &p);
+#ifdef HAVE_IPV6
+  else
+    str2prefix ("::/0", &p);
+#endif /* HAVE_IPV6 */
+
+  /* Logging the attribute. */
+  if (bgp_debug_update(NULL, &p, subgrp->update_group, 0))
+    {
+      char attrstr[BUFSIZ];
+      char buf[INET6_BUFSIZ];
+      attrstr[0] = '\0';
+
+      bgp_dump_attr (peer, attr, attrstr, BUFSIZ);
+      zlog_debug ("u%llu:s%llu send UPDATE %s/%d %s",
+                  (SUBGRP_UPDGRP (subgrp))->id, subgrp->id,
+                  inet_ntop (p.family, &(p.u.prefix), buf, INET6_BUFSIZ),
+                  p.prefixlen, attrstr);
+    }
+
+  s = stream_new (BGP_MAX_PACKET_SIZE);
+
+  /* Make BGP update packet. */
+  bgp_packet_set_marker (s, BGP_MSG_UPDATE);
+
+  /* Unfeasible Routes Length. */
+  stream_putw (s, 0);
+
+  /* Make place for total attribute length.  */
+  pos = stream_get_endp (s);
+  stream_putw (s, 0);
+  total_attr_len = bgp_packet_attribute (NULL, peer, s, attr, &vecarr, &p,
+                                         afi, safi, from, NULL, NULL);
+
+  /* Set Total Path Attribute Length. */
+  stream_putw_at (s, pos, total_attr_len);
+
+  /* NLRI set. */
+  if (p.family == AF_INET && safi == SAFI_UNICAST)
+    stream_put_prefix (s, &p);
+
+  /* Set size. */
+  bgp_packet_set_size (s);
+
+  /* The queue consumes the duplicate; the work stream is ours to free. */
+  packet = stream_dup (s);
+  stream_free (s);
+  (void) bpacket_queue_add (SUBGRP_PKTQ (subgrp), packet, &vecarr);
+  subgroup_trigger_write(subgrp);
+}
+
+/*
+ * Build an UPDATE withdrawing the default route (0.0.0.0/0 or ::/0),
+ * queue it on the subgroup and trigger a write.
+ */
+void
+subgroup_default_withdraw_packet (struct update_subgroup *subgrp)
+{
+  struct peer *peer;
+  struct stream *s;
+  struct stream *packet;
+  struct prefix p;
+  unsigned long attrlen_pos = 0;
+  unsigned long cp;
+  bgp_size_t unfeasible_len;
+  bgp_size_t total_attr_len;
+  size_t mp_start = 0;
+  size_t mplen_pos = 0;
+  afi_t afi;
+  safi_t safi;
+
+  if (DISABLE_BGP_ANNOUNCE)
+    return;
+
+  /* Same guard as subgroup_default_update_packet(). */
+  if (!subgrp)
+    return;
+
+  peer = SUBGRP_PEER (subgrp);
+  afi = SUBGRP_AFI (subgrp);
+  safi = SUBGRP_SAFI (subgrp);
+
+  if (afi == AFI_IP)
+    str2prefix ("0.0.0.0/0", &p);
+#ifdef HAVE_IPV6
+  else
+    str2prefix ("::/0", &p);
+#endif /* HAVE_IPV6 */
+
+  total_attr_len = 0;
+
+  if (bgp_debug_update(NULL, &p, subgrp->update_group, 0))
+    {
+      char buf[INET6_BUFSIZ];
+
+      zlog_debug ("u%llu:s%llu send UPDATE %s/%d -- unreachable",
+                  (SUBGRP_UPDGRP (subgrp))->id, subgrp->id,
+                  inet_ntop (p.family, &(p.u.prefix), buf, INET6_BUFSIZ),
+                  p.prefixlen);
+    }
+
+  s = stream_new (BGP_MAX_PACKET_SIZE);
+
+  /* Make BGP update packet. */
+  bgp_packet_set_marker (s, BGP_MSG_UPDATE);
+
+  /* Unfeasible Routes Length. */
+  cp = stream_get_endp (s);
+  stream_putw (s, 0);
+
+  /* Withdrawn Routes. */
+  if (p.family == AF_INET && safi == SAFI_UNICAST)
+    {
+      stream_put_prefix (s, &p);
+
+      unfeasible_len = stream_get_endp (s) - cp - 2;
+
+      /* Set unfeasible len.  */
+      stream_putw_at (s, cp, unfeasible_len);
+
+      /* Set total path attribute length. */
+      stream_putw (s, 0);
+    }
+  else
+    {
+      attrlen_pos = stream_get_endp (s);
+      stream_putw (s, 0);
+      mp_start = stream_get_endp (s);
+      mplen_pos = bgp_packet_mpunreach_start (s, afi, safi);
+      bgp_packet_mpunreach_prefix (s, &p, afi, safi, NULL, NULL);
+
+      /* Set the mp_unreach attr's length */
+      bgp_packet_mpunreach_end (s, mplen_pos);
+
+      /* Set total path attribute length. */
+      total_attr_len = stream_get_endp (s) - mp_start;
+      stream_putw_at (s, attrlen_pos, total_attr_len);
+    }
+
+  bgp_packet_set_size (s);
+
+  /* The queue consumes the duplicate; the work stream is ours to free. */
+  packet = stream_dup (s);
+  stream_free (s);
+
+  (void) bpacket_queue_add (SUBGRP_PKTQ (subgrp), packet, NULL);
+  subgroup_trigger_write(subgrp);
+}
+
+/*
+ * Copy the route-map related next-hop flags of 'attr' onto the next-hop
+ * entry of the vector array.  'type' is currently not consulted: the
+ * flags always go to the BGP_ATTR_VEC_NH entry.
+ */
+static void
+bpacket_vec_arr_inherit_attr_flags (struct bpacket_attr_vec_arr *vecarr,
+                                    bpacket_attr_vec_type type,
+                                    struct attr *attr)
+{
+  if (CHECK_FLAG (attr->rmap_change_flags,
+                  BATTR_RMAP_NEXTHOP_CHANGED))
+    SET_FLAG (vecarr->entries[BGP_ATTR_VEC_NH].flags,
+              BPACKET_ATTRVEC_FLAGS_RMAP_CHANGED);
+
+  if (CHECK_FLAG (attr->rmap_change_flags,
+                  BATTR_RMAP_NEXTHOP_PEER_ADDRESS))
+    SET_FLAG (vecarr->entries[BGP_ATTR_VEC_NH].flags,
+              BPACKET_ATTRVEC_FLAGS_RMAP_NH_PEER_ADDRESS);
+
+  if (CHECK_FLAG (attr->rmap_change_flags, BATTR_REFLECTED))
+    SET_FLAG (vecarr->entries[BGP_ATTR_VEC_NH].flags,
+              BPACKET_ATTRVEC_FLAGS_REFLECTED);
+}
+
+/* Reset the Attributes vector array. The vector array is used to override
+ * certain output parameters in the packet for a particular peer
+ */
+void
+bpacket_attr_vec_arr_reset (struct bpacket_attr_vec_arr *vecarr)
+{
+  int i;
+
+  if (!vecarr)
+    return;
+
+  i = 0;
+  while (i < BGP_ATTR_VEC_MAX)
+    {
+      vecarr->entries[i].flags = 0;
+      vecarr->entries[i].offset = 0;
+      i++;
+    }
+}
+
+/* Setup a particular node entry in the vecarr: mark it updated and
+ * record the stream's current end position as its offset. */
+void
+bpacket_attr_vec_arr_set_vec (struct bpacket_attr_vec_arr *vecarr,
+                              bpacket_attr_vec_type type, struct stream *s,
+                              struct attr *attr)
+{
+  if (!vecarr)
+    return;
+  assert (type < BGP_ATTR_VEC_MAX);
+
+  SET_FLAG (vecarr->entries[type].flags, BPACKET_ATTRVEC_FLAGS_UPDATED);
+  vecarr->entries[type].offset = stream_get_endp (s);
+  if (attr)
+    bpacket_vec_arr_inherit_attr_flags(vecarr, type, attr);
+}
diff --git a/bgpd/bgp_vty.c b/bgpd/bgp_vty.c
index eb3c01e9f557..5cfbaebeb39c 100644
--- a/bgpd/bgp_vty.c
+++ b/bgpd/bgp_vty.c
@@ -30,6 +30,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 #include
"log.h" #include "memory.h" #include "hash.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_advertise.h" @@ -50,6 +51,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "bgpd/bgp_vty.h" #include "bgpd/bgp_mpath.h" #include "bgpd/bgp_packet.h" +#include "bgpd/bgp_updgrp.h" extern struct in_addr router_id_zebra; @@ -1027,6 +1029,51 @@ DEFUN (no_bgp_wpkt_quanta, return bgp_wpkt_quanta_config_vty(vty, argv[0], 0); } +int +bgp_coalesce_config_vty (struct vty *vty, const char *num, char set) +{ + struct bgp *bgp; + + bgp = vty->index; + + if (set) + VTY_GET_INTEGER_RANGE ("coalesce-time", bgp->coalesce_time, num, + 0, 4294967295); + else + bgp->coalesce_time = BGP_DEFAULT_SUBGROUP_COALESCE_TIME; + + return CMD_SUCCESS; +} + +int +bgp_config_write_coalesce_time (struct vty *vty, struct bgp *bgp) +{ + if (bgp->coalesce_time != BGP_DEFAULT_SUBGROUP_COALESCE_TIME) + vty_out (vty, " coalesce-time %d%s", + bgp->coalesce_time, VTY_NEWLINE); + + return 0; +} + + +DEFUN (bgp_coalesce_time, + bgp_coalesce_time_cmd, + "coalesce-time <0-4294967295>", + "Subgroup coalesce timer\n" + "Subgroup coalesce timer value (in ms)\n") +{ + return bgp_coalesce_config_vty(vty, argv[0], 1); +} + +DEFUN (no_bgp_coalesce_time, + no_bgp_coalesce_time_cmd, + "no coalesce-time <0-4294967295>", + "Subgroup coalesce timer\n" + "Subgroup coalesce timer value (in ms)\n") +{ + return bgp_coalesce_config_vty(vty, argv[0], 0); +} + /* Maximum-paths configuration */ DEFUN (bgp_maxpaths, bgp_maxpaths_cmd, @@ -1757,37 +1804,39 @@ ALIAS (no_bgp_default_local_preference, "local preference (higher=more preferred)\n" "Configure default local preference value\n") -static void -peer_announce_routes_if_rmap_out (struct bgp *bgp) +DEFUN (bgp_default_subgroup_pkt_queue_max, + bgp_default_subgroup_pkt_queue_max_cmd, + "bgp default subgroup-pkt-queue-max <20-100>", + "BGP specific commands\n" + "Configure BGP defaults\n" + "subgroup-pkt-queue-max\n" + "Configure subgroup 
packet queue max\n") { - struct peer *peer; - struct listnode *node, *nnode; - struct bgp_filter *filter; - afi_t afi; - safi_t safi; + struct bgp *bgp; + u_int32_t max_size; - /* Reannounce all routes to appropriate neighbors */ - for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) - { - for (afi = AFI_IP; afi < AFI_MAX; afi++) - for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) - { - if (CHECK_FLAG(peer->af_flags[afi][safi], PEER_FLAG_REFLECTOR_CLIENT)) - { - /* check if there's an out route-map on this client */ - filter = &peer->filter[afi][safi]; - if (ROUTE_MAP_OUT_NAME(filter)) - { - if (bgp_debug_update(peer, NULL, 0)) - zlog_debug("%s: Announcing routes again for peer %s" - "(afi=%d, safi=%d", __func__, peer->host, afi, - safi); - - bgp_announce_route_all(peer); - } - } - } - } + bgp = vty->index; + + VTY_GET_INTEGER ("subgroup packet queue max", max_size, argv[0]); + + bgp_default_subgroup_pkt_queue_max_set (bgp, max_size); + + return CMD_SUCCESS; +} + +DEFUN (no_bgp_default_subgroup_pkt_queue_max, + no_bgp_default_subgroup_pkt_queue_max_cmd, + "no bgp default subgroup-pkt-queue-max", + NO_STR + "BGP specific commands\n" + "Configure BGP defaults\n" + "subgroup-pkt-queue-max\n") +{ + struct bgp *bgp; + + bgp = vty->index; + bgp_default_subgroup_pkt_queue_max_unset (bgp); + return CMD_SUCCESS; } DEFUN (bgp_rr_allow_outbound_policy, @@ -1806,7 +1855,7 @@ DEFUN (bgp_rr_allow_outbound_policy, if (!bgp_flag_check(bgp, BGP_FLAG_RR_ALLOW_OUTBOUND_POLICY)) { bgp_flag_set(bgp, BGP_FLAG_RR_ALLOW_OUTBOUND_POLICY); - peer_announce_routes_if_rmap_out(bgp); + update_group_announce_rrclients(bgp); } return CMD_SUCCESS; @@ -1828,7 +1877,7 @@ DEFUN (no_bgp_rr_allow_outbound_policy, if (bgp_flag_check(bgp, BGP_FLAG_RR_ALLOW_OUTBOUND_POLICY)) { bgp_flag_unset(bgp, BGP_FLAG_RR_ALLOW_OUTBOUND_POLICY); - peer_announce_routes_if_rmap_out(bgp); + update_group_announce_rrclients(bgp); } return CMD_SUCCESS; @@ -2197,6 +2246,43 @@ ALIAS (no_neighbor_local_as, "Do not prepend 
local-as to updates from ebgp peers\n" "Do not prepend local-as to updates from ibgp peers\n") +DEFUN (neighbor_solo, + neighbor_solo_cmd, + NEIGHBOR_CMD2 "solo", + NEIGHBOR_STR + NEIGHBOR_ADDR_STR2 + "Solo peer - part of its own update group\n") +{ + struct peer *peer; + int ret; + + peer = peer_and_group_lookup_vty (vty, argv[0]); + if (! peer) + return CMD_WARNING; + + ret = update_group_adjust_soloness(peer, 1); + return bgp_vty_return (vty, ret); +} + +DEFUN (no_neighbor_solo, + no_neighbor_solo_cmd, + NO_NEIGHBOR_CMD2 "solo", + NO_STR + NEIGHBOR_STR + NEIGHBOR_ADDR_STR2 + "Solo peer - part of its own update group\n") +{ + struct peer *peer; + int ret; + + peer = peer_and_group_lookup_vty (vty, argv[0]); + if (! peer) + return CMD_WARNING; + + ret = update_group_adjust_soloness(peer, 0); + return bgp_vty_return (vty, ret); +} + DEFUN (neighbor_password, neighbor_password_cmd, NEIGHBOR_CMD2 "password LINE", @@ -7684,6 +7770,12 @@ DEFUN (show_bgp_memory, mtype_memstr (memstrbuf, sizeof (memstrbuf), count * sizeof (struct bgp_static)), VTY_NEWLINE); + + if ((count = mtype_stats_alloc (MTYPE_BGP_PACKET))) + vty_out (vty, "%ld Packets, using %s of memory%s", count, + mtype_memstr (memstrbuf, sizeof (memstrbuf), + count * sizeof (struct bpacket)), + VTY_NEWLINE); /* Adj-In/Out */ if ((count = mtype_stats_alloc (MTYPE_BGP_ADJ_IN))) @@ -7876,6 +7968,9 @@ bgp_show_summary (struct vty *vty, struct bgp *bgp, int afi, int safi, char *del if (bgp->v_maxmed_admin) vty_out (vty, "Max-med administrative active%s", VTY_NEWLINE); + vty_out(vty, "BGP table version %llu%s", + bgp_table_version(bgp->rib[afi][safi]), VTY_NEWLINE); + ents = bgp_table_count (bgp->rib[afi][safi]); vty_out (vty, "RIB entries %ld, using %s of memory%s", ents, mtype_memstr (memstrbuf, sizeof (memstrbuf), @@ -7927,7 +8022,7 @@ bgp_show_summary (struct vty *vty, struct bgp *bgp, int afi, int safi, char *del vty_out(vty, "%c", *delimit); if (!delimit) - vty_out (vty, "%5u %7d %7d %8d %4d %4u ", + vty_out 
(vty, "%5u %7d %7d %8lu %4d %4u ", peer->as, peer->open_in + peer->update_in + peer->keepalive_in + peer->notify_in + peer->refresh_in @@ -7935,12 +8030,11 @@ bgp_show_summary (struct vty *vty, struct bgp *bgp, int afi, int safi, char *del peer->open_out + peer->update_out + peer->keepalive_out + peer->notify_out + peer->refresh_out + peer->dynamic_cap_out, + peer->version[afi][safi], 0, - 0, - peer->sync[afi][safi]->update.count + - peer->sync[afi][safi]->withdraw.count); + (unsigned long) peer->obuf->count); else - vty_out (vty, "%5u %c %7d %c %7d %c %8d %c %4d %c %4u %c", + vty_out (vty, "%5u %c %7d %c %7d %c %8lu %c %4d %c %4u %c", peer->as, *delimit, peer->open_in + peer->update_in + peer->keepalive_in + peer->notify_in + peer->refresh_in @@ -7948,10 +8042,9 @@ bgp_show_summary (struct vty *vty, struct bgp *bgp, int afi, int safi, char *del peer->open_out + peer->update_out + peer->keepalive_out + peer->notify_out + peer->refresh_out + peer->dynamic_cap_out, *delimit, + peer->version[afi][safi], *delimit, 0, *delimit, - 0, *delimit, - peer->sync[afi][safi]->update.count + - peer->sync[afi][safi]->withdraw.count, *delimit); + (unsigned long) peer->obuf->count, *delimit); vty_out (vty, "%8s", peer_uptime (peer->uptime, timebuf, BGP_UPTIME_LEN)); @@ -8320,6 +8413,7 @@ static void bgp_show_peer_afi (struct vty *vty, struct peer *p, afi_t afi, safi_t safi) { struct bgp_filter *filter; + struct peer_af *paf; char orf_pfx_name[BUFSIZ]; int orf_pfx_count; @@ -8331,6 +8425,17 @@ bgp_show_peer_afi (struct vty *vty, struct peer *p, afi_t afi, safi_t safi) if (p->af_group[afi][safi]) vty_out (vty, " %s peer-group member%s", p->group->name, VTY_NEWLINE); + paf = peer_af_find(p, afi, safi); + if (paf && PAF_SUBGRP(paf)) + { + vty_out (vty, " Update group %llu, subgroup %llu%s", + PAF_UPDGRP(paf)->id, PAF_SUBGRP(paf)->id, VTY_NEWLINE); + vty_out (vty, " Packet Queue length %d%s", + bpacket_queue_virtual_length(paf), VTY_NEWLINE); + } else + { + vty_out(vty, " Not part of any 
update group%s", VTY_NEWLINE); + } if (CHECK_FLAG (p->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_SM_ADV) || CHECK_FLAG (p->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_SM_RCV) || CHECK_FLAG (p->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_SM_OLD_RCV) @@ -8425,7 +8530,8 @@ bgp_show_peer_afi (struct vty *vty, struct peer *p, afi_t afi, safi_t safi) vty_out (vty, " default route-map %s%s,", p->default_rmap[afi][safi].map ? "*" : "", p->default_rmap[afi][safi].name); - if (CHECK_FLAG (p->af_sflags[afi][safi], PEER_STATUS_DEFAULT_ORIGINATE)) + if (paf && PAF_SUBGRP(paf) && CHECK_FLAG(PAF_SUBGRP(paf)->sflags, + SUBGRP_STATUS_DEFAULT_ORIGINATE)) vty_out (vty, " default sent%s", VTY_NEWLINE); else vty_out (vty, " default not sent%s", VTY_NEWLINE); @@ -8962,7 +9068,7 @@ bgp_show_peer (struct vty *vty, struct peer *p) vty_out (vty, "Next connect timer due in %ld seconds%s", thread_timer_remain_second (p->t_connect), VTY_NEWLINE); if (p->t_routeadv) - vty_out (vty, "MRAI (interval %ld) timer expires in %ld seconds%s", + vty_out (vty, "MRAI (interval %u) timer expires in %ld seconds%s", p->v_routeadv, thread_timer_remain_second (p->t_routeadv), VTY_NEWLINE); @@ -9664,6 +9770,205 @@ ALIAS (show_bgp_instance_ipv6_safi_rsclient_summary, #endif /* HAVE IPV6 */ +DEFUN (show_ip_bgp_updgrps, + show_ip_bgp_updgrps_cmd, + "show ip bgp update-groups summary", + SHOW_STR + IP_STR + BGP_STR + "BGP update groups\n" + "Summary information\n") +{ + struct bgp *bgp; + + bgp = bgp_get_default(); + if (bgp) + update_group_show(bgp, AFI_IP, SAFI_UNICAST, vty); + return CMD_SUCCESS; +} + +DEFUN (show_bgp_ipv6_updgrps, + show_bgp_ipv6_updgrps_cmd, + "show bgp update-groups summary", + SHOW_STR + BGP_STR + "BGP update groups\n" + "Summary information\n") +{ + struct bgp *bgp; + + bgp = bgp_get_default(); + if (bgp) + update_group_show(bgp, AFI_IP6, SAFI_UNICAST, vty); + return CMD_SUCCESS; +} + +DEFUN (show_bgp_updgrps, + show_bgp_updgrps_cmd, + "show bgp (ipv4|ipv6) (unicast|multicast) update-groups summary", + 
SHOW_STR + BGP_STR + "Address family\n" + "Address family\n" + "Address Family modifier\n" + "Address Family modifier\n" + "BGP update groups\n" + "Summary information\n") +{ + struct bgp *bgp; + afi_t afi; + safi_t safi; + + afi = (strcmp(argv[0], "ipv4") == 0) ? AFI_IP : AFI_IP6; + safi = (strncmp (argv[1], "m", 1) == 0) ? SAFI_MULTICAST : SAFI_UNICAST; + bgp = bgp_get_default(); + if (bgp) + update_group_show(bgp, afi, safi, vty); + return CMD_SUCCESS; +} + +DEFUN (show_bgp_updgrps_stats, + show_bgp_updgrps_stats_cmd, + "show bgp update-groups statistics", + SHOW_STR + BGP_STR + "BGP update groups\n" + "Statistics\n") +{ + struct bgp *bgp; + + bgp = bgp_get_default(); + if (bgp) + update_group_show_stats(bgp, vty); + + return CMD_SUCCESS; +} + +static void +show_bgp_updgrps_adj_info_aux (struct vty *vty, afi_t afi, safi_t safi, + const char *what, u_int64_t subgrp_id) +{ + struct bgp *bgp; + bgp = bgp_get_default(); + if (bgp) + { + if (!strcmp(what, "advertise-queue")) + update_group_show_adj_queue(bgp, afi, safi, vty, subgrp_id); + else if (!strcmp(what, "advertised-routes")) + update_group_show_advertised(bgp, afi, safi, vty, subgrp_id); + else if (!strcmp(what, "packet-queue")) + update_group_show_packet_queue(bgp, afi, safi, vty, subgrp_id); + } +} + +DEFUN (show_ip_bgp_updgrps_adj, + show_ip_bgp_updgrps_adj_cmd, + "show ip bgp update-groups (advertise-queue|advertised-routes|packet-queue)", + SHOW_STR + IP_STR + BGP_STR + "BGP update groups\n" + "Advertisement queue\n" + "Announced routes\n" + "Packet queue\n") +{ + show_bgp_updgrps_adj_info_aux(vty, AFI_IP, SAFI_UNICAST, argv[0], 0); + return CMD_SUCCESS; +} + +DEFUN (show_bgp_updgrps_afi_adj, + show_bgp_updgrps_afi_adj_cmd, + "show bgp (ipv4|ipv6) (unicast|multicast) update-groups (advertise-queue|advertised-routes|packet-queue)", + SHOW_STR + BGP_STR + "Address family\n" + "Address family\n" + "Address Family modifier\n" + "Address Family modifier\n" + "BGP update groups\n" + "Advertisement queue\n" + 
"Announced routes\n" + "Packet queue\n") +{ + afi_t afi; + safi_t safi; + + afi = (strcmp(argv[0], "ipv4") == 0) ? AFI_IP : AFI_IP6; + safi = (strncmp (argv[1], "m", 1) == 0) ? SAFI_MULTICAST : SAFI_UNICAST; + show_bgp_updgrps_adj_info_aux(vty, afi, safi, argv[2], 0); +} + +DEFUN (show_bgp_updgrps_adj, + show_bgp_updgrps_adj_cmd, + "show bgp update-groups (advertise-queue|advertised-routes|packet-queue)", + SHOW_STR + BGP_STR + "BGP update groups\n" + "Advertisement queue\n" + "Announced routes\n" + "Packet queue\n") +{ + show_bgp_updgrps_adj_info_aux(vty, AFI_IP6, SAFI_UNICAST, argv[0], 0); + return CMD_SUCCESS; +} + +DEFUN (show_ip_bgp_updgrps_adj_s, + show_ip_bgp_updgrps_adj_subgroup_cmd, + "show ip bgp update-groups SUBGROUP-ID (advertise-queue|advertised-routes|packet-queue)", + SHOW_STR + IP_STR + BGP_STR + "BGP update groups\n" + "64-bit subgroup id\n" + "Advertisement queue\n" + "Announced routes\n" + "Packet queue\n") +{ + show_bgp_updgrps_adj_info_aux(vty, AFI_IP, SAFI_UNICAST, argv[1], + atoll(argv[0])); + return CMD_SUCCESS; +} + +DEFUN (show_bgp_updgrps_adj_s, + show_bgp_updgrps_adj_subgroup_cmd, + "show bgp update-groups SUBGROUP-ID (advertise-queue|advertised-routes|packet-queue)", + SHOW_STR + BGP_STR + "BGP update groups\n" + "64-bit subgroup id\n" + "Advertisement queue\n" + "Announced routes\n" + "Packet queue\n") +{ + show_bgp_updgrps_adj_info_aux(vty, AFI_IP6, SAFI_UNICAST, argv[1], + atoll(argv[0])); + return CMD_SUCCESS; +} + +DEFUN (show_bgp_updgrps_afi_adj_subgroup, + show_bgp_updgrps_afi_adj_subgroup_cmd, + "show bgp (ipv4|ipv6) (unicast|multicast) update-groups SUBGROUP-ID (advertise-queue|advertised-routes|packet-queue)", + SHOW_STR + BGP_STR + "Address family\n" + "Address family\n" + "Address Family modifier\n" + "Address Family modifier\n" + "BGP update groups\n" + "64-bit subgroup id\n" + "Advertisement queue\n" + "Announced routes\n" + "Packet queue\n") +{ + afi_t afi; + safi_t safi; + + afi = (strcmp(argv[0], "ipv4") == 0) ? 
AFI_IP : AFI_IP6; + safi = (strncmp (argv[1], "m", 1) == 0) ? SAFI_MULTICAST : SAFI_UNICAST; + show_bgp_updgrps_adj_info_aux(vty, afi, safi, argv[3], atoll(argv[2])); +} + + /* Redistribute VTY commands. */ DEFUN (bgp_redistribute_ipv4, @@ -10393,6 +10698,9 @@ bgp_vty_init (void) install_element (BGP_NODE, &bgp_wpkt_quanta_cmd); install_element (BGP_NODE, &no_bgp_wpkt_quanta_cmd); + install_element (BGP_NODE, &bgp_coalesce_time_cmd); + install_element (BGP_NODE, &no_bgp_coalesce_time_cmd); + /* "maximum-paths" commands. */ install_element (BGP_NODE, &bgp_maxpaths_cmd); install_element (BGP_NODE, &no_bgp_maxpaths_cmd); @@ -10496,6 +10804,10 @@ bgp_vty_init (void) install_element (BGP_NODE, &no_bgp_default_local_preference_cmd); install_element (BGP_NODE, &no_bgp_default_local_preference_val_cmd); + /* "bgp default subgroup-pkt-queue-max" commands. */ + install_element (BGP_NODE, &bgp_default_subgroup_pkt_queue_max_cmd); + install_element (BGP_NODE, &no_bgp_default_subgroup_pkt_queue_max_cmd); + /* bgp ibgp-allow-policy-mods command */ install_element (BGP_NODE, &bgp_rr_allow_outbound_policy_cmd); install_element (BGP_NODE, &no_bgp_rr_allow_outbound_policy_cmd); @@ -10521,6 +10833,10 @@ bgp_vty_init (void) install_element (BGP_NODE, &no_neighbor_local_as_val2_cmd); install_element (BGP_NODE, &no_neighbor_local_as_val3_cmd); + /* "neighbor solo" commands. */ + install_element (BGP_NODE, &neighbor_solo_cmd); + install_element (BGP_NODE, &no_neighbor_solo_cmd); + /* "neighbor password" commands. */ install_element (BGP_NODE, &neighbor_password_cmd); install_element (BGP_NODE, &no_neighbor_password_cmd); @@ -11353,6 +11669,15 @@ bgp_vty_init (void) /* "show ip bgp summary" commands. 
*/ install_element (VIEW_NODE, &show_ip_bgp_summary_cmd); install_element (VIEW_NODE, &show_ip_bgp_summary_csv_cmd); + install_element (VIEW_NODE, &show_ip_bgp_updgrps_cmd); + install_element (VIEW_NODE, &show_bgp_updgrps_cmd); + install_element (VIEW_NODE, &show_bgp_ipv6_updgrps_cmd); + install_element (VIEW_NODE, &show_ip_bgp_updgrps_adj_cmd); + install_element (VIEW_NODE, &show_bgp_updgrps_adj_cmd); + install_element (VIEW_NODE, &show_bgp_updgrps_afi_adj_cmd); + install_element (VIEW_NODE, &show_ip_bgp_updgrps_adj_subgroup_cmd); + install_element (VIEW_NODE, &show_bgp_updgrps_adj_subgroup_cmd); + install_element (VIEW_NODE, &show_bgp_updgrps_afi_adj_subgroup_cmd); install_element (VIEW_NODE, &show_ip_bgp_instance_summary_cmd); install_element (VIEW_NODE, &show_ip_bgp_ipv4_summary_cmd); install_element (VIEW_NODE, &show_bgp_ipv4_safi_summary_cmd); @@ -11371,6 +11696,15 @@ bgp_vty_init (void) #endif /* HAVE_IPV6 */ install_element (RESTRICTED_NODE, &show_ip_bgp_summary_cmd); install_element (RESTRICTED_NODE, &show_ip_bgp_summary_csv_cmd); + install_element (RESTRICTED_NODE, &show_ip_bgp_updgrps_cmd); + install_element (RESTRICTED_NODE, &show_bgp_updgrps_cmd); + install_element (RESTRICTED_NODE, &show_bgp_ipv6_updgrps_cmd); + install_element (RESTRICTED_NODE, &show_ip_bgp_updgrps_adj_cmd); + install_element (RESTRICTED_NODE, &show_bgp_updgrps_adj_cmd); + install_element (RESTRICTED_NODE, &show_bgp_updgrps_afi_adj_cmd); + install_element (RESTRICTED_NODE, &show_ip_bgp_updgrps_adj_subgroup_cmd); + install_element (RESTRICTED_NODE, &show_bgp_updgrps_adj_subgroup_cmd); + install_element (RESTRICTED_NODE, &show_bgp_updgrps_afi_adj_subgroup_cmd); install_element (RESTRICTED_NODE, &show_ip_bgp_instance_summary_cmd); install_element (RESTRICTED_NODE, &show_ip_bgp_ipv4_summary_cmd); install_element (RESTRICTED_NODE, &show_bgp_ipv4_safi_summary_cmd); @@ -11389,6 +11723,16 @@ bgp_vty_init (void) #endif /* HAVE_IPV6 */ install_element (ENABLE_NODE, &show_ip_bgp_summary_cmd); 
install_element (ENABLE_NODE, &show_ip_bgp_summary_csv_cmd); + install_element (ENABLE_NODE, &show_ip_bgp_updgrps_cmd); + install_element (ENABLE_NODE, &show_bgp_updgrps_cmd); + install_element (ENABLE_NODE, &show_bgp_ipv6_updgrps_cmd); + install_element (ENABLE_NODE, &show_bgp_updgrps_stats_cmd); + install_element (ENABLE_NODE, &show_ip_bgp_updgrps_adj_cmd); + install_element (ENABLE_NODE, &show_bgp_updgrps_adj_cmd); + install_element (ENABLE_NODE, &show_bgp_updgrps_afi_adj_cmd); + install_element (ENABLE_NODE, &show_ip_bgp_updgrps_adj_subgroup_cmd); + install_element (ENABLE_NODE, &show_bgp_updgrps_adj_subgroup_cmd); + install_element (ENABLE_NODE, &show_bgp_updgrps_afi_adj_subgroup_cmd); install_element (ENABLE_NODE, &show_ip_bgp_instance_summary_cmd); install_element (ENABLE_NODE, &show_ip_bgp_ipv4_summary_cmd); install_element (ENABLE_NODE, &show_bgp_ipv4_safi_summary_cmd); diff --git a/bgpd/bgp_vty.h b/bgpd/bgp_vty.h index 9caf0baacee7..1357e3c25ad3 100644 --- a/bgpd/bgp_vty.h +++ b/bgpd/bgp_vty.h @@ -27,5 +27,6 @@ extern void bgp_vty_init (void); extern const char *afi_safi_print (afi_t, safi_t); extern int bgp_config_write_update_delay (struct vty *, struct bgp *); extern int bgp_config_write_wpkt_quanta(struct vty *vty, struct bgp *bgp); +extern int bgp_config_write_coalesce_time(struct vty *vty, struct bgp *bgp); #endif /* _QUAGGA_BGP_VTY_H */ diff --git a/bgpd/bgp_zebra.c b/bgpd/bgp_zebra.c index 78735caded78..4c3f3dbb21e1 100644 --- a/bgpd/bgp_zebra.c +++ b/bgpd/bgp_zebra.c @@ -29,6 +29,7 @@ Boston, MA 02111-1307, USA. 
*/ #include "zclient.h" #include "routemap.h" #include "thread.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_route.h" diff --git a/bgpd/bgpd.c b/bgpd/bgpd.c index 8ec27e338dd3..17bf8cf5d623 100644 --- a/bgpd/bgpd.c +++ b/bgpd/bgpd.c @@ -35,6 +35,7 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "plist.h" #include "linklist.h" #include "workqueue.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" @@ -62,6 +63,8 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #ifdef HAVE_SNMP #include "bgpd/bgp_snmp.h" #endif /* HAVE_SNMP */ +#include "bgpd/bgp_updgrp.h" + /* BGP process wide configuration. */ static struct bgp_master bgp_master; @@ -547,6 +550,104 @@ bgp_default_local_preference_unset (struct bgp *bgp) return 0; } +/* Local preference configuration. */ +int +bgp_default_subgroup_pkt_queue_max_set (struct bgp *bgp, u_int32_t queue_size) +{ + if (! bgp) + return -1; + + bgp->default_subgroup_pkt_queue_max = queue_size; + + return 0; +} + +int +bgp_default_subgroup_pkt_queue_max_unset (struct bgp *bgp) +{ + if (! 
bgp) + return -1; + bgp->default_subgroup_pkt_queue_max = BGP_DEFAULT_SUBGROUP_PKT_QUEUE_MAX; + + return 0; +} + +struct peer_af * +peer_af_create (struct peer *peer, afi_t afi, safi_t safi) +{ + struct peer_af *af; + int afid; + + if (!peer) + return NULL; + + afid = afindex(afi, safi); + if (afid >= BGP_AF_MAX) + return NULL; + + assert(peer->peer_af_array[afid] == NULL); + + /* Allocate new peer af */ + af = XCALLOC (MTYPE_BGP_PEER_AF, sizeof (struct peer_af)); + peer->peer_af_array[afid] = af; + af->afi = afi; + af->safi = safi; + af->afid = afid; + af->peer = peer; + + //update_group_adjust_peer(af); + return af; +} + +struct peer_af * +peer_af_find (struct peer *peer, afi_t afi, safi_t safi) +{ + int afid; + + if (!peer) + return NULL; + + afid = afindex(afi, safi); + if (afid >= BGP_AF_MAX) + return NULL; + + return peer->peer_af_array[afid]; +} + +int +peer_af_delete (struct peer *peer, afi_t afi, safi_t safi) +{ + struct peer_af *af; + int afid; + + if (!peer) + return -1; + + afid = afindex(afi, safi); + if (afid >= BGP_AF_MAX) + return -1; + + af = peer->peer_af_array[afid]; + if (!af) + return -1; + + bgp_stop_announce_route_timer (af); + + if (PAF_SUBGRP(af)) + { + if (BGP_DEBUG (update_groups, UPDATE_GROUPS)) + zlog_debug ("u%llu:s%llu remove peer %s", + af->subgroup->update_group->id, af->subgroup->id, peer->host); + } + + update_subgroup_remove_peer (af->subgroup, af); + + peer->peer_af_array[afid] = NULL; + XFREE(MTYPE_BGP_PEER_AF, af); + return 0; +} + + /* If peer is RSERVER_CLIENT in at least one address family and is not member of a peer_group for that family, return 1. Used to check wether the peer is included in list bgp->rsclient. 
*/ @@ -906,8 +1007,10 @@ peer_new (struct bgp *bgp) void peer_xfer_config (struct peer *peer_dst, struct peer *peer_src) { + struct peer_af *paf; afi_t afi; safi_t safi; + int afindex; assert(peer_src); assert(peer_dst); @@ -953,6 +1056,9 @@ peer_xfer_config (struct peer *peer_dst, struct peer *peer_src) peer_dst->allowas_in[afi][safi] = peer_src->allowas_in[afi][safi]; } + PEERAF_FOREACH(peer_src, paf, afindex) + peer_af_create(peer_dst, paf->afi, paf->safi); + /* update-source apply */ if (peer_src->update_source) { @@ -1057,9 +1163,6 @@ peer_create (union sockunion *su, const char *conf_if, struct bgp *bgp, active = peer_active (peer); - if (afi && safi) - peer->afc[afi][safi] = 1; - /* Last read and reset time set */ peer->readtime = peer->resettime = bgp_clock (); @@ -1068,6 +1171,15 @@ peer_create (union sockunion *su, const char *conf_if, struct bgp *bgp, SET_FLAG (peer->flags, PEER_FLAG_CONFIG_NODE); + if (afi && safi) + { + peer->afc[afi][safi] = 1; + if (peer_af_create(peer, afi, safi) == NULL) + { + zlog_err("couldn't create af structure for peer %s", peer->host); + } + } + /* Set up peer's events and timers. */ if (! active && peer_active (peer)) bgp_timer_set (peer); @@ -1273,6 +1385,11 @@ peer_activate (struct peer *peer, afi_t afi, safi_t safi) peer->afc[afi][safi] = 1; + if (peer_af_create(peer, afi, safi) == NULL) + { + zlog_err("couldn't create af structure for peer %s", peer->host); + } + if (! active && peer_active (peer)) bgp_timer_set (peer); else @@ -1332,6 +1449,10 @@ peer_deactivate (struct peer *peer, afi_t afi, safi_t safi) /* De-activate the address family configuration. */ peer->afc[afi][safi] = 0; peer_af_flag_reset (peer, afi, safi); + if (peer_af_delete(peer, afi, safi) != 0) + { + zlog_err("couldn't delete af structure for peer %s", peer->host); + } if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) { @@ -1551,6 +1672,9 @@ peer_delete (struct peer *peer) filter->usmap.name = NULL; peer->default_rmap[afi][safi].name = NULL; } + + FOREACH_AFI_SAFI (afi, safi) + peer_af_delete (peer, afi, safi); peer_unlock (peer); /* initial reference */ @@ -1563,19 +1687,6 @@ peer_group_cmp (struct peer_group *g1, struct peer_group *g2) return strcmp (g1->name, g2->name); } -/* If peer is configured at least one address family return 1. */ -static int -peer_group_active (struct peer *peer) -{ - if (peer->af_group[AFI_IP][SAFI_UNICAST] - || peer->af_group[AFI_IP][SAFI_MULTICAST] - || peer->af_group[AFI_IP][SAFI_MPLS_VPN] - || peer->af_group[AFI_IP6][SAFI_UNICAST] - || peer->af_group[AFI_IP6][SAFI_MULTICAST]) - return 1; - return 0; -} - /* Peer group cofiguration. */ static struct peer_group * peer_group_new (void) @@ -2052,6 +2163,11 @@ peer_group_bind (struct bgp *bgp, union sockunion *su, struct peer *peer, peer->af_group[afi][safi] = 1; peer->afc[afi][safi] = 1; + if (!peer_af_find(peer, afi, safi) && + peer_af_create(peer, afi, safi) == NULL) + { + zlog_err("couldn't create af structure for peer %s", peer->host); + } if (! 
peer->group) { peer->group = group; @@ -2151,6 +2267,10 @@ peer_group_unbind (struct bgp *bgp, struct peer *peer, peer->af_group[afi][safi] = 0; peer->afc[afi][safi] = 0; peer_af_flag_reset (peer, afi, safi); + if (peer_af_delete(peer, afi, safi) != 0) + { + zlog_err("couldn't delete af structure for peer %s", peer->host); + } if (peer->rib[afi][safi]) peer->rib[afi][safi] = NULL; @@ -2240,6 +2360,7 @@ bgp_create (as_t *as, const char *name) bgp->v_update_delay = BGP_UPDATE_DELAY_DEF; bgp->default_local_pref = BGP_DEFAULT_LOCAL_PREF; + bgp->default_subgroup_pkt_queue_max = BGP_DEFAULT_SUBGROUP_PKT_QUEUE_MAX; bgp->default_holdtime = BGP_DEFAULT_HOLDTIME; bgp->default_keepalive = BGP_DEFAULT_KEEPALIVE; bgp->restart_time = BGP_DEFAULT_RESTART_TIME; @@ -2251,12 +2372,12 @@ bgp_create (as_t *as, const char *name) bgp->name = strdup (name); bgp->wpkt_quanta = BGP_WRITE_PACKET_MAX; - bgp->adv_quanta = BGP_ADV_FIFO_QUANTA; - bgp->wd_quanta = BGP_WD_FIFO_QUANTA; + bgp->coalesce_time = BGP_DEFAULT_SUBGROUP_COALESCE_TIME; THREAD_TIMER_ON (master, bgp->t_startup, bgp_startup_timer_expire, bgp, bgp->restart_time); + update_group_init(bgp); return bgp; } @@ -2614,7 +2735,10 @@ peer_change_action (struct peer *peer, afi_t afi, safi_t safi, } } else if (type == peer_change_reset_out) - bgp_announce_route (peer, afi, safi); + { + update_group_adjust_peer(peer_af_find(peer, afi, safi)); + bgp_announce_route (peer, afi, safi); + } } struct peer_flag_action @@ -3386,8 +3510,11 @@ peer_default_originate_set (struct peer *peer, afi_t afi, safi_t safi, if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) { - if (peer->status == Established && peer->afc_nego[afi][safi]) + if (peer->status == Established && peer->afc_nego[afi][safi]) { + update_group_adjust_peer(peer_af_find(peer, afi, safi)); bgp_default_originate (peer, afi, safi, 0); + bgp_announce_route (peer, afi, safi); + } return 0; } @@ -3405,8 +3532,11 @@ peer_default_originate_set (struct peer *peer, afi_t afi, safi_t safi, peer->default_rmap[afi][safi].map = route_map_lookup_by_name (rmap); } - if (peer->status == Established && peer->afc_nego[afi][safi]) + if (peer->status == Established && peer->afc_nego[afi][safi]) { + update_group_adjust_peer(peer_af_find(peer, afi, safi)); bgp_default_originate (peer, afi, safi, 0); + bgp_announce_route (peer, afi, safi); + } } return 0; } @@ -3437,8 +3567,11 @@ peer_default_originate_unset (struct peer *peer, afi_t afi, safi_t safi) if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) { - if (peer->status == Established && peer->afc_nego[afi][safi]) + if (peer->status == Established && peer->afc_nego[afi][safi]) { + update_group_adjust_peer(peer_af_find(peer, afi, safi)); bgp_default_originate (peer, afi, safi, 1); + bgp_announce_route (peer, afi, safi); + } return 0; } @@ -3453,8 +3586,11 @@ peer_default_originate_unset (struct peer *peer, afi_t afi, safi_t safi) peer->default_rmap[afi][safi].name = NULL; peer->default_rmap[afi][safi].map = NULL; - if (peer->status == Established && peer->afc_nego[afi][safi]) + if (peer->status == Established && peer->afc_nego[afi][safi]) { + update_group_adjust_peer(peer_af_find(peer, afi, safi)); bgp_default_originate (peer, afi, safi, 1); + bgp_announce_route (peer, afi, safi); + } } return 0; } @@ -3670,8 +3806,12 @@ peer_advertise_interval_set (struct peer *peer, u_int32_t routeadv) peer->routeadv = routeadv; peer->v_routeadv = routeadv; - if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) { + update_group_adjust_peer_afs (peer); + if (peer->status == Established) + bgp_announce_route_all (peer); return 0; + } /* peer-group member updates. */ group = peer->group; @@ -3680,6 +3820,9 @@ peer_advertise_interval_set (struct peer *peer, u_int32_t routeadv) SET_FLAG (peer->config, PEER_CONFIG_ROUTEADV); peer->routeadv = routeadv; peer->v_routeadv = routeadv; + update_group_adjust_peer_afs (peer); + if (peer->status == Established) + bgp_announce_route_all (peer); } return 0; @@ -3702,8 +3845,12 @@ peer_advertise_interval_unset (struct peer *peer) else peer->v_routeadv = BGP_DEFAULT_EBGP_ROUTEADV; - if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) + if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) { + update_group_adjust_peer_afs (peer); + if (peer->status == Established) + bgp_announce_route_all (peer); return 0; + } /* peer-group member updates. */ group = peer->group; @@ -3716,6 +3863,10 @@ peer_advertise_interval_unset (struct peer *peer) peer->v_routeadv = BGP_DEFAULT_IBGP_ROUTEADV; else peer->v_routeadv = BGP_DEFAULT_EBGP_ROUTEADV; + + update_group_adjust_peer_afs (peer); + if (peer->status == Established) + bgp_announce_route_all (peer); } return 0; @@ -4035,6 +4186,40 @@ peer_password_unset (struct peer *peer) return 0; } +/* + * peer_on_policy_change + * + * Helper function that is called after the name of the policy + * being used by a peer_af has changed. + */ +static void +peer_on_policy_change (struct peer *peer, afi_t afi, safi_t safi) +{ + update_group_adjust_peer (peer_af_find (peer, afi, safi)); +} + +/* Set route-map to the peer. 
*/ +static void +peer_reprocess_routes (struct peer *peer, int direct, + afi_t afi, safi_t safi) +{ + if (peer->status != Established) + return; + + if (direct != RMAP_OUT) + { + if (CHECK_FLAG (peer->af_flags[afi][safi], + PEER_FLAG_SOFT_RECONFIG)) + bgp_soft_reconfig_in (peer, afi, safi); + else if (CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_OLD_RCV) + || CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_NEW_RCV)) + bgp_route_refresh_send (peer, afi, safi, 0, 0, 0); + } + else + bgp_announce_route(peer, afi, safi); +} + + /* Set distribute list to the peer. */ int peer_distribute_set (struct peer *peer, afi_t afi, safi_t safi, int direct, @@ -4064,7 +4249,11 @@ peer_distribute_set (struct peer *peer, afi_t afi, safi_t safi, int direct, filter->dlist[direct].alist = access_list_lookup (afi, name); if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) - return 0; + { + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); + return 0; + } group = peer->group; for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) @@ -4078,6 +4267,9 @@ peer_distribute_set (struct peer *peer, afi_t afi, safi_t safi, int direct, free (filter->dlist[direct].name); filter->dlist[direct].name = strdup (name); filter->dlist[direct].alist = access_list_lookup (afi, name); + + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); } return 0; @@ -4113,6 +4305,8 @@ peer_distribute_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) free (filter->dlist[direct].name); filter->dlist[direct].name = strdup (gfilter->dlist[direct].name); filter->dlist[direct].alist = gfilter->dlist[direct].alist; + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); return 0; } } @@ -4123,7 +4317,11 @@ peer_distribute_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) filter->dlist[direct].alist = NULL; if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) - return 0; + { + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); + return 0; + } group = peer->group; for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) @@ -4137,6 +4335,8 @@ peer_distribute_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) free (filter->dlist[direct].name); filter->dlist[direct].name = NULL; filter->dlist[direct].alist = NULL; + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); } return 0; @@ -4158,6 +4358,9 @@ peer_distribute_update (struct access_list *access) for (ALL_LIST_ELEMENTS (bm->bgp, mnode, mnnode, bgp)) { + if (access->name) + update_group_policy_update(bgp, BGP_POLICY_FILTER_LIST, access->name, + 0, 0); for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) { for (afi = AFI_IP; afi < AFI_MAX; afi++) @@ -4224,7 +4427,11 @@ peer_prefix_list_set (struct peer *peer, afi_t afi, safi_t safi, int direct, filter->plist[direct].plist = prefix_list_lookup (afi, name); if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) - return 0; + { + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); + return 0; + } group = peer->group; for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) @@ -4238,6 +4445,9 @@ peer_prefix_list_set (struct peer *peer, afi_t afi, safi_t safi, int direct, free (filter->plist[direct].name); filter->plist[direct].name = strdup (name); filter->plist[direct].plist = prefix_list_lookup (afi, name); + + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); } return 0; } @@ -4272,6 +4482,8 @@ peer_prefix_list_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) free (filter->plist[direct].name); filter->plist[direct].name = strdup (gfilter->plist[direct].name); filter->plist[direct].plist = gfilter->plist[direct].plist; + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); return 0; } } @@ -4282,7 +4494,11 @@ peer_prefix_list_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) filter->plist[direct].plist = NULL; if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) - return 0; + { + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); + return 0; + } group = peer->group; for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) @@ -4296,6 +4512,9 @@ peer_prefix_list_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) free (filter->plist[direct].name); filter->plist[direct].name = NULL; filter->plist[direct].plist = NULL; + + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); } return 0; @@ -4317,6 +4536,13 @@ peer_prefix_list_update (struct prefix_list *plist) for (ALL_LIST_ELEMENTS (bm->bgp, mnode, mnnode, bgp)) { + + /* + * Update the prefix-list on update groups. + */ + update_group_policy_update(bgp, BGP_POLICY_PREFIX_LIST, + plist ? 
plist->name : NULL, 0, 0); + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) { for (afi = AFI_IP; afi < AFI_MAX; afi++) @@ -4379,7 +4605,11 @@ peer_aslist_set (struct peer *peer, afi_t afi, safi_t safi, int direct, filter->aslist[direct].aslist = as_list_lookup (name); if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) - return 0; + { + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); + return 0; + } group = peer->group; for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) @@ -4393,6 +4623,8 @@ peer_aslist_set (struct peer *peer, afi_t afi, safi_t safi, int direct, free (filter->aslist[direct].name); filter->aslist[direct].name = strdup (name); filter->aslist[direct].aslist = as_list_lookup (name); + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); } return 0; } @@ -4427,6 +4659,8 @@ peer_aslist_unset (struct peer *peer,afi_t afi, safi_t safi, int direct) free (filter->aslist[direct].name); filter->aslist[direct].name = strdup (gfilter->aslist[direct].name); filter->aslist[direct].aslist = gfilter->aslist[direct].aslist; + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); return 0; } } @@ -4437,7 +4671,11 @@ peer_aslist_unset (struct peer *peer,afi_t afi, safi_t safi, int direct) filter->aslist[direct].aslist = NULL; if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) - return 0; + { + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); + return 0; + } group = peer->group; for (ALL_LIST_ELEMENTS (group->peer, node, nnode, peer)) @@ -4451,6 +4689,9 @@ peer_aslist_unset (struct peer *peer,afi_t afi, safi_t safi, int direct) free (filter->aslist[direct].name); filter->aslist[direct].name = NULL; filter->aslist[direct].aslist = NULL; + + if (direct == FILTER_OUT) + peer_on_policy_change (peer, afi, safi); } return 0; @@ -4471,6 +4712,9 @@ peer_aslist_update (char *aslist_name) for (ALL_LIST_ELEMENTS (bm->bgp, mnode, mnnode, bgp)) { + update_group_policy_update(bgp, BGP_POLICY_FILTER_LIST, aslist_name, + 0, 0); + for (ALL_LIST_ELEMENTS (bgp->peer, node, nnode, peer)) { for (afi = AFI_IP; afi < AFI_MAX; afi++) @@ -4507,6 +4751,7 @@ peer_aslist_update (char *aslist_name) } } } + static void peer_aslist_add (char *aslist_name) { @@ -4522,27 +4767,6 @@ peer_aslist_del (char *aslist_name) } -/* Set route-map to the peer. */ -static void -peer_reprocess_routes (struct peer *peer, int direct, - afi_t afi, safi_t safi) -{ - if (peer->status != Established) - return; - - if (direct != RMAP_OUT) - { - if (CHECK_FLAG (peer->af_flags[afi][safi], - PEER_FLAG_SOFT_RECONFIG)) - bgp_soft_reconfig_in (peer, afi, safi); - else if (CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_OLD_RCV) - || CHECK_FLAG (peer->cap, PEER_CAP_REFRESH_NEW_RCV)) - bgp_route_refresh_send (peer, afi, safi, 0, 0, 0); - } - else - bgp_announce_route(peer, afi, safi); -} - int peer_route_map_set (struct peer *peer, afi_t afi, safi_t safi, int direct, const char *name) @@ -4572,6 +4796,8 @@ peer_route_map_set (struct peer *peer, afi_t afi, safi_t safi, int direct, if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) { + if (direct == RMAP_OUT) + peer_on_policy_change (peer, afi, safi); peer_reprocess_routes(peer, direct, afi, safi); return 0; } @@ -4588,6 +4814,8 @@ peer_route_map_set (struct peer *peer, afi_t afi, safi_t safi, int direct, free (filter->map[direct].name); filter->map[direct].name = strdup (name); filter->map[direct].map = route_map_lookup_by_name (name); + if (direct == RMAP_OUT) + peer_on_policy_change (peer, afi, safi); peer_reprocess_routes (peer, direct, afi, safi); } return 0; @@ -4626,6 +4854,8 @@ peer_route_map_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) free (filter->map[direct].name); filter->map[direct].name = strdup (gfilter->map[direct].name); filter->map[direct].map = gfilter->map[direct].map; + if (direct == RMAP_OUT) + peer_on_policy_change (peer, afi, safi); return 0; } } @@ -4637,6 +4867,8 @@ peer_route_map_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) { + if (direct == RMAP_OUT) + peer_on_policy_change (peer, afi, safi); peer_reprocess_routes(peer, direct, afi, safi); return 0; } @@ -4653,6 +4885,8 @@ peer_route_map_unset (struct peer *peer, afi_t afi, safi_t safi, int direct) free (filter->map[direct].name); filter->map[direct].name = NULL; filter->map[direct].map = NULL; + if (direct == RMAP_OUT) + peer_on_policy_change (peer, afi, safi); peer_reprocess_routes(peer, direct, afi, safi); } return 0; @@ -4683,6 +4917,7 @@ peer_unsuppress_map_set (struct peer *peer, afi_t afi, safi_t safi, if (! 
CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) { + peer_on_policy_change (peer, afi, safi); bgp_announce_route (peer, afi, safi); return 0; } @@ -4699,6 +4934,7 @@ peer_unsuppress_map_set (struct peer *peer, afi_t afi, safi_t safi, free (filter->usmap.name); filter->usmap.name = strdup (name); filter->usmap.map = route_map_lookup_by_name (name); + peer_on_policy_change (peer, afi, safi); bgp_announce_route (peer, afi, safi); } return 0; @@ -4727,6 +4963,7 @@ peer_unsuppress_map_unset (struct peer *peer, afi_t afi, safi_t safi) if (! CHECK_FLAG (peer->sflags, PEER_STATUS_GROUP)) { + peer_on_policy_change (peer, afi, safi); bgp_announce_route(peer, afi, safi); return 0; } @@ -4743,6 +4980,7 @@ peer_unsuppress_map_unset (struct peer *peer, afi_t afi, safi_t safi) free (filter->usmap.name); filter->usmap.name = NULL; filter->usmap.map = NULL; + peer_on_policy_change (peer, afi, safi); bgp_announce_route(peer, afi, safi); } return 0; @@ -5301,6 +5539,11 @@ bgp_config_write_peer (struct vty *vty, struct bgp *bgp, vty_out (vty, " neighbor %s password %s%s", addr, peer->password, VTY_NEWLINE); + /* neighbor solo */ + if (CHECK_FLAG(peer->flags, PEER_FLAG_LONESOUL)) + if (!peer_group_active (peer)) + vty_out (vty, " neighbor %s solo%s", addr, VTY_NEWLINE); + /* BGP port. */ if (peer->port != BGP_PORT_DEFAULT) vty_out (vty, " neighbor %s port %d%s", addr, peer->port, @@ -5741,6 +5984,11 @@ bgp_config_write (struct vty *vty) vty_out (vty, " bgp default local-preference %d%s", bgp->default_local_pref, VTY_NEWLINE); + /* BGP default subgroup-pkt-queue-max. */ + if (bgp->default_subgroup_pkt_queue_max != BGP_DEFAULT_SUBGROUP_PKT_QUEUE_MAX) + vty_out (vty, " bgp default subgroup-pkt-queue-max %d%s", + bgp->default_subgroup_pkt_queue_max, VTY_NEWLINE); + /* BGP client-to-client reflection. 
*/ if (bgp_flag_check (bgp, BGP_FLAG_NO_CLIENT_TO_CLIENT)) vty_out (vty, " no bgp client-to-client reflection%s", VTY_NEWLINE); @@ -5797,6 +6045,9 @@ bgp_config_write (struct vty *vty) /* write quanta */ bgp_config_write_wpkt_quanta (vty, bgp); + /* coalesce time */ + bgp_config_write_coalesce_time(vty, bgp); + /* BGP graceful-restart. */ if (bgp->stalepath_time != BGP_DEFAULT_STALEPATH_TIME) vty_out (vty, " bgp graceful-restart stalepath-time %d%s", diff --git a/bgpd/bgpd.h b/bgpd/bgpd.h index 6eafb59b208a..986364952512 100644 --- a/bgpd/bgpd.h +++ b/bgpd/bgpd.h @@ -22,14 +22,42 @@ Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #define _QUAGGA_BGPD_H /* For union sockunion. */ +#include "queue.h" #include "sockunion.h" #include "routemap.h" +struct update_subgroup; +struct bpacket; + /* Typedef BGP specific types. */ typedef u_int32_t as_t; typedef u_int16_t as16_t; /* we may still encounter 16 Bit asnums */ typedef u_int16_t bgp_size_t; +#define max(a,b) \ + ({ __typeof__ (a) _a = (a); \ + __typeof__ (b) _b = (b); \ + _a > _b ? _a : _b; }) + +enum bgp_af_index +{ + BGP_AF_START, + BGP_AF_IPV4_UNICAST = BGP_AF_START, + BGP_AF_IPV4_MULTICAST, + BGP_AF_IPV4_VPN, + BGP_AF_IPV6_UNICAST, + BGP_AF_IPV6_MULTICAST, + BGP_AF_MAX +}; + +#define AF_FOREACH(af) \ + for ((af) = BGP_AF_START; (af) < BGP_AF_MAX; (af)++) + +#define FOREACH_AFI_SAFI(afi, safi) \ + for (afi = AFI_IP; afi < AFI_MAX; afi++) \ + for (safi = SAFI_UNICAST; safi < SAFI_MAX; safi++) + + /* BGP master for system wide configurations and variables. */ struct bgp_master { @@ -61,6 +89,9 @@ struct bgp_master #define BGP_OPT_MULTIPLE_INSTANCE (1 << 1) #define BGP_OPT_CONFIG_CISCO (1 << 2) #define BGP_OPT_NO_LISTEN (1 << 3) + + u_int64_t updgrp_idspace; + u_int64_t subgrp_idspace; }; /* BGP route-map structure. */ @@ -106,6 +137,27 @@ struct bgp /* BGP route-server-clients. */ struct list *rsclient; + struct hash *update_groups[BGP_AF_MAX]; + + /* + * Global statistics for update groups. 
+ */ + struct { + u_int32_t join_events; + u_int32_t prune_events; + u_int32_t merge_events; + u_int32_t split_events; + u_int32_t updgrp_switch_events; + u_int32_t peer_refreshes_combined; + u_int32_t adj_count; + u_int32_t merge_checks_triggered; + + u_int32_t updgrps_created; + u_int32_t updgrps_deleted; + u_int32_t subgrps_created; + u_int32_t subgrps_deleted; + } update_group_stats; + /* BGP configuration. */ u_int16_t config; #define BGP_CONFIG_ROUTER_ID (1 << 0) @@ -212,6 +264,9 @@ struct bgp /* BGP default local-preference. */ u_int32_t default_local_pref; + /* BGP default subgroup pkt queue max */ + u_int32_t default_subgroup_pkt_queue_max; + /* BGP default timer. */ u_int32_t default_holdtime; u_int32_t default_keepalive; @@ -229,8 +284,7 @@ struct bgp } maxpaths[AFI_MAX][SAFI_MAX]; u_int32_t wpkt_quanta; /* per peer packet quanta to write */ - u_int32_t adv_quanta; /* adv FIFO size that triggers write */ - u_int32_t wd_quanta; /* withdraw FIFO size that triggers write */ + u_int32_t coalesce_time; }; #define BGP_ROUTE_ADV_HOLD(bgp) \ @@ -346,6 +400,38 @@ typedef enum #define BGP_MAX_PACKET_SIZE 4096 #define BGP_MAX_PACKET_SIZE_OVERFLOW 1024 +/* + * Trigger delay for bgp_announce_route(). + */ +#define BGP_ANNOUNCE_ROUTE_SHORT_DELAY_MS 100 +#define BGP_ANNOUNCE_ROUTE_DELAY_MS 500 + +struct peer_af +{ + /* back pointer to the peer */ + struct peer *peer; + + /* which subgroup the peer_af belongs to */ + struct update_subgroup *subgroup; + + /* for being part of an update subgroup's peer list */ + LIST_ENTRY(peer_af) subgrp_train; + + /* for being part of a packet's peer list */ + LIST_ENTRY(peer_af) pkt_train; + + struct bpacket *next_pkt_to_send; + + /* + * Trigger timer for bgp_announce_route(). + */ + struct thread *t_announce_route; + + afi_t afi; + safi_t safi; + int afid; +}; + /* BGP neighbor structure. */ struct peer { @@ -362,6 +448,10 @@ struct peer /* BGP peer group. 
*/ struct peer_group *group; u_char af_group[AFI_MAX][SAFI_MAX]; + u_int64_t version[AFI_MAX][SAFI_MAX]; + + /* BGP peer_af structures, per configured AF on this peer */ + struct peer_af *peer_af_array[BGP_AF_MAX]; /* Peer's remote AS number. */ as_t as; @@ -479,6 +569,7 @@ struct peer #define PEER_FLAG_DELETE (1 << 9) /* mark the peer for deleting */ #define PEER_FLAG_CONFIG_NODE (1 << 10) /* the node to update configs on */ #define PEER_FLAG_BFD (1 << 11) /* bfd */ +#define PEER_FLAG_LONESOUL (1 << 12) /* NSF mode (graceful restart) */ u_char nsf[AFI_MAX][SAFI_MAX]; @@ -572,8 +663,6 @@ struct peer struct thread *t_gr_restart; struct thread *t_gr_stale; - int radv_adjusted; /* flag if MRAI has been adjusted or not */ - /* workqueues */ struct work_queue *clear_node_queue; @@ -699,6 +788,12 @@ struct bgp_nlri bgp_size_t length; }; +#define PEERAF_FOREACH(peer, paf, afi) \ + for ((afi) = BGP_AF_START, (paf) = (peer)->peer_af_array[(afi)]; \ + (afi) < BGP_AF_MAX; \ + (afi)++, (paf) = (peer)->peer_af_array[(afi)]) \ + if ((paf) != NULL) \ + /* BGP versions. */ #define BGP_VERSION_4 4 @@ -852,6 +947,9 @@ struct bgp_nlri /* BGP default local preference. */ #define BGP_DEFAULT_LOCAL_PREF 100 +/* BGP default subgroup packet queue max . */ +#define BGP_DEFAULT_SUBGROUP_PKT_QUEUE_MAX 40 + /* BGP graceful restart */ #define BGP_DEFAULT_RESTART_TIME 120 #define BGP_DEFAULT_STALEPATH_TIME 360 @@ -930,6 +1028,17 @@ enum bgp_clear_type #define BGP_ERR_AS_OVERRIDE -34 #define BGP_ERR_MAX -35 +/* + * Enumeration of different policy kinds a peer can be configured with. 
+ */ +typedef enum +{ + BGP_POLICY_ROUTE_MAP, + BGP_POLICY_FILTER_LIST, + BGP_POLICY_PREFIX_LIST, + BGP_POLICY_DISTRIBUTE_LIST, +} bgp_policy_type_e; + extern struct bgp_master *bm; extern struct thread_master *master; @@ -1001,6 +1110,9 @@ extern int bgp_timers_unset (struct bgp *); extern int bgp_default_local_preference_set (struct bgp *, u_int32_t); extern int bgp_default_local_preference_unset (struct bgp *); +extern int bgp_default_subgroup_pkt_queue_max_set (struct bgp *bgp, u_int32_t); +extern int bgp_default_subgroup_pkt_queue_max_unset (struct bgp *bgp); + extern int bgp_update_delay_active (struct bgp *); extern int bgp_update_delay_configured (struct bgp *); extern int peer_rsclient_active (struct peer *); @@ -1096,4 +1208,95 @@ extern int bgp_route_map_update_timer (struct thread *thread); extern void bgp_route_map_terminate(void); extern int peer_cmp (struct peer *p1, struct peer *p2); + +extern struct peer_af * peer_af_create (struct peer *, afi_t, safi_t); +extern struct peer_af * peer_af_find (struct peer *, afi_t, safi_t); +extern int peer_af_delete (struct peer *, afi_t, safi_t); + +static inline int +afindex (afi_t afi, safi_t safi) +{ + switch (afi) + { + case AFI_IP: + switch (safi) + { + case SAFI_UNICAST: + return BGP_AF_IPV4_UNICAST; + break; + case SAFI_MULTICAST: + return BGP_AF_IPV4_MULTICAST; + break; + case SAFI_MPLS_VPN: + return BGP_AF_IPV4_VPN; + break; + default: + return BGP_AF_MAX; + break; + } + break; + case AFI_IP6: + switch (safi) + { + case SAFI_UNICAST: + return BGP_AF_IPV6_UNICAST; + break; + case SAFI_MULTICAST: + return BGP_AF_IPV6_MULTICAST; + break; + default: + return BGP_AF_MAX; + break; + } + break; + default: + return BGP_AF_MAX; + break; + } +} + +/* If peer is configured at least one address family return 1. 
*/ +static inline int +peer_group_active (struct peer *peer) +{ + if (peer->af_group[AFI_IP][SAFI_UNICAST] + || peer->af_group[AFI_IP][SAFI_MULTICAST] + || peer->af_group[AFI_IP][SAFI_MPLS_VPN] + || peer->af_group[AFI_IP6][SAFI_UNICAST] + || peer->af_group[AFI_IP6][SAFI_MULTICAST]) + return 1; + return 0; +} + +/* If peer is negotiated at least one address family return 1. */ +static inline int +peer_afi_active_nego (const struct peer *peer, afi_t afi) +{ + if (peer->afc_nego[afi][SAFI_UNICAST] + || peer->afc_nego[afi][SAFI_MULTICAST] + || peer->afc_nego[afi][SAFI_MPLS_VPN]) + return 1; + return 0; +} + +static inline char * +timestamp_string (time_t ts) +{ +#ifdef HAVE_CLOCK_MONOTONIC + time_t tbuf; + tbuf = time(NULL) - (bgp_clock() - ts); + return ctime(&tbuf); +#else + return ctime(&ts); +#endif /* HAVE_CLOCK_MONOTONIC */ +} + +static inline int +peer_established (struct peer *peer) +{ + if (peer->status == Established) + return 1; + return 0; +} + #endif /* _QUAGGA_BGPD_H */ diff --git a/configure.ac b/configure.ac index f1df482b3285..0821b2fa4ad2 100755 --- a/configure.ac +++ b/configure.ac @@ -60,6 +60,17 @@ elif test -n "$CFLAGS" ; then cflags_specified=yes ; fi +AC_ARG_ENABLE(tcmalloc, +[ --enable-tcmalloc Turn on tcmalloc], +[case "${enableval}" in + yes) tcmalloc_enabled=true +LIBS="$LIBS -ltcmalloc_minimal" + ;; + no) tcmalloc_enabled=false ;; + *) AC_MSG_ERROR(bad value ${enableval} for --enable-tcmalloc) ;; +esac],[tcmalloc_enabled=false]) + + dnl -------------------- dnl Check CC and friends dnl -------------------- diff --git a/lib/hash.c b/lib/hash.c index 56e41fa82605..4d3da66d79dc 100644 --- a/lib/hash.c +++ b/lib/hash.c @@ -225,6 +225,31 @@ hash_iterate (struct hash *hash, } } +/* Iterator function for hash. 
*/ +void +hash_walk (struct hash *hash, + int (*func) (struct hash_backet *, void *), void *arg) +{ + unsigned int i; + struct hash_backet *hb; + struct hash_backet *hbnext; + int ret = HASHWALK_CONTINUE; + + for (i = 0; i < hash->size; i++) + { + for (hb = hash->index[i]; hb; hb = hbnext) + { + /* get pointer to next hash backet here, in case (*func) + * decides to delete hb by calling hash_release + */ + hbnext = hb->next; + ret = (*func) (hb, arg); + if (ret == HASHWALK_ABORT) + return; + } + } +} + /* Clean up hash. */ void hash_clean (struct hash *hash, void (*free_func) (void *)) diff --git a/lib/hash.h b/lib/hash.h index 920c6685fb69..9707dbd1bf76 100644 --- a/lib/hash.h +++ b/lib/hash.h @@ -25,6 +25,9 @@ Boston, MA 02111-1307, USA. */ #define HASH_INITIAL_SIZE 256 /* initial number of backets. */ #define HASH_THRESHOLD 10 /* expand when backet. */ +#define HASHWALK_CONTINUE 0 +#define HASHWALK_ABORT -1 + struct hash_backet { /* Linked list. */ @@ -71,6 +74,9 @@ extern void *hash_release (struct hash *, void *); extern void hash_iterate (struct hash *, void (*) (struct hash_backet *, void *), void *); +extern void hash_walk (struct hash *, + int (*) (struct hash_backet *, void *), void *); + extern void hash_clean (struct hash *, void (*) (void *)); extern void hash_free (struct hash *); diff --git a/lib/memtypes.c b/lib/memtypes.c index ca3a4a4f7615..c32c08817fab 100644 --- a/lib/memtypes.c +++ b/lib/memtypes.c @@ -99,6 +99,10 @@ struct memory_list memory_list_bgp[] = { MTYPE_PEER_GROUP, "Peer group" }, { MTYPE_PEER_DESC, "Peer description" }, { MTYPE_PEER_PASSWORD, "Peer password string" }, + { MTYPE_BGP_PEER_AF, "BGP peer af" }, + { MTYPE_BGP_UPDGRP, "BGP update group" }, + { MTYPE_BGP_UPD_SUBGRP, "BGP update subgroup" }, + { MTYPE_BGP_PACKET, "BGP packet" }, { MTYPE_ATTR, "BGP attribute" }, { MTYPE_ATTR_EXTRA, "BGP extra attributes" }, { MTYPE_AS_PATH, "BGP aspath" }, diff --git a/lib/plist.c b/lib/plist.c index f5950c331f1d..10012f3dc476 100644 --- 
a/lib/plist.c +++ b/lib/plist.c @@ -329,7 +329,7 @@ prefix_list_delete (struct prefix_list *plist) route_map_notify_dependencies(plist->name, RMAP_EVENT_PLIST_DELETED); if (master->delete_hook) - (*master->delete_hook) (NULL); + (*master->delete_hook) (plist); if (plist->name) XFREE (MTYPE_PREFIX_LIST_STR, plist->name); diff --git a/lib/sockunion.c b/lib/sockunion.c index 5dcf72563a18..3cbb59cc607a 100644 --- a/lib/sockunion.c +++ b/lib/sockunion.c @@ -552,7 +552,7 @@ sockopt_v6only (int family, int sock) /* If same family and same prefix return 1. */ int -sockunion_same (union sockunion *su1, union sockunion *su2) +sockunion_same (const union sockunion *su1, const union sockunion *su2) { int ret = 0; diff --git a/lib/sockunion.h b/lib/sockunion.h index b9f3514246aa..8f0a9be37c1c 100644 --- a/lib/sockunion.h +++ b/lib/sockunion.h @@ -86,7 +86,7 @@ enum connect_result extern int str2sockunion (const char *, union sockunion *); extern const char *sockunion2str (union sockunion *, char *, size_t); extern int sockunion_cmp (union sockunion *, union sockunion *); -extern int sockunion_same (union sockunion *, union sockunion *); +extern int sockunion_same (const union sockunion *, const union sockunion *); extern union sockunion *sockunion_str2su (const char *str); extern int sockunion_accept (int sock, union sockunion *); diff --git a/lib/stream.c b/lib/stream.c index 0fc3c3b1185d..cc5898a6dbb0 100644 --- a/lib/stream.c +++ b/lib/stream.c @@ -401,6 +401,21 @@ stream_getl_from (struct stream *s, size_t from) return l; } +/* Copy from stream at specific location to destination. 
*/ +void +stream_get_from (void *dst, struct stream *s, size_t from, size_t size) +{ + STREAM_VERIFY_SANE(s); + + if (!GETP_VALID (s, from + size)) + { + STREAM_BOUND_WARN (s, "get from"); + return; + } + + memcpy (dst, s->data + from, size); +} + u_int32_t stream_getl (struct stream *s) { @@ -709,6 +724,38 @@ stream_put_in_addr (struct stream *s, struct in_addr *addr) return sizeof (u_int32_t); } +/* Put in_addr at location in the stream. */ +int +stream_put_in_addr_at (struct stream *s, size_t putp, struct in_addr *addr) +{ + STREAM_VERIFY_SANE(s); + + if (!PUT_AT_VALID (s, putp + 4)) + { + STREAM_BOUND_WARN (s, "put"); + return 0; + } + + memcpy (&s->data[putp], addr, 4); + return 4; +} + +/* Put in6_addr at location in the stream. */ +int +stream_put_in6_addr_at (struct stream *s, size_t putp, struct in6_addr *addr) +{ + STREAM_VERIFY_SANE(s); + + if (!PUT_AT_VALID (s, putp + 16)) + { + STREAM_BOUND_WARN (s, "put"); + return 0; + } + + memcpy (&s->data[putp], addr, 16); + return 16; +} + /* Put prefix by nlri type format. 
*/ int stream_put_prefix (struct stream *s, struct prefix *p) diff --git a/lib/stream.h b/lib/stream.h index f0c742c05221..3efabe358d67 100644 --- a/lib/stream.h +++ b/lib/stream.h @@ -173,9 +173,12 @@ extern int stream_putq (struct stream *, uint64_t); extern int stream_putq_at (struct stream *, size_t, uint64_t); extern int stream_put_ipv4 (struct stream *, u_int32_t); extern int stream_put_in_addr (struct stream *, struct in_addr *); +extern int stream_put_in_addr_at (struct stream *, size_t, struct in_addr *); +extern int stream_put_in6_addr_at (struct stream *, size_t, struct in6_addr *); extern int stream_put_prefix (struct stream *, struct prefix *); extern void stream_get (void *, struct stream *, size_t); +extern void stream_get_from (void *, struct stream *, size_t, size_t); extern u_char stream_getc (struct stream *); extern u_char stream_getc_from (struct stream *, size_t); extern u_int16_t stream_getw (struct stream *); diff --git a/tests/aspath_test.c b/tests/aspath_test.c index 7fdb5e221020..8ba77b122b2d 100644 --- a/tests/aspath_test.c +++ b/tests/aspath_test.c @@ -3,6 +3,7 @@ #include "vty.h" #include "stream.h" #include "privs.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_aspath.h" diff --git a/tests/bgp_capability_test.c b/tests/bgp_capability_test.c index 31976e9ad8fe..c981504e5468 100644 --- a/tests/bgp_capability_test.c +++ b/tests/bgp_capability_test.c @@ -4,6 +4,7 @@ #include "stream.h" #include "privs.h" #include "memory.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_open.h" diff --git a/tests/bgp_mp_attr_test.c b/tests/bgp_mp_attr_test.c index 831c755b980b..10369cc74f26 100644 --- a/tests/bgp_mp_attr_test.c +++ b/tests/bgp_mp_attr_test.c @@ -4,6 +4,7 @@ #include "stream.h" #include "privs.h" #include "memory.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_attr.h" diff --git a/tests/bgp_mpath_test.c b/tests/bgp_mpath_test.c index a6ca9c53716a..aa20ae4069b7 100644 --- 
a/tests/bgp_mpath_test.c +++ b/tests/bgp_mpath_test.c @@ -29,6 +29,7 @@ #include "linklist.h" #include "memory.h" #include "zclient.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_table.h" diff --git a/tests/ecommunity_test.c b/tests/ecommunity_test.c index 87f20f282d87..6006bd735028 100644 --- a/tests/ecommunity_test.c +++ b/tests/ecommunity_test.c @@ -4,6 +4,7 @@ #include "stream.h" #include "privs.h" #include "memory.h" +#include "queue.h" #include "bgpd/bgpd.h" #include "bgpd/bgp_ecommunity.h"