Skip to content

Commit

Permalink
3925 IP DCE does not scale
Browse files Browse the repository at this point in the history
Reviewed by: Keith M Wesolowski <wesolows@foobazco.org>
Reviewed by: Theo Schlossnagle <jesus@omniti.com>
Reviewed by: Sebastien Roy <seb@delphix.com>
Approved by: Dan McDonald <danmcd@nexenta.com>
  • Loading branch information
jjelinek authored and rmustacc committed Aug 5, 2013
1 parent 096b237 commit 7c6d702
Show file tree
Hide file tree
Showing 10 changed files with 234 additions and 42 deletions.
12 changes: 12 additions & 0 deletions usr/src/uts/common/inet/ip.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

/*
* Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 1990 Mentat Inc.
*/

Expand Down Expand Up @@ -2195,6 +2196,8 @@ struct ip_xmit_attr_s {
*/
ixa_notify_t ixa_notify; /* Registered upcall notify function */
void *ixa_notify_cookie; /* ULP cookie for ixa_notify */

uint_t ixa_tcpcleanup; /* Used by conn_ixa_cleanup */
};

/*
Expand Down Expand Up @@ -2265,6 +2268,14 @@ struct ip_xmit_attr_s {
#define IXA_FREE_CRED 0x00000001 /* ixa_cred needs to be rele */
#define IXA_FREE_TSL 0x00000002 /* ixa_tsl needs to be rele */

/*
* Trivial state machine used to synchronize IXA cleanup for TCP connections.
* See conn_ixa_cleanup().
*
* The state is kept in ixa_tcpcleanup and is read and written while holding
* the TCP stack's tcps_ixa_cleanup_lock. The transitions are:
*
* IDLE -> INPROGRESS: tcp_ixa_cleanup_getmblk() claims the connection
* (it first waits for the state to be IDLE).
* INPROGRESS -> COMPLETE: tcp_ixa_cleanup() has finished.
* COMPLETE -> IDLE: tcp_ixa_cleanup_wait_and_finish() acknowledges the
* completion.
*/
#define IXATC_IDLE 0x00000000 /* no cleanup claimed for this IXA */
#define IXATC_INPROGRESS 0x00000001 /* cleanup claimed, not yet finished */
#define IXATC_COMPLETE 0x00000002 /* cleanup finished, not yet acknowledged */

/*
* Simplistic way to set the ixa_xmit_hint for locally generated traffic
* and forwarded traffic. The shift amounts are based on the size of the
Expand Down Expand Up @@ -3030,6 +3041,7 @@ extern vmem_t *ip_minor_arena_la;
#define ips_ip_strict_src_multihoming ips_propinfo_tbl[80].prop_cur_uval
#define ips_ipv6_strict_src_multihoming ips_propinfo_tbl[81].prop_cur_uval
#define ips_ipv6_drop_inbound_icmpv6 ips_propinfo_tbl[82].prop_cur_bval
#define ips_ip_dce_reclaim_threshold ips_propinfo_tbl[83].prop_cur_uval

extern int dohwcksum; /* use h/w cksum if supported by the h/w */
#ifdef ZC_TEST
Expand Down
107 changes: 84 additions & 23 deletions usr/src/uts/common/inet/ip/ip_attr.c
Original file line number Diff line number Diff line change
Expand Up @@ -1176,6 +1176,59 @@ ixa_cleanup_stale(ip_xmit_attr_t *ixa)
}
}

/*
 * Claim the TCP stack's shared cleanup mblk on behalf of connp.
 *
 * Blocks until, with tcps_ixa_cleanup_lock held, both of these are true at
 * the same time: connp's IXA is in IXATC_IDLE and tcps_ixa_cleanup_mp is
 * non-NULL.  The IXA is then marked IXATC_INPROGRESS, the stack's mblk
 * pointer is cleared, the lock is dropped and the mblk is returned.
 */
static mblk_t *
tcp_ixa_cleanup_getmblk(conn_t *connp)
{
	tcp_stack_t *tcps = connp->conn_netstack->netstack_tcp;
	mblk_t *cleanup_mp;
	int slept_for_mp;

	mutex_enter(&tcps->tcps_ixa_cleanup_lock);

	/*
	 * Two conditions have to hold simultaneously, but each wait below
	 * drops the lock.  If we had to sleep for the shared mblk, another
	 * thread may have started (and not yet acknowledged) a cleanup of
	 * this very connection in the meantime, so the connection state has
	 * to be checked again from the top.
	 */
	for (;;) {
		/* Wait for any cleanup of this connection to be finished. */
		while (connp->conn_ixa->ixa_tcpcleanup != IXATC_IDLE) {
			cv_wait(&tcps->tcps_ixa_cleanup_done_cv,
			    &tcps->tcps_ixa_cleanup_lock);
		}

		/*
		 * Wait for the shared mblk.  Multiple cleanups may be running
		 * concurrently; the last one must still run since it could be
		 * an unplumb.
		 */
		slept_for_mp = 0;
		for (;;) {
			cleanup_mp = tcps->tcps_ixa_cleanup_mp;
			if (cleanup_mp != NULL)
				break;
			slept_for_mp = 1;
			cv_wait(&tcps->tcps_ixa_cleanup_ready_cv,
			    &tcps->tcps_ixa_cleanup_lock);
		}

		/* No sleep since the state check: both conditions hold. */
		if (!slept_for_mp)
			break;
	}

	/*
	 * The lock is held and the mblk is ours.  Mark the connection so
	 * nobody else cleans it up or queues it for cleanup, take the mblk
	 * away from the stack, and release the lock.
	 */
	ASSERT(MUTEX_HELD(&tcps->tcps_ixa_cleanup_lock));
	ASSERT(connp->conn_ixa->ixa_tcpcleanup == IXATC_IDLE);
	ASSERT(tcps->tcps_ixa_cleanup_mp == cleanup_mp);
	ASSERT(cleanup_mp != NULL);

	connp->conn_ixa->ixa_tcpcleanup = IXATC_INPROGRESS;
	tcps->tcps_ixa_cleanup_mp = NULL;
	mutex_exit(&tcps->tcps_ixa_cleanup_lock);

	return (cleanup_mp);
}

/*
* Used to run ixa_cleanup_stale inside the tcp squeue.
* When done we hand the mp back by assigning it to tcps_ixa_cleanup_mp
Expand All @@ -1195,11 +1248,39 @@ tcp_ixa_cleanup(void *arg, mblk_t *mp, void *arg2,

mutex_enter(&tcps->tcps_ixa_cleanup_lock);
ASSERT(tcps->tcps_ixa_cleanup_mp == NULL);
connp->conn_ixa->ixa_tcpcleanup = IXATC_COMPLETE;
tcps->tcps_ixa_cleanup_mp = mp;
cv_signal(&tcps->tcps_ixa_cleanup_cv);
cv_signal(&tcps->tcps_ixa_cleanup_ready_cv);
/*
* It is possible for any number of threads to be waiting for cleanup of
* different connections. Absent a per-connection (or per-IXA) CV, we
* need to wake them all up even though only one can be waiting on this
* particular cleanup.
*/
cv_broadcast(&tcps->tcps_ixa_cleanup_done_cv);
mutex_exit(&tcps->tcps_ixa_cleanup_lock);
}

/*
 * Wait for the cleanup previously claimed for connp to reach IXATC_COMPLETE,
 * then return the connection's IXA to IXATC_IDLE and wake every thread
 * sleeping on tcps_ixa_cleanup_done_cv.
 *
 * The IXA must not be in IXATC_IDLE on entry.
 */
static void
tcp_ixa_cleanup_wait_and_finish(conn_t *connp)
{
	tcp_stack_t *tcps = connp->conn_netstack->netstack_tcp;

	mutex_enter(&tcps->tcps_ixa_cleanup_lock);

	ASSERT(connp->conn_ixa->ixa_tcpcleanup != IXATC_IDLE);

	/* Sleep until the cleanup is no longer marked in progress. */
	for (;;) {
		if (connp->conn_ixa->ixa_tcpcleanup != IXATC_INPROGRESS)
			break;
		cv_wait(&tcps->tcps_ixa_cleanup_done_cv,
		    &tcps->tcps_ixa_cleanup_lock);
	}

	ASSERT(connp->conn_ixa->ixa_tcpcleanup == IXATC_COMPLETE);

	/*
	 * Acknowledge the completion.  The done CV is shared by the whole
	 * stack, so broadcast to let any thread waiting for this connection
	 * to become idle re-check its state.
	 */
	connp->conn_ixa->ixa_tcpcleanup = IXATC_IDLE;
	cv_broadcast(&tcps->tcps_ixa_cleanup_done_cv);

	mutex_exit(&tcps->tcps_ixa_cleanup_lock);
}

/*
* ipcl_walk() function to help release any IRE, NCE, or DCEs that
Expand All @@ -1214,21 +1295,8 @@ conn_ixa_cleanup(conn_t *connp, void *arg)

if (IPCL_IS_TCP(connp)) {
mblk_t *mp;
tcp_stack_t *tcps;

tcps = connp->conn_netstack->netstack_tcp;

mutex_enter(&tcps->tcps_ixa_cleanup_lock);
while ((mp = tcps->tcps_ixa_cleanup_mp) == NULL) {
/*
* Multiple concurrent cleanups; need to have the last
* one run since it could be an unplumb.
*/
cv_wait(&tcps->tcps_ixa_cleanup_cv,
&tcps->tcps_ixa_cleanup_lock);
}
tcps->tcps_ixa_cleanup_mp = NULL;
mutex_exit(&tcps->tcps_ixa_cleanup_lock);
mp = tcp_ixa_cleanup_getmblk(connp);

if (connp->conn_sqp->sq_run == curthread) {
/* Already on squeue */
Expand All @@ -1237,15 +1305,8 @@ conn_ixa_cleanup(conn_t *connp, void *arg)
CONN_INC_REF(connp);
SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_ixa_cleanup,
connp, NULL, SQ_PROCESS, SQTAG_TCP_IXA_CLEANUP);

/* Wait until tcp_ixa_cleanup has run */
mutex_enter(&tcps->tcps_ixa_cleanup_lock);
while (tcps->tcps_ixa_cleanup_mp == NULL) {
cv_wait(&tcps->tcps_ixa_cleanup_cv,
&tcps->tcps_ixa_cleanup_lock);
}
mutex_exit(&tcps->tcps_ixa_cleanup_lock);
}
tcp_ixa_cleanup_wait_and_finish(connp);
} else if (IPCL_IS_SCTP(connp)) {
sctp_t *sctp;
sctp_faddr_t *fp;
Expand Down
Loading

0 comments on commit 7c6d702

Please sign in to comment.