Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

client SYNC_CONNECTED despite being disconnected from the server for longer than session timeout #13

Open
emiloslavsky opened this issue Oct 3, 2013 · 7 comments · May be fixed by #102

Comments

@emiloslavsky
Copy link

Client stays in SYNC_CONNECTED state when disconnected from the server for longer than its session timeout. I expected DISCONNECTED state after second 3 or 5 below:

$ node watch.js 127.0.0.1:2181 /blah/eph3 bomb& sleep 1; sudo disable_port 2181 10;
[1] 29292
[ '127.0.0.1:2181', '/blah/eph3' ]
0: client is DISCONNECTED[0]
"connected" to the server.
[ 'client state', { name: 'SYNC_CONNECTED', code: 3 } ]
Node: /blah/eph3 is successfully created.
zk.exists@/blah/eph3 [path, result, error, stat] ["/blah/eph3",0,null,{"specification":[{"name":"czxid","type":"long"},{"name":"mzxid","type":"long"},{"name":"ctime","type":"long"},{"name":"mtime","type":"long"},{"name":"version","type":"int"},{"name":"cversion","type":"int"},{"name":"aversion","type":"int"},{"name":"ephemeralOwner","type":"long"},{"name":"dataLength","type":"int"},{"name":"numChildren","type":"int"},{"name":"pzxid","type":"long"}],"czxid":[0,0,0,0,0,0,10,41],"mzxid":[0,0,0,0,0,0,10,41],"ctime":[0,0,1,65,127,31,233,184],"mtime":[0,0,1,65,127,31,233,184],"version":0,"cversion":0,"aversion":0,"ephemeralOwner":[1,65,96,50,23,73,0,51],"dataLength":4,"numChildren":0,"pzxid":[0,0,0,0,0,0,10,41]}]
1: client is SYNC_CONNECTED[3]
2: client is SYNC_CONNECTED[3]
3: client is SYNC_CONNECTED[3]
4: client is SYNC_CONNECTED[3]
5: client is SYNC_CONNECTED[3]
6: client is SYNC_CONNECTED[3]
7: client is SYNC_CONNECTED[3]
8: client is SYNC_CONNECTED[3]
9: client is SYNC_CONNECTED[3]
10: client is SYNC_CONNECTED[3]
11: client is SYNC_CONNECTED[3]
12: client is SYNC_CONNECTED[3]
zk watcher@/blah/eph3: NODE_DELETED[2]@/blah/eph3 state SYNC_CONNECTED[3]
zk.exists@/blah/eph3 [path, result, error, stat] ["/blah/eph3",-4,"Exception: CONNECTION_LOSS[-4]",null]
[ 'client state', { name: 'DISCONNECTED', code: 0 } ]
[ 'client state', { name: 'EXPIRED', code: -122 } ]
close1
[ 'client state', { name: 'DISCONNECTED', code: 0 } ]
13: client is DISCONNECTED[0]
14: client is DISCONNECTED[0]
15: client is DISCONNECTED[0]
$ cat watch.js
var zookeeper = require('node-zookeeper-client');
function inspect(o) { return require('util').inspect(o, false, null); }

console.dir([process.argv[2], process.argv[3]]);
var client = zookeeper.createClient(process.argv[2], {retries: 0, sessionTimeout: 5000});
var path = process.argv[3];
var data = new Buffer(process.argv[4]);

function exceptionObjectToCode(exceptionObject) { return exceptionObject ? exceptionObject.getCode() : zookeeper.Exception.OK; }
function exceptionObjectToString(exceptionObject) { return exceptionObject ? exceptionObject.toString() : null; }

client.on('state', function (s) {
   console.dir(['client state', s]);
   if (client.getState() === zookeeper.State.EXPIRED) {
     console.log('close1');
     client.close();
   }
});

function curryExistsHandler(path, next) {
  return function (exceptionObject, stat) {
    var rc = exceptionObjectToCode(exceptionObject); 
    var error = exceptionObjectToString(exceptionObject);
    console.log("zk.exists@" + path + " [path, result, error, stat] " + JSON.stringify([path, rc, error, stat]));
    return next(null);
  };
}
function watcherCallback(watcherEvent) {
  var type = watcherEvent.getType();
  var path = watcherEvent.getPath();
  console.log('zk watcher@' + path + ': ' + watcherEvent.toString() + ' state ' + client.getState());
  client.exists(path, watcherCallback,
    curryExistsHandler(path,
      function(nextError) {
        if (nextError) {
          console.log('exists callback throwing ' + inspect(nextError));
          throw nextError;
        }
      }));
}

client.once('connected', function () {
    console.log('"connected" to the server.');
    client.create(path,  new Buffer('blah'), zookeeper.ACL.OPEN_ACL_UNSAFE, 
      zookeeper.CreateMode.EPHEMERAL,
      function (error) {
        if (error) {
            console.log('Failed to create node: %s due to: %s.', path, error);
        } else {
            console.log('Node: %s is successfully created.', path);
        }

        client.exists(path, watcherCallback, curryExistsHandler(path,
              function(nextError) {
                if (nextError) {
                  console.log('exists (top) callback throwing ' + inspect(nextError));
                  throw nextError;
                }
              }));
    });
});
client.connect();

var startTime = Date.now();
function reportClientStateLoop() {
  console.log(Math.round((Date.now()-startTime)/1000) + ': client is ' + client.getState());
  setTimeout(reportClientStateLoop, 1000);
}
reportClientStateLoop();

$ cat disable_port
/bin/bash -c "/sbin/iptables -I INPUT 1 -p tcp --sport ${1} -j DROP; /sbin/iptables -I INPUT 2 -p tcp --dport ${1} -j DROP; sleep ${2}; /sbin/iptables -D INPUT 1; /sbin/iptables -D INPUT 1"
@kmailkarthik
Copy link

Is there any plan to fix this? We always see this when the remote server is powered off and the client doesn't get any response from server.

@aatong
Copy link

aatong commented Jul 1, 2014

Would also like to know if this can be fixed.

@alexguan
Copy link
Owner

I will take a look this weekend.

@raghu-yemmanuru
Copy link

Hi alexguan, I was also looking at this issue. Basically it happens in two cases

  1. server goes into network partitioning and can't reach clients
  2. Server is powered off without a chance to gracefully clean up its connections.

I have worked on a fix for this. Idea is to use the readTimeout (2/3 * sessiontimeout). If with in this time if we have not seen any data, then we can destroy the socket on client side. this will reconnect to other available servers.Since, we are sending ping every 1/3_sessiontimeout, we should be expecting a read response within 2/3_sessiontimeout. I saw "C" Implementation and they have this timeout handling.

Let me know what you think of this patch.

diff:


--- /home/ryemmanu/duncans/controller/node_modules/node-zookeeper-client/lib/ConnectionManager.js       2014-03-31 20:05:07.000000000 -0400
+++ ConnectionManager.js        2014-07-24 15:34:15.943699955 -0400
@@ -61,6 +61,7 @@
     this.updateTimeout(options.sessionTimeout);
     this.connectTimeoutHandler = null;
+    this.readTimeoutHandler = null;
     this.xid = 0;
@@ -113,7 +114,7 @@
     // We at least send out one ping one third of the session timeout, so
     // the read timeout is two third of the session timeout.
     this.pingTimeout = Math.floor(this.sessionTimeout / 3);
-    //this.readTimeout = Math.floor(sessionTimeout * 2 / 3);
+    this.readTimeout = Math.floor(sessionTimeout * 2 / 3);
 };
 /**
@@ -286,6 +287,11 @@
         clearTimeout(this.connectTimeoutHandler);
     }
+    if (this.readTimeoutHandler) {
+        clearTimeout(this.readTimeoutHandler);
+        this.readTimeoutHandler = null;
+    }
+
     // After socket error, the socket closed event will be triggered,
     // we will retry connect in that listener function.
 };
@@ -390,6 +396,12 @@
         response,
         event;
+    // clear readTimeoutHandler on receiving data
+    if (this.readTimeoutHandler) {
+        clearTimeout(this.readTimeoutHandler);
+        this.readTimeoutHandler = null;
+    }
+
     // Combine the pending buffer with the new buffer.
     if (self.pendingBuffer) {
         buffer = Buffer.concat(
@@ -575,6 +587,13 @@
         }
     }
+    self.readTimeoutHandler = setTimeout(
+        function () {
+            self.socket.destroy(); // this will trigger reconnect
+        },
+        self.readTimeout
+    );
+
     // We have more data to process, need to recursively process it.
     if (self.pendingBuffer) {
         self.onSocketData(new Buffer(0));

@raghu-yemmanuru
Copy link

hey , please let me know if the patch is good enough or you have a better one?

@tb01923
Copy link

tb01923 commented Sep 8, 2016

Any update on this?

@aleung
Copy link

aleung commented Feb 11, 2019

It's a blocking issue to me. Any plan on fixing this issue?

aleung added a commit to aleung/node-zookeeper-client that referenced this issue Feb 12, 2019
aleung added a commit to aleung/node-zookeeper-client that referenced this issue Jan 6, 2020
dreusel referenced this issue in dreusel/node-zookeeper-client Dec 13, 2020
Apply fix to #13: Client stays in SYNC_CONNECTED state when disconnec…
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
7 participants