
feat: support healthcheck when connect to etcd cluster #96

Closed
wants to merge 31 commits

Conversation

tzssangglass
Contributor

@tzssangglass tzssangglass commented Nov 13, 2020

fix: #55

@tzssangglass tzssangglass changed the title [WIP] support healthcheck when connect to etcd cluster [WIP] feat: support healthcheck when connect to etcd cluster Nov 13, 2020
@tzssangglass tzssangglass changed the title [WIP] feat: support healthcheck when connect to etcd cluster [WIP] feat:support healthcheck when connect to etcd cluster Nov 13, 2020
@tzssangglass tzssangglass changed the title [WIP] feat:support healthcheck when connect to etcd cluster [WIP] feat: support healthcheck when connect to etcd cluster Nov 13, 2020
@tzssangglass
Contributor Author

tzssangglass commented Nov 15, 2020

ping @membphis @spacewander
How should we write test cases for etcd cluster runtime errors, such as network partitioning or an etcd node becoming unavailable?
I don't have a clear idea.
I tried the following:

=== TEST 2: connect timeout
--- http_config eval: $::HttpConfig
--- config

    resolver 8.8.8.8;
    resolver_timeout 1s;

    location /t {
        content_by_lua_block {
            local etcd, err = require "resty.etcd" .new({
                protocol = "v3",
                http_host = {
                    "http://127.0.0.1:12379",
                    "http://127.0.0.1:22379",
                    "http://127.0.0.1:32379",
                },
                user = 'root',
                password = 'abc123',
                cluster_healthcheck = {
                    shm_name = 'test_shm',
                }
            })

            check_res(etcd, err)
            local network_isolation_cmd = "iptables -A INPUT -i lo -s 127.0.0.1 -d 127.0.0.1 -p tcp --dport 12379 -j DROP"
            os.execute(network_isolation_cmd)

            local res, err = etcd:set("/test", { a='abc'})
            check_res(res, err)

            local network_recovery_cmd = "iptables -D INPUT -i lo -s 127.0.0.1 -d 127.0.0.1 -p tcp --dport 12379 -j DROP"
            os.execute(network_recovery_cmd)

            ngx.say(err)
        }
    }
--- request
GET /t
--- no_error_log
[error]
--- response_body
timeout

I'm not sure it's appropriate to write test cases this way.
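
For reference, the cluster_healthcheck.shm_name option above points at an nginx shared memory zone, so the $::HttpConfig used by the test also has to declare a matching lua_shared_dict (lua-resty-healthcheck keeps the per-node health status there). A minimal sketch of that declaration; the 1m size is only illustrative:

    # shared dict backing cluster_healthcheck.shm_name = 'test_shm'
    lua_shared_dict test_shm 1m;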

@spacewander
Contributor

I don't have any idea better than yours.

@tzssangglass tzssangglass changed the title [WIP] feat: support healthcheck when connect to etcd cluster feat: support healthcheck when connect to etcd cluster Nov 24, 2020
@tzssangglass
Contributor Author

ping @membphis, it's ok now.

@tzssangglass
Contributor Author

I tried the following two test cases, which worked in my local environment, but always failed in CI.

=== TEST 6: mock tcp connect timeout and recovery, report the node as unhealthy and then healthy
--- http_config eval: $::HttpConfig
--- config
    location /t {
        content_by_lua_block {
            -- drop packets to the first etcd node to simulate a connect timeout
            local network_isolation_cmd = "export PATH=$PATH:/sbin && iptables -A INPUT -p tcp --dport 12379 -j DROP"
            io.popen(network_isolation_cmd)
            ngx.sleep(1)

            local etcd, err = require "resty.etcd" .new({
                protocol = "v3",
                api_prefix = "/v3",
                http_host = {
                    "http://127.0.0.1:12379",
                    "http://127.0.0.1:22379",
                    "http://127.0.0.1:32379",
                },
                user = 'root',
                password = 'abc123',
                cluster_healthcheck = {
                    shm_name = 'test_shm',
                },
            })

            local res, err = etcd:set("/healthcheck", "yes")

            -- remove the DROP rule to restore connectivity to the node
            local network_recovery_cmd = "export PATH=$PATH:/sbin && iptables -D INPUT -p tcp --dport 12379 -j DROP"
            io.popen(network_recovery_cmd)
            ngx.sleep(1)
        }
    }
--- request
GET /t
--- ignore_response
--- error_log eval
[qr/unhealthy TCP increment.*127.0.0.1:12379/,
qr/healthy SUCCESS increment.*127.0.0.1:12379/]
--- timeout: 10



=== TEST 7: mock network partition and recovery, report the node as unhealthy and then healthy
--- http_config eval: $::HttpConfig
--- config
    location /t {
        content_by_lua_block {
            -- drop the peer ports of the other two members to simulate a network partition
            io.popen("export PATH=$PATH:/sbin && iptables -A INPUT -p tcp --dport 22380 -j DROP")
            io.popen("export PATH=$PATH:/sbin && iptables -A INPUT -p tcp --dport 32380 -j DROP")
            ngx.sleep(3)

            local etcd, err = require "resty.etcd" .new({
                protocol = "v3",
                api_prefix = "/v3",
                http_host = {
                    "http://127.0.0.1:12379",
                    "http://127.0.0.1:22379",
                    "http://127.0.0.1:32379",
                },
                user = 'root',
                password = 'abc123',
                cluster_healthcheck = {
                    shm_name = 'test_shm',
                },
            })

            local res, err = etcd:set("/network/partition", "test")

            -- remove the DROP rules to heal the partition
            io.popen("export PATH=$PATH:/sbin && iptables -D INPUT -p tcp --dport 22380 -j DROP")
            io.popen("export PATH=$PATH:/sbin && iptables -D INPUT -p tcp --dport 32380 -j DROP")
            ngx.sleep(5)
        }
    }
--- request
GET /t
--- timeout: 20
--- ignore_response
--- error_log eval
[qr/unhealthy TCP increment.*127.0.0.1:12379/,
qr/healthy SUCCESS increment.*127.0.0.1:12379/]

@tzssangglass
Contributor Author

@membphis @spacewander @nic-chen pls review

@spacewander
Contributor

We are busy making a new release. We'll take care of this in a few days.

@tzssangglass
Contributor Author

We are busy making a new release. We'll take care of this in a few days.

Got it.

lib/resty/etcd/cluster/healthcheck.lua (outdated review thread, resolved)
lib/resty/etcd/cluster/healthcheck.lua (outdated review thread, resolved)
lib/resty/etcd/cluster/healthcheck.lua (outdated review thread, resolved)
lib/resty/etcd/cluster/healthcheck.lua (outdated review thread, resolved)
lib/resty/etcd.lua (outdated review thread, resolved)
@tzssangglass
Contributor Author

Resolved the above comments.

lib/resty/etcd.lua (outdated review thread, resolved)
lib/resty/etcd/cluster/healthcheck.lua (outdated review thread, resolved)
@tzssangglass
Contributor Author

Resolved. I've changed the code so that errors are returned as err instead of being printed to the error log, please take note.
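
A minimal sketch of that pattern, with the error returned to the caller instead of only being logged; report_failure and the self.checker field are illustrative names rather than the PR's actual code:

    local function report_failure(self, host, port)
        local ok, err = self.checker:report_tcp_failure(host, port)
        if not ok then
            -- previously this was logged with ngx.log(ngx.ERR, err);
            -- now the caller decides how to handle the error
            return nil, err
        end
        return true
    end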

lib/resty/etcd/cluster/healthcheck.lua (outdated review thread, resolved)
cluster_healthcheck.md (outdated review thread, resolved)
lib/resty/etcd/cluster/healthcheck.lua (outdated review thread, resolved)
@tzssangglass
Contributor Author

@membphis @spacewander pls review again

end

local err
checker, err = healthcheck.new({
Contributor


wow, I think that is the wrong way.

Different etcd instances may create different checker objects; the checker object should belong to its etcd instance.

So we cannot use a shared checker across different etcd instances.
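
A minimal sketch of the per-instance approach suggested here, with the checker stored on the etcd object itself rather than in a module-level local shared by every instance; the function and field names are illustrative, not the PR's final code:

    local healthcheck = require("resty.healthcheck")

    local function create_checker(self, conf)
        local checker, err = healthcheck.new({
            name = "etcd-cluster-healthcheck",  -- illustrative checker name
            shm_name = conf.shm_name,
        })
        if not checker then
            return nil, err
        end
        -- each resty.etcd instance owns its own checker
        self.checker = checker
        return true
    end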

@tzssangglass tzssangglass deleted the IssueNo55 branch January 30, 2021 09:34