From 0824c85efbc459a00b9fc8ac2f3c7f0986e04f95 Mon Sep 17 00:00:00 2001 From: SwapnilGaonkar7 <68695643+SwapnilGaonkar7@users.noreply.github.com> Date: Wed, 22 Jun 2022 09:04:45 +0530 Subject: [PATCH] CORTX-29979: Multiple Data pod deployment: Support Node group in CDF (#2116) 1. Multiple Data pod deployment: Support Node group in CDF 2. Hare builds are failing for main and custom-ci branches (#2122) Solution: 1: Added support for 'node_group' in CDF format 2: The issue was with the version of charset-normalizer, a package that aiohttp (a package required by HARE) was pulling in as a dependency. We have now set the version of charset-normalizer to 2.0.12 - which works with aiohttp 3.8.1 as required by HARE. Signed-off-by: Swapnil Gaonkar Signed-off-by: pavankrishnat Signed-off-by: Deepak Nayak --- Makefile | 2 + cfgen/cfgen | 1 + cfgen/dhall/types/NodeDesc.dhall | 1 + cfgen/examples/ci-boot1-2ios.yaml | 1 + cfgen/examples/ci-boot2-1confd.yaml | 2 + cfgen/examples/ci-boot2.yaml | 2 + cfgen/examples/ci-boot3.yaml | 3 + cfgen/examples/ldr1-cluster.yaml | 2 + cfgen/examples/multipools.yaml | 2 + cfgen/examples/singlenode.yaml | 1 + cfgen/tests/singlenode.dhall | 1 + provisioning/miniprov/hare_mp/cdf.py | 16 +++- .../miniprov/hare_mp/dhall/gencdf.dhall | 2 + .../hare.config.conf.tmpl.3-node.sample | 3 + provisioning/miniprov/hare_mp/types.py | 1 + provisioning/miniprov/hare_mp/utils.py | 12 +++ provisioning/miniprov/test/test_cdf.py | 93 ++++++++++++++----- 17 files changed, 121 insertions(+), 24 deletions(-) diff --git a/Makefile b/Makefile index ddb69f5e4..9bbf52b9d 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # +# +# # For any questions about this software or licensing, # please email opensource@seagate.com or cortx-questions@seagate.com. 
diff --git a/cfgen/cfgen b/cfgen/cfgen index 1b0510cda..6f6a119c0 100755 --- a/cfgen/cfgen +++ b/cfgen/cfgen @@ -179,6 +179,7 @@ class ShowSchema(argparse.Action): create_aux: # optional, defaults to false if not present in CDF nodes: - hostname: # [user@]hostname; e.g., localhost, pod-c1 + node_group: # name of the node_group in which this node is present data_iface: # name of network interface; e.g., eth1, eth1:c1 data_iface_type: tcp|o2ib # type of network interface; # optional, defaults to "tcp" diff --git a/cfgen/dhall/types/NodeDesc.dhall b/cfgen/dhall/types/NodeDesc.dhall index c61eae4cf..231c30fd3 100644 --- a/cfgen/dhall/types/NodeDesc.dhall +++ b/cfgen/dhall/types/NodeDesc.dhall @@ -19,6 +19,7 @@ -} { hostname : Text +, node_group: Optional Text , machine_id : Optional Text , processorcount: Optional Natural , memorysize_mb: Optional Double diff --git a/cfgen/examples/ci-boot1-2ios.yaml b/cfgen/examples/ci-boot1-2ios.yaml index 8aa3e1380..c621c79b0 100644 --- a/cfgen/examples/ci-boot1-2ios.yaml +++ b/cfgen/examples/ci-boot1-2ios.yaml @@ -1,5 +1,6 @@ nodes: - hostname: localhost + node_group: localhost data_iface: eth1 data_iface_type: tcp transport_type: libfab diff --git a/cfgen/examples/ci-boot2-1confd.yaml b/cfgen/examples/ci-boot2-1confd.yaml index a418100c6..95d4b3489 100644 --- a/cfgen/examples/ci-boot2-1confd.yaml +++ b/cfgen/examples/ci-boot2-1confd.yaml @@ -1,5 +1,6 @@ nodes: - hostname: ssu1 + node_group: ssu1 data_iface: eth1 transport_type: libfab m0_servers: @@ -18,6 +19,7 @@ nodes: - name: m0_client_other # name of the motr client instances: 2 # Number of instances, this host will run - hostname: ssu2 + node_group: ssu2 data_iface: eth1 transport_type: libfab m0_servers: diff --git a/cfgen/examples/ci-boot2.yaml b/cfgen/examples/ci-boot2.yaml index 239f38ce1..bc7cc6f19 100644 --- a/cfgen/examples/ci-boot2.yaml +++ b/cfgen/examples/ci-boot2.yaml @@ -1,5 +1,6 @@ nodes: - hostname: ssu1 + node_group: ssu1 data_iface: eth1 transport_type: libfab 
m0_servers: @@ -18,6 +19,7 @@ nodes: - name: m0_client_other # name of the motr client instances: 2 # Number of instances, this host will run - hostname: ssu2 + node_group: ssu2 data_iface: eth1 transport_type: libfab m0_servers: diff --git a/cfgen/examples/ci-boot3.yaml b/cfgen/examples/ci-boot3.yaml index a4330fe8f..19909f25c 100644 --- a/cfgen/examples/ci-boot3.yaml +++ b/cfgen/examples/ci-boot3.yaml @@ -1,5 +1,6 @@ nodes: - hostname: ssu1 + node_group: ssu1 data_iface: eth1 transport_type: libfab m0_servers: @@ -18,6 +19,7 @@ nodes: - name: m0_client_other # name of the motr client instances: 2 # Number of instances, this host will run - hostname: ssu2 + node_group: ssu2 data_iface: eth1 transport_type: libfab m0_servers: @@ -36,6 +38,7 @@ nodes: - name: m0_client_other # name of the motr client instances: 2 # Number of instances, this host will run - hostname: ssu3 + node_group: ssu3 data_iface: eth1 transport_type: libfab m0_servers: diff --git a/cfgen/examples/ldr1-cluster.yaml b/cfgen/examples/ldr1-cluster.yaml index c49304873..148c71d76 100644 --- a/cfgen/examples/ldr1-cluster.yaml +++ b/cfgen/examples/ldr1-cluster.yaml @@ -3,6 +3,7 @@ nodes: - hostname: pod-c1 # [user@]hostname + node_group: pod-c1 data_iface: eth1_c1 # name of data network interface data_iface_type: o2ib # LNet type of network interface (optional); # supported values: "tcp" (default), "o2ib" @@ -21,6 +22,7 @@ nodes: - name: m0_client_other # name of the motr client instances: 2 # Number of instances, this host will run - hostname: pod-c2 + node_group: pod-c2 data_iface: eth1_c2 data_iface_type: o2ib transport_type: libfab diff --git a/cfgen/examples/multipools.yaml b/cfgen/examples/multipools.yaml index 6bcbdeb39..3df5411fc 100644 --- a/cfgen/examples/multipools.yaml +++ b/cfgen/examples/multipools.yaml @@ -1,5 +1,6 @@ nodes: - hostname: srvnode-1 + node_group: srvnode-1 data_iface: enp175s0f1_c1 data_iface_type: o2ib transport_type: libfab @@ -33,6 +34,7 @@ nodes: # port: 21500 # 
m0_client_s3: 22500 - hostname: srvnode-2 + node_group: srvnode-2 data_iface: enp175s0f1_c2 data_iface_type: o2ib transport_type: libfab diff --git a/cfgen/examples/singlenode.yaml b/cfgen/examples/singlenode.yaml index 41628640b..19f5443cb 100644 --- a/cfgen/examples/singlenode.yaml +++ b/cfgen/examples/singlenode.yaml @@ -3,6 +3,7 @@ nodes: - hostname: localhost # [user@]hostname + node_group: localhost data_iface: eth1 # name of data network interface data_iface_ip_addr: null transport_type: libfab diff --git a/cfgen/tests/singlenode.dhall b/cfgen/tests/singlenode.dhall index 5c83d05df..8432ce5cd 100644 --- a/cfgen/tests/singlenode.dhall +++ b/cfgen/tests/singlenode.dhall @@ -26,6 +26,7 @@ in , nodes = [ { hostname = "localhost" , machine_id = None Text + , node_group = Some "localhost" , memorysize_mb = None Double , processorcount = None Natural , transport_type = "libfab" diff --git a/provisioning/miniprov/hare_mp/cdf.py b/provisioning/miniprov/hare_mp/cdf.py index 5fc0f8b58..346db8e3d 100644 --- a/provisioning/miniprov/hare_mp/cdf.py +++ b/provisioning/miniprov/hare_mp/cdf.py @@ -128,8 +128,14 @@ def _create_node_descriptions(self) -> List[NodeDesc]: if machine not in machines: machines.append(machine) + is_node_group_supported: bool = False + local_node_group = self.utils.get_node_group(conf.get_machine_id(), + allow_null=True) + + if local_node_group: + is_node_group_supported = True for machine in machines: - nodes.append(self._create_node(machine)) + nodes.append(self._create_node(machine, is_node_group_supported)) return nodes # cluster>storage_set[N]>durability>{type}>data/parity/spare @@ -495,14 +501,17 @@ def _get_node_clients(self, machine_id: str) -> Iterator[M0ClientDesc]: name=Text(name), instances=no_instances) - def _create_node(self, machine_id: str) -> NodeDesc: + def _create_node(self, machine_id: str, + is_node_group_supported: bool) -> NodeDesc: store = self.provider hostname = self.utils.get_hostname(machine_id) - # node>{machine-id}>name 
+ node_group = None iface = self._get_iface(machine_id) servers = None if(self.utils.is_motr_io_present(machine_id)): + if is_node_group_supported: + node_group = self.utils.get_node_group(machine_id) # Currently, there is 1 m0d per cvg. # We will create 1 IO service entry in CDF per cvg. # An IO service entry will use data and metadat devices @@ -543,6 +552,7 @@ def _create_node(self, machine_id: str) -> NodeDesc: return NodeDesc( hostname=Text(hostname), + node_group=Maybe(Text(str(node_group)), 'Text'), machine_id=Maybe(Text(machine_id), 'Text'), processorcount=Maybe(node_facts['processorcount'], 'Natural'), memorysize_mb=Maybe(node_facts['memorysize_mb'], 'Double'), diff --git a/provisioning/miniprov/hare_mp/dhall/gencdf.dhall b/provisioning/miniprov/hare_mp/dhall/gencdf.dhall index ec45c155c..070b3de59 100644 --- a/provisioning/miniprov/hare_mp/dhall/gencdf.dhall +++ b/provisioning/miniprov/hare_mp/dhall/gencdf.dhall @@ -38,6 +38,7 @@ let ServerPort = let NodeInfo = { hostname : Text + , node_group : Optional Text , machine_id : Optional Text , processorcount : Optional Natural , memorysize_mb : Optional Double @@ -85,6 +86,7 @@ let toNodeDesc : NodeInfo -> T.NodeDesc = \(n : NodeInfo) -> { hostname = n.hostname + , node_group = n.node_group , machine_id = n.machine_id , processorcount = n.processorcount , memorysize_mb = n.memorysize_mb diff --git a/provisioning/miniprov/hare_mp/templates/hare.config.conf.tmpl.3-node.sample b/provisioning/miniprov/hare_mp/templates/hare.config.conf.tmpl.3-node.sample index a9a2cdf86..157b271dc 100644 --- a/provisioning/miniprov/hare_mp/templates/hare.config.conf.tmpl.3-node.sample +++ b/provisioning/miniprov/hare_mp/templates/hare.config.conf.tmpl.3-node.sample @@ -38,6 +38,7 @@ "cluster_id": "my-cluster", "hostname": "ssc-vm-1623.colo.seagate.com", "name": "srvnode-1", + "node_group": "ssc-vm-1623.colo.seagate.com", "type": "storage_node", "components": [ { "name": "hare" }, @@ -86,6 +87,7 @@ "cluster_id": "my-cluster", 
"hostname": "ssc-vm-1624.colo.seagate.com", "name": "srvnode-2", + "node_group": "ssc-vm-1624.colo.seagate.com", "type": "storage_node", "network": { "data": { @@ -129,6 +131,7 @@ "cluster_id": "my-cluster", "hostname": "ssc-vm-1625.colo.seagate.com", "name": "srvnode-3", + "node_group": "ssc-vm-1625.colo.seagate.com", "type": "storage_node", "network": { "data": { diff --git a/provisioning/miniprov/hare_mp/types.py b/provisioning/miniprov/hare_mp/types.py index e87b07802..a60cab31d 100644 --- a/provisioning/miniprov/hare_mp/types.py +++ b/provisioning/miniprov/hare_mp/types.py @@ -133,6 +133,7 @@ class NetworkPorts(DhallTuple): @dataclass(repr=False) class NodeDesc(DhallTuple): hostname: Text + node_group: Maybe[Text] machine_id: Maybe[Text] processorcount: Maybe[int] memorysize_mb: Maybe[int] diff --git a/provisioning/miniprov/hare_mp/utils.py b/provisioning/miniprov/hare_mp/utils.py index 06190da08..d1590c4fe 100755 --- a/provisioning/miniprov/hare_mp/utils.py +++ b/provisioning/miniprov/hare_mp/utils.py @@ -349,6 +349,18 @@ def save_ssl_config(self): }) self.kv.kv_put('ssl/hax', ssl_hax) + # Provisioner will be generating node_group from each data pod and place + # gconf copy into consul. Hence from consul for all data pods node_group + # value will be available. 
+ @func_log(func_enter, func_leave) + @repeat_if_fails() + def get_node_group(self, machine_id: str, allow_null: bool = False): + key = f'conf/node>{machine_id}>node_group' + node_group = self.kv.kv_get(key, allow_null=allow_null) + if node_group: + return node_group['Value'].decode() + return None + class LogWriter: def __init__(self, logger: logging.Logger, logging_handler): diff --git a/provisioning/miniprov/test/test_cdf.py b/provisioning/miniprov/test/test_cdf.py index 1f2bf6c23..936abbdd9 100644 --- a/provisioning/miniprov/test/test_cdf.py +++ b/provisioning/miniprov/test/test_cdf.py @@ -114,8 +114,17 @@ def new_kv(key: str, val: str): store.get_motr_clients = Mock(return_value=[]) utils = Utils(store) kv = KVAdapter() - def my_get(key: str, recurse: bool = False): - if key == 'srvnode-1.data.private/drives/dev/sda': + def my_get(key: str, recurse: bool = False, allow_null: bool = False): + if key == 'conf/node>1114a50a6bf6f9c93ebd3c49d07d3fd4>node_group': + return new_kv('conf/node>1114a50a6bf6f9c93ebd3c49d07d3fd4>node_group', + "ssc-vm-1623.colo.seagate.com".encode()) + elif key == 'conf/node>9ec5de3a8b57493e8fc7bfae67ecd3b3>node_group': + return new_kv('conf/node>9ec5de3a8b57493e8fc7bfae67ecd3b3>node_group', + "ssc-vm-1624.colo.seagate.com".encode()) + elif key == 'conf/node>846fd26885f8423a8da0626538ed47bc>node_group': + return new_kv('conf/node>846fd26885f8423a8da0626538ed47bc>node_group', + "ssc-vm-1625.colo.seagate.com".encode()) + elif key == 'srvnode-1.data.private/drives/dev/sda': return new_kv('srvnode-1.data.private/drives/dev/sda', json.dumps({"path": "/dev/sda", "size": "4096000", @@ -149,7 +158,10 @@ def my_get(key: str, recurse: bool = False): return new_kv('srvnode-1.data.private/facts', json.dumps({"processorcount": "16", "memorysize_mb": "4096.123"})) - raise RuntimeError(f'Unexpected call: key={key}, recurse={recurse}') + if allow_null: + return None + else: + raise RuntimeError(f'Unexpected call: key={key}, recurse={recurse}') kv.kv_get = 
my_get utils.kv = kv @@ -238,8 +250,11 @@ def ret_values(value: str) -> Any: store.get_motr_clients = Mock(return_value=[]) utils = Utils(store) kv = KVAdapter() - def my_get(key: str, recurse: bool = False): - if key == 'srvnode-1.data.private/drives/dev/sdb': + def my_get(key: str, recurse: bool = False, allow_null: bool = False): + if key == 'conf/node>MACH_ID>node_group' : + return new_kv('conf/node>MACH_ID>node_group', + "myhost".encode()) + elif key == 'srvnode-1.data.private/drives/dev/sdb': return new_kv('srvnode-1.data.private/drives/dev/sdb', json.dumps({"path": "/dev/sdb", "size": "4096000", @@ -253,7 +268,10 @@ def my_get(key: str, recurse: bool = False): return new_kv('srvnode-1.data.private/facts', json.dumps({"processorcount": "16", "memorysize_mb": "4096.123"})) - raise RuntimeError(f'Unexpected call: key={key}, recurse={recurse}') + if allow_null: + return None + else: + raise RuntimeError(f'Unexpected call: key={key}, recurse={recurse}') kv.kv_get = my_get utils.kv = kv @@ -341,8 +359,11 @@ def ret_values(value: str) -> Any: store.get_machine_ids_for_component = Mock(return_value=['MACH_ID']) utils = Utils(store) kv = KVAdapter() - def my_get(key: str, recurse: bool = False): - if key == 'srvnode-1.data.private/drives/dev/sdb': + def my_get(key: str, recurse: bool = False, allow_null: bool = False): + if key == 'conf/node>MACH_ID>node_group' : + return new_kv('conf/node>MACH_ID>node_group', + "mynodename".encode()) + elif key == 'srvnode-1.data.private/drives/dev/sdb': return new_kv('srvnode-1.data.private/drives/dev/sdb', json.dumps({"path": "/dev/sdb", "size": "4096000", @@ -356,7 +377,10 @@ def my_get(key: str, recurse: bool = False): return new_kv('srvnode-1.data.private/facts', json.dumps({"processorcount": "16", "memorysize_mb": "4096.123"})) - raise RuntimeError(f'Unexpected call: key={key}, recurse={recurse}') + if allow_null: + return None + else: + raise RuntimeError(f'Unexpected call: key={key}, recurse={recurse}') kv.kv_get = my_get 
utils.kv = kv @@ -603,8 +627,11 @@ def ret_values(value: str) -> Any: cdf._get_m0d_per_cvg = Mock(return_value=1) utils = Utils(store) kv = KVAdapter() - def my_get(key: str, recurse: bool = False): - if key == 'srvnode-1.data.private/drives/dev/sdb': + def my_get(key: str, recurse: bool = False, allow_null: bool = False): + if key == 'conf/node>MACH_ID>node_group' : + return new_kv('conf/node>MACH_ID>node_group', + "myhost".encode()) + elif key == 'srvnode-1.data.private/drives/dev/sdb': return new_kv('srvnode-1.data.private/drives/dev/sdb', json.dumps({"path": "/dev/sdb", "size": "4096000", @@ -618,7 +645,10 @@ def my_get(key: str, recurse: bool = False): return new_kv('srvnode-1.data.private/facts', json.dumps({"processorcount": "16", "memorysize_mb": "4096.123"})) - raise RuntimeError(f'Unexpected call: key={key}, recurse={recurse}') + if allow_null: + return None + else: + raise RuntimeError(f'Unexpected call: key={key}, recurse={recurse}') kv.kv_get = my_get @@ -929,8 +959,11 @@ def ret_values(value: str) -> Any: store.get_motr_clients = Mock(return_value=[]) utils = Utils(store) kv = KVAdapter() - def my_get(key: str, recurse: bool = False): - if key == 'srvnode-1.data.private/drives/dev/sdb': + def my_get(key: str, recurse: bool = False, allow_null: bool = False): + if key == 'conf/node>MACH_ID>node_group' : + return new_kv('conf/node>MACH_ID>node_group', + "mynodename".encode()) + elif key == 'srvnode-1.data.private/drives/dev/sdb': return new_kv('srvnode-1.data.private/drives/dev/sdb', json.dumps({"path": "/dev/sdb", "size": "4096000", @@ -944,7 +977,10 @@ def my_get(key: str, recurse: bool = False): return new_kv('srvnode-1.data.private/facts', json.dumps({"processorcount": "16", "memorysize_mb": "4096.123"})) - raise RuntimeError(f'Unexpected call: key={key}, recurse={recurse}') + if allow_null: + return None + else: + raise RuntimeError(f'Unexpected call: key={key}, recurse={recurse}') kv.kv_get = my_get utils.kv = kv @@ -1063,8 +1099,14 @@ def 
ret_values(value: str) -> Any: 'MACH_2_ID']) utils = Utils(store) kv = KVAdapter() - def my_get(key: str, recurse: bool = False): - if key == 'srvnode-1.data.private/drives/dev/sdb': + def my_get(key: str, recurse: bool = False, allow_null: bool = False): + if key == 'conf/node>MACH_ID>node_group' : + return new_kv('conf/node>MACH_ID>node_group', + "mynodename".encode()) + elif key == 'conf/node>MACH_2_ID>node_group' : + return new_kv('conf/node>MACH_2_ID>node_group', + "host-2".encode()) + elif key == 'srvnode-1.data.private/drives/dev/sdb': return new_kv('srvnode-1.data.private/drives/dev/sdb', json.dumps({"path": "/dev/sdb", "size": "4096000", @@ -1093,7 +1135,10 @@ def my_get(key: str, recurse: bool = False): json.dumps({"processorcount": "16", "memorysize_mb": "4096.123"})) - raise RuntimeError(f'Unexpected call: key={key}, recurse={recurse}') + if allow_null: + return None + else: + raise RuntimeError(f'Unexpected call: key={key}, recurse={recurse}') kv.kv_get = my_get utils.kv = kv @@ -1183,8 +1228,11 @@ def ret_values(value: str) -> Any: cdf = CdfGenerator(provider=store) utils = Utils(store) kv = KVAdapter() - def my_get(key: str, recurse: bool = False): - if key == 'srvnode-1.data.private/drives/dev/sdb': + def my_get(key: str, recurse: bool = False, allow_null: bool = False): + if key == 'conf/node>MACH_ID>node_group': + return new_kv('conf/node>MACH_ID>node_group', + "mynodename".encode()) + elif key == 'srvnode-1.data.private/drives/dev/sdb': return new_kv('srvnode-1.data.private/drives/dev/sdb', json.dumps({"path": "/dev/sdb", "size": "4096000", @@ -1198,7 +1246,10 @@ def my_get(key: str, recurse: bool = False): return new_kv('srvnode-1.data.private/facts', json.dumps({"processorcount": "16", "memorysize_mb": "4096.123"})) - raise RuntimeError(f'Unexpected call: key={key}, recurse={recurse}') + if allow_null: + return None + else: + raise RuntimeError(f'Unexpected call: key={key}, recurse={recurse}') kv.kv_get = my_get