Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Development 313 mass deployment #1421

Merged
merged 38 commits into from
Dec 10, 2023
Merged
Show file tree
Hide file tree
Changes from 35 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
2764b66
add batches to mass deployment
AlaaElattar Nov 15, 2023
8a5084a
remove network model
AlaaElattar Nov 15, 2023
a1d9612
test mass deployment with 100 vm
AlaaElattar Dec 3, 2023
5d48c63
update it to 50 vms
AlaaElattar Dec 3, 2023
b2b6633
Merge branch 'development_313' of https://github.com/threefoldtech/tf…
AlaaElattar Dec 3, 2023
ae3254b
update batch
AlaaElattar Dec 3, 2023
1d1849f
add access true to network
AlaaElattar Dec 3, 2023
4674238
ping nodes before deploying
AlaaElattar Dec 3, 2023
5e5641c
Merge branch 'development_313' of https://github.com/threefoldtech/tf…
AlaaElattar Dec 4, 2023
47d5742
print node id
AlaaElattar Dec 4, 2023
b49a8d0
print node before try catch block
AlaaElattar Dec 4, 2023
55f74b6
exclude offline nodes
AlaaElattar Dec 4, 2023
d74fd63
fix usage of offline nodes
AlaaElattar Dec 4, 2023
8285b33
update to not choose offline node
AlaaElattar Dec 5, 2023
220471f
Merge branch 'development_313' of https://github.com/threefoldtech/tf…
AlaaElattar Dec 5, 2023
4e1b8f8
remove id var
AlaaElattar Dec 5, 2023
633c692
check second node when one fails
AlaaElattar Dec 5, 2023
10d2f67
get time of filter nodes and farms
AlaaElattar Dec 5, 2023
8380112
add promise race on ping nodes
AlaaElattar Dec 5, 2023
329e7ed
add promise to ping nodes
AlaaElattar Dec 5, 2023
27bf168
remove unused vars and add comments
AlaaElattar Dec 6, 2023
1a205c1
test promise of _createDeployment
AlaaElattar Dec 6, 2023
cd5a861
make deployment creation parallel
AlaaElattar Dec 6, 2023
e35d3e6
update timeout of deployment to be 2 min not 10
AlaaElattar Dec 7, 2023
d425fc9
apply pr comments
AlaaElattar Dec 7, 2023
f329357
fix smth in proxy query
AlaaElattar Dec 7, 2023
4b856af
remove farmsIds
AlaaElattar Dec 7, 2023
943f508
remove await
AlaaElattar Dec 7, 2023
1f20ff0
fix failing flow
AlaaElattar Dec 7, 2023
176c15f
add try && catch block around ping promises
AlaaElattar Dec 7, 2023
cd7edce
apply pr comments
AlaaElattar Dec 10, 2023
8a70f1d
fix not disconnecting issue
AlaaElattar Dec 10, 2023
0fc74e8
return totalVms to 250
AlaaElattar Dec 10, 2023
23aaf5d
add return type for PingNodes
AlaaElattar Dec 10, 2023
a26186c
apply comments
AlaaElattar Dec 10, 2023
1290536
fix error in condition
AlaaElattar Dec 10, 2023
4a66836
return pop after condition
AlaaElattar Dec 10, 2023
46056f5
remove counter when no nodes available
AlaaElattar Dec 10, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/mass_deployments.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Mass Deployments

on:
schedule:
- cron: "0 1 * * *"
- cron: "0 */6 * * *"
workflow_dispatch:

jobs:
Expand Down
227 changes: 155 additions & 72 deletions packages/grid_client/scripts/mass_deployments.ts
Original file line number Diff line number Diff line change
@@ -1,119 +1,202 @@
import {
DiskModel,
FarmFilterOptions,
FilterOptions,
generateString,
GridClient,
MachineModel,
MachinesModel,
NetworkModel,
randomChoice,
NodeInfo,
TwinDeployment,
} from "../src";
import { config, getClient } from "./client_loader";
import { log } from "./utils";

/**
 * Pings every node in `nodes` concurrently and reports the outcome per node.
 *
 * A failed ping never rejects the returned promise: the failure is captured
 * in that node's entry so the caller can inspect all results together.
 *
 * @param grid3 - Client whose `zos.pingNode` is invoked once per node.
 * @param nodes - Nodes to ping; each node's `nodeId` is passed to `pingNode`.
 * @returns One entry per input node, in input order: `{ node, res }` when the
 *   ping resolved, `{ node, error }` when it threw.
 */
async function pingNodes(
  grid3: GridClient,
  nodes: NodeInfo[],
): Promise<{ node: NodeInfo; error?: Error; res?: unknown }[]> {
  return Promise.all(
    nodes.map(async node => {
      try {
        const res = await grid3.zos.pingNode({ nodeId: node.nodeId });
        return { node, res };
      } catch (error) {
        // Anything can be thrown in JS; wrap non-Error values so the declared
        // `error?: Error` type is actually honoured at runtime.
        return { node, error: error instanceof Error ? error : new Error(String(error)) };
      }
    }),
  );
}

/**
 * Mass-deployment script entry point.
 *
 * Connects the grid client, filters farms once, then for each batch: filters
 * nodes, pings them via `pingNodes`, builds one `MachinesModel` per VM, calls
 * `grid3.machines._createDeployment` for all of them in parallel, and passes
 * the collected twin deployments to `twinDeploymentHandler.handle`. Finally it
 * logs success/failure counts, collected errors and offline node ids, and
 * disconnects the client.
 *
 * NOTE(review): this listing appears to interleave removed and added lines of
 * the diff (duplicate `sru` keys, both `farmId` and `farmIds`, two separate
 * VM-construction sequences), and its braces do not balance as shown. Read it
 * as a diff view rather than as a compilable function body.
 */
async function main() {
const grid3 = await getClient();

// Timeout for deploying vm is 2 min
grid3.clientOptions.deploymentTimeoutMinutes = 2;
await grid3._connect();

const errors: any = [];
// Declared outside the batch loop, so entries accumulate across all batches.
const offlineNodes: number[] = [];
let failedCount = 0;
let successCount = 0;
const batchSize = 50;
const totalVMs = 250;
// 250 / 50 = 5 batches with the constants above.
const batches = totalVMs / batchSize;

// resources
const cru = 1;
const mru = 256;
const rootFs = 1;
const publicIp = false;

console.time("Farms Time");
const farms = await grid3.capacity.filterFarms({
nodeMRU: mru / 1024,
nodeSRU: rootFs,
publicIp: publicIp,
availableFor: await grid3.twins.get_my_twin_id(),
randomize: true,
} as FarmFilterOptions);
console.timeEnd("Farms Time");

if (farms.length < 1) {
throw new Error("No farms found");
}

// NOTE(review): this loop header and the per-iteration setup that follows look
// like the removed pre-batch version; no matching closing brace appears in
// this listing.
for (let i = 0; i < 100; i++) {
//Generating the resources
const cru = 1;
const mru = 256;
const diskSize = 5;
const rootFs = 1;
const vmName = "vm" + generateString(8);
const deploymentName = "dep" + generateString(8);
const publicIp = false;

// create network Object
const n = new NetworkModel();
n.name = "nw" + generateString(5);
n.ip_range = "10.238.0.0/16";

// create disk Object
const disk1 = new DiskModel();
disk1.name = "d" + generateString(5);
disk1.size = diskSize;
disk1.mountpoint = "/newDisk1";

//Farm Selection
const farms = await grid3.capacity.filterFarms({
nodeMRU: mru / 1024,
nodeSRU: diskSize + rootFs,
publicIp: publicIp,
availableFor: await grid3.twins.get_my_twin_id(),
randomize: true,
} as FarmFilterOptions);
console.time("Total Deployment Time");

if (farms.length < 1) {
throw new Error("No farms found");
}
for (let batch = 0; batch < batches; batch++) {
console.time("Batch " + (batch + 1));

//Node Selection
const farmIds = farms.map(farm => farm.farmId);
const nodes = await grid3.capacity.filterNodes({
cru: cru,
mru: mru / 1024,
sru: rootFs + diskSize,
sru: rootFs,
availableFor: await grid3.twins.get_my_twin_id(),
farmId: +randomChoice(farms).farmId,
farmIds: farmIds,
randomize: true,
} as FilterOptions);

if (nodes.length < 1) {
errors.push("Node not found");
failedCount++;
continue;
console.time("Ping Nodes");
const results = await pingNodes(grid3, nodes);
console.timeEnd("Ping Nodes");

// Check nodes results
results.forEach(({ node, res, error }) => {
if (res) {
console.log(`Node ${node.nodeId} is online`);
} else {
// NOTE(review): ids are pushed without a duplicate check, so a node that
// fails the ping in several batches is recorded (and later logged) more than once.
offlineNodes.push(node.nodeId);
console.log(`Node ${node.nodeId} is offline`);
if (error) {
console.error("Error:", error);
}
}
});

const onlineNodes = nodes.filter(node => !offlineNodes.includes(node.nodeId));

// Batch Deployment
const batchVMs: MachinesModel[] = [];
for (let i = 0; i < batchSize; i++) {
const vmName = "vm" + generateString(8);

// NOTE(review): an array length is never negative, so this guard can never
// fire. Once `onlineNodes` is exhausted, `pop()` below returns `undefined` and
// `selectedNode.nodeId` throws. The intended check is presumably
// `onlineNodes.length === 0`.
if (onlineNodes.length < 0) {
errors.push("No online nodes available for deployment");
failedCount++;
continue;
}

// Each VM takes a distinct node: `pop()` removes the node from `onlineNodes`.
const selectedNode = onlineNodes.pop();

// create vm node Object
const vm = new MachineModel();
vm.name = vmName;
vm.node_id = selectedNode.nodeId;
vm.disks = [];
vm.public_ip = publicIp;
vm.planetary = true;
vm.cpu = cru;
vm.memory = mru;
vm.rootfs_size = rootFs;
vm.flist = "https://hub.grid.tf/tf-official-apps/base:latest.flist";
vm.entrypoint = "/sbin/zinit init";
vm.env = {
SSH_KEY: config.ssh_key,
};

// create network model for each vm
const n = new NetworkModel();
n.name = "nw" + generateString(5);
n.ip_range = "10.238.0.0/16";
n.addAccess = true;

// create VMs Object for each vm
const vms = new MachinesModel();
vms.name = "batch" + (batch + 1);
vms.network = n;
vms.machines = [vm];
vms.metadata = "";
vms.description = "Test deploying vm with name " + vm.name + " via ts grid3 client - Batch " + (batch + 1);

batchVMs.push(vms);
}

const allTwinDeployments: TwinDeployment[] = [];

// Build all deployments of the batch in parallel; a failure is logged and
// mapped to `twinDeployments: null` so the remaining ones still proceed.
const deploymentPromises = batchVMs.map(async (vms, index) => {
try {
const [twinDeployments, _, __] = await grid3.machines._createDeployment(vms);
return { twinDeployments, batchIndex: index };
} catch (error) {
log(`Error creating deployment for batch ${batch + 1}: ${error}`);
return { twinDeployments: null, batchIndex: index };
}
});
console.time("Preparing Batch " + (batch + 1));
const deploymentResults = await Promise.all(deploymentPromises);
console.timeEnd("Preparing Batch " + (batch + 1));

// Keep only the deployments that were created successfully.
for (const { twinDeployments } of deploymentResults) {
if (twinDeployments) {
allTwinDeployments.push(...twinDeployments);
}
}

// create vm node Object
const vm = new MachineModel();
vm.name = vmName;
vm.node_id = nodes[0].nodeId;
vm.disks = [disk1];
vm.public_ip = publicIp;
vm.planetary = true;
vm.cpu = cru;
vm.memory = mru;
vm.rootfs_size = rootFs;
vm.flist = "https://hub.grid.tf/tf-official-apps/base:latest.flist";
vm.entrypoint = "/sbin/zinit init";
vm.env = {
SSH_KEY: config.ssh_key,
};

// create VMs Object
const vms = new MachinesModel();
vms.name = deploymentName;
vms.network = n;
vms.machines = [vm];
vms.metadata = "";
vms.description = "test deploying VMs via ts grid3 client";

// deploy vm
// NOTE(review): the counters below move by a full `batchSize` on either
// outcome, even though deployments whose creation failed above were skipped
// and VMs skipped in the build loop already incremented `failedCount`.
// The totals can therefore over-count.
try {
await grid3.machines.deploy(vms);
successCount++;
await grid3.machines.twinDeploymentHandler.handle(allTwinDeployments);
successCount += batchSize;
log(`Successfully handled and saved contracts for all twin deployments`);
} catch (error) {
log(error);
failedCount += batchSize;
errors.push(error);
failedCount++;
continue;
log(`Error handling contracts for all twin deployments: ${error}`);
}

console.timeEnd("Batch " + (batch + 1));
}

console.timeEnd("Total Deployment Time");

log("Successful Deployments: " + successCount);
log("Failed Deployments: " + failedCount);

// List of failed deployments errors
// List of failed deployments' errors
log("Failed deployments errors: ");
for (let i = 0; i < errors.length; i++) {
log(errors[i]);
log("---------------------------------------------");
}

// List of offline nodes
log("Failed Nodes: ");
for (let i = 0; i < offlineNodes.length; i++) {
log(offlineNodes[i]);
log("---------------------------------------------");
}

await grid3.disconnect();
}

Expand Down
1 change: 1 addition & 0 deletions packages/grid_client/src/modules/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,7 @@ class FilterOptions {
@Expose() @IsOptional() @IsBoolean() gateway?: boolean;
@Expose() @IsOptional() @IsBoolean() certified?: boolean;
@Expose() @IsOptional() @IsInt({ each: true }) @Min(1, { each: true }) nodeExclude?: number[];
@Expose() @IsOptional() @IsInt({ each: true }) @Min(1, { each: true }) farmIds?: number[];
@Expose() @IsOptional() @IsInt() @Min(1) farmId?: number;
@Expose() @IsOptional() @IsString() farmName?: string;
@Expose() @IsOptional() @IsString() country?: string;
Expand Down
2 changes: 1 addition & 1 deletion packages/grid_client/src/modules/zos.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class Zos {
/**
 * Pings a node by resolving its twin id and sending a `zos.system.version`
 * request to that twin through `this.rmb.request`.
 *
 * NOTE(review): two `return` statements appear below, differing only in the
 * fourth argument (20 vs 10). The listing seems to show both the removed and
 * the added line of the diff. The argument is presumably a timeout; confirm
 * against `rmb.request`.
 *
 * @param options - Supplies `nodeId`, which is passed to `capacity.getNodeTwinId`.
 * @returns The value the RMB request resolves to (declared as `boolean`).
 */
@validateInput
async pingNode(options: PingNodeOptionsModel): Promise<boolean> {
const nodeTwinId = await this.capacity.getNodeTwinId(options.nodeId);
return await this.rmb.request([nodeTwinId], "zos.system.version", "", 20, 1);
return await this.rmb.request([nodeTwinId], "zos.system.version", "", 10, 1);
}

@expose
Expand Down
2 changes: 1 addition & 1 deletion packages/grid_client/src/primitives/nodes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ class Nodes {
ipv6: options.accessNodeV6,
gateway: options.gateway,
certification_type: options.certified ? "Certified" : "",
farm_ids: [options.farmId],
farm_ids: options.farmId ? [options.farmId] : options.farmIds,
farm_name: options.farmName,
country: options.country,
city: options.city,
Expand Down