-
Notifications
You must be signed in to change notification settings - Fork 49
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Allow different instance type per role #675
Changes from 13 commits
7b35c7c
f137ca5
97e2fd6
e70b62a
d6d36b8
254cbc4
ac72c54
400e885
5ce242f
1a46bbb
ba02618
4a9e843
8f48ff8
fa1783d
3694d13
945d05f
667a85c
9e8aaeb
87fbf18
35cb0ae
331c164
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ | |
import uuid | ||
import yaml | ||
|
||
from boto.exception import BotoServerError | ||
|
||
# AppScale-specific imports | ||
from agents.factory import InfrastructureAgentFactory | ||
|
@@ -24,6 +25,7 @@ | |
from custom_exceptions import BadConfigurationException | ||
from custom_exceptions import ShellException | ||
from custom_exceptions import TimeoutException | ||
from agents.base_agent import AgentRuntimeException | ||
from agents.gce_agent import CredentialTypes | ||
from agents.gce_agent import GCEAgent | ||
from local_state import APPSCALE_VERSION | ||
|
@@ -151,31 +153,68 @@ def start_all_nodes(cls, options, node_layout): | |
|
||
agent.configure_instance_security(params) | ||
|
||
load_balancer_nodes = node_layout.get_nodes('load_balancer', True) | ||
instance_ids, public_ips, private_ips = cls.spawn_load_balancers_in_cloud( | ||
options, agent, params, | ||
len(load_balancer_nodes)) | ||
load_balancer_roles = {} | ||
|
||
for node_index, node in enumerate(load_balancer_nodes): | ||
index = node_layout.nodes.index(node) | ||
node_layout.nodes[index].public_ip = public_ips[node_index] | ||
node_layout.nodes[index].private_ip = private_ips[node_index] | ||
node_layout.nodes[index].instance_id = instance_ids[node_index] | ||
instance_type_roles = {'with_disks':{}, 'without_disks': {}} | ||
|
||
for node in node_layout.get_nodes('load_balancer', True): | ||
load_balancer_roles.setdefault(node.instance_type, []).append(node) | ||
|
||
for node in node_layout.get_nodes('load_balancer', False): | ||
instance_type = instance_type_roles['with_disks'] if node.disk else \ | ||
instance_type_roles['without_disks'] | ||
instance_type.setdefault(node.instance_type, []).append(node) | ||
|
||
spawned_instance_ids = [] | ||
|
||
for instance_type, load_balancer_nodes in load_balancer_roles.items(): | ||
# Copy parameters so we can modify the instance type. | ||
instance_type_params = params.copy() | ||
instance_type_params['instance_type'] = instance_type | ||
|
||
instance_ids, public_ips, private_ips = cls.spawn_nodes_in_cloud( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does this need to clean up after itself if it encounters an exception? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes it does. It slipped my mind that we could technically have different instance types for LBs, and it would need cleanup if the 2nd LB faced a problem after the 1st one was started successfully. I have added the same try/except as for non load balancer nodes. |
||
options, agent, instance_type_params, spawned_instance_ids, | ||
count=len(load_balancer_nodes), load_balancer=True) | ||
|
||
# Keep track of instances we have started. | ||
spawned_instance_ids.extend(instance_ids) | ||
|
||
for node_index, node in enumerate(load_balancer_nodes): | ||
index = node_layout.nodes.index(node) | ||
node_layout.nodes[index].public_ip = public_ips[node_index] | ||
node_layout.nodes[index].private_ip = private_ips[node_index] | ||
node_layout.nodes[index].instance_id = instance_ids[node_index] | ||
|
||
if options.static_ip: | ||
node = node_layout.head_node() | ||
agent.associate_static_ip(params, node.instance_id, | ||
options.static_ip) | ||
node.public_ip = options.static_ip | ||
AppScaleLogger.log("Static IP associated with head node.") | ||
|
||
AppScaleLogger.log("\nPlease wait for AppScale to prepare your machines " | ||
"for use. This can take few minutes.") | ||
|
||
other_nodes = node_layout.get_nodes('load_balancer', False) | ||
if len(other_nodes) > 0: | ||
_instance_ids, _public_ips, _private_ips = cls.spawn_other_nodes_in_cloud( | ||
agent, params, | ||
len(other_nodes)) | ||
for _, nodes in instance_type_roles.items(): | ||
for instance_type, other_nodes in nodes.items(): | ||
if len(other_nodes) <= 0: | ||
break | ||
# Copy parameters so we can modify the instance type. | ||
instance_type_params = params.copy() | ||
instance_type_params['instance_type'] = instance_type | ||
|
||
for node_index, node in enumerate(other_nodes): | ||
index = node_layout.nodes.index(node) | ||
node_layout.nodes[index].public_ip = _public_ips[node_index] | ||
node_layout.nodes[index].private_ip = _private_ips[node_index] | ||
node_layout.nodes[index].instance_id = _instance_ids[node_index] | ||
_instance_ids, _public_ips, _private_ips =\ | ||
cls.spawn_nodes_in_cloud(options, agent, instance_type_params, | ||
spawned_instance_ids, count=len(other_nodes)) | ||
|
||
# Keep track of instances we have started. | ||
spawned_instance_ids.extend(_instance_ids) | ||
|
||
for node_index, node in enumerate(other_nodes): | ||
index = node_layout.nodes.index(node) | ||
node_layout.nodes[index].public_ip = _public_ips[node_index] | ||
node_layout.nodes[index].private_ip = _private_ips[node_index] | ||
node_layout.nodes[index].instance_id = _instance_ids[node_index] | ||
|
||
return node_layout | ||
|
||
|
@@ -276,7 +315,8 @@ def start_head_node(cls, options, my_id, node_layout): | |
|
||
|
||
@classmethod | ||
def spawn_load_balancers_in_cloud(cls, options, agent, params, count=1): | ||
def spawn_nodes_in_cloud(cls, options, agent, params, spawned_instance_ids, | ||
count=1, load_balancer=False): | ||
"""Starts count number of virtual machines in a cloud infrastructure with | ||
public ips. | ||
|
||
|
@@ -288,40 +328,32 @@ def spawn_load_balancers_in_cloud(cls, options, agent, params, count=1): | |
agent: The agent to start VMs with, must be passed as an argument | ||
because agents cannot be made twice. | ||
params: The parameters to be sent to the agent. | ||
spawned_instance_ids: Ids of instances that AppScale has started. | ||
count: A int, the number of instances to start. | ||
load_balancer: A boolean indicating whether the spawned instance should | ||
have a public ip or not. | ||
Returns: | ||
The instance ID, public IP address, and private IP address of the machine | ||
that was started. | ||
""" | ||
instance_ids, public_ips, private_ips = agent.run_instances( | ||
count=count, parameters=params, security_configured=True, | ||
public_ip_needed=True) | ||
|
||
if options.static_ip: | ||
agent.associate_static_ip(params, instance_ids[0], options.static_ip) | ||
public_ips[0] = options.static_ip | ||
AppScaleLogger.log("Static IP associated with head node.") | ||
return instance_ids, public_ips, private_ips | ||
|
||
|
||
@classmethod | ||
def spawn_other_nodes_in_cloud(cls, agent, params, count=1): | ||
"""Starts count number of virtual machines in a cloud infrastructure. | ||
|
||
This method also prepares the virtual machine for use by the AppScale Tools. | ||
try: | ||
instance_ids, public_ips, private_ips = agent.run_instances( | ||
count=count, parameters=params, security_configured=True, | ||
public_ip_needed=load_balancer) | ||
except (AgentRuntimeException, BotoServerError): | ||
AppScaleLogger.warn("AppScale was unable to start the requested number " | ||
"of instances, attempting to terminate those that " | ||
"were started.") | ||
if len(spawned_instance_ids) > 0: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can the caller do this? |
||
AppScaleLogger.warn("Attempting to terminate those that were started.") | ||
cls.terminate_spawned_instances(spawned_instance_ids, agent, params) | ||
|
||
# Cleanup the keyname since it failed. | ||
LocalState.cleanup_keyname(options.keyname) | ||
|
||
# Re-raise the original exception. | ||
raise | ||
|
||
Args: | ||
agent: The agent to start VMs with, must be passed as an argument | ||
because agents cannot be made twice. | ||
params: The parameters to be sent to the agent. | ||
count: A int, the number of instances to start. | ||
Returns: | ||
The instance ID, public IP address, and private IP address of the machine | ||
that was started. | ||
""" | ||
instance_ids, public_ips, private_ips = agent.run_instances( | ||
count=count, parameters=params, security_configured=True, | ||
public_ip_needed=False) | ||
return instance_ids, public_ips, private_ips | ||
|
||
@classmethod | ||
|
@@ -850,6 +882,28 @@ def wait_for_machines_to_finish_loading(cls, host, keyname): | |
time.sleep(cls.WAIT_TIME) | ||
|
||
|
||
@classmethod | ||
def terminate_spawned_instances(cls, spawned_instance_ids, agent, params): | ||
""" Shuts down instances specified. For use when AppScale has failed to | ||
start all the instances for the deployment since we do not check or clean | ||
any local files. | ||
|
||
Args: | ||
spawned_instance_ids: A list of instance ids we have started that | ||
should be terminated. | ||
agent: The agent to call terminate instance with. | ||
params: Agent parameters. | ||
""" | ||
terminate_params = params.copy() | ||
terminate_params[agent.PARAM_INSTANCE_IDS] = spawned_instance_ids | ||
for _ in range(len(spawned_instance_ids)): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this running the terminate call multiple times (based on the number of started machines)? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You are right, it does not need a loop because the terminate instances method uses a list of instances to delete anyway. |
||
try: | ||
agent.terminate_instances(params) | ||
except (AgentRuntimeException, BotoServerError): | ||
AppScaleLogger.warn("AppScale failed to terminate instance(s) with " | ||
"id(s): {}".format(spawned_instance_ids)) | ||
|
||
|
||
@classmethod | ||
def terminate_cloud_instance(cls, instance_id, options): | ||
""" Powers off a single instance in the currently AppScale deployment and | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm having trouble figuring out the reason for this separation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@cdonati Azure cannot attach existing disks to scale set machines, so they would have to be created as regular machines; it was added a little early with the expectation of managed disks.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay. That sounds like an implementation detail that belongs in the azure agent instead of here.