Source code for bioblend.cloudman.launch

"""
Setup and launch a CloudMan instance.
"""
import datetime
from httplib import HTTP
from urlparse import urlparse

import boto
from boto.ec2.regioninfo import RegionInfo
from boto.exception import EC2ResponseError

import yaml

import bioblend
from bioblend.util import Bunch


# Comment the following line if no logging at the prompt is desired
#bioblend.set_stream_logger(__name__)


[docs]class CloudManLauncher(object): def __init__(self, access_key, secret_key, cloud=None): """ Define the environment in which this instance of CloudMan will be launched. Besides providing the credentials, optionally provide the ``cloud`` object. This object must define the properties required to establish a `boto <https://github.com/boto/boto/>`_ connection to that cloud. See this method's implementation for an example of the required fields. Note that as long the as provided object defines the required fields, it can really by implemented as anything (e.g., a Bunch, a database object, a custom class). If no value for the ``cloud`` argument is provided, the default is to use the Amazon cloud. """ self.access_key = access_key self.secret_key = secret_key if cloud is None: # Default to an EC2-compatible object self.cloud = Bunch( id='1', # for compatibility w/ DB representation name="Amazon", cloud_type="ec2", bucket_default="cloudman", region_name="us-east-1", region_endpoint="ec2.amazonaws.com", ec2_port="", ec2_conn_path="/", cidr_range="", is_secure=True, s3_host="s3.amazonaws.com", s3_port="", s3_conn_path='/', ) else: self.cloud = cloud self.ec2_conn = self.connect_ec2(self.access_key, self.secret_key, self.cloud) def __repr__(self): return "Cloud: {0}; acct ID: {1}".format(self.cloud.name, self.access_key)
[docs] def launch(self, cluster_name, image_id, instance_type, password, kernel_id=None, ramdisk_id=None, key_name='cloudman_key_pair', security_groups=['CloudMan'], placement='', **kwargs): """ Check all the prerequisites (key pair and security groups) for launching a CloudMan instance, compose the user data based on the parameters specified in the arguments and the cloud properties as defined in the object's ``cloud`` field. For the current list of user data fields that can be provided via ``kwargs``, see `<http://wiki.g2.bx.psu.edu/CloudMan/UserData>`_ Return a dict containing the properties and info with which an instance was launched, namely: ``sg_names`` containing the names of the security groups, ``kp_name`` containing the name of the key pair, ``kp_material`` containing the private portion of the key pair (*note* that this portion of the key is available and can be retrieved *only* at the time the key is created, which will happen only if no key with the name provided in the ``key_name`` argument exists), ``rs`` containing the `boto <https://github.com/boto/boto/>`_ ``ResultSet`` object, ``instance_id`` containing the ID of a started instance, and ``error`` containing an error message if there was one. """ ret = {'sg_names': [], 'kp_name': '', 'kp_material': '', 'rs': None, 'instance_id': '', 'error': None} # First satisfy the prerequisites for sg in security_groups: ret['sg_names'].append(self.create_cm_security_group(sg)) ret['kp_name'], ret['kp_material'] = self.create_key_pair(key_name) # If not provided, find placement - see the method for potential issues! if placement == '': placement = self._find_placements(self.ec2_conn, instance_type, self.cloud.cloud_type)[0] # Compose user data for launching an instance, ensuring we have the required fields kwargs['access_key'] = self.access_key kwargs['secret_key'] = self.secret_key kwargs['cluster_name'] = cluster_name kwargs['password'] = password kwargs['cloud_name'] = self.cloud.name ud = self._compose_user_data(kwargs) # Now launch an instance try: rs = None rs = self.ec2_conn.run_instances(image_id=image_id, instance_type=instance_type, key_name=key_name, security_groups=security_groups, user_data=ud, kernel_id=kernel_id, ramdisk_id=ramdisk_id, placement=placement) ret['rs'] = rs except EC2ResponseError, e: err = "Problem launching an instance: %s" % e bioblend.log.error(err) ret['error'] = err return ret else: try: bioblend.log.info("Launched an instance with ID %s" % rs.instances[0].id) ret['instance_id'] = rs.instances[0].id ret['instance_ip'] = rs.instances[0].ip_address except Exception, e: err = "Problem with the launched instance object: %s" % e bioblend.log.error(err) ret['error'] = err return ret
[docs] def create_cm_security_group(self, sg_name='CloudMan'): """ Create a security group with all authorizations required to run CloudMan. If the group already exists, check its rules and add the missing ones. Return the name of the created security group. """ cmsg = None # Check if this security group already exists sgs = self.ec2_conn.get_all_security_groups() for sg in sgs: if sg.name == sg_name: cmsg = sg bioblend.log.debug("Security group '%s' already exists; will add authorizations next." % sg_name) break # If it does not exist, create security group if cmsg is None: bioblend.log.debug("Creating Security Group %s" % sg_name) cmsg = self.ec2_conn.create_security_group(sg_name, 'A security group for CloudMan') # Add appropriate authorization rules # If these rules already exist, nothing will be changed in the SG ports = (('80', '80'), # Web UI ('20', '21'), # FTP ('22', '22'), # ssh ('9600', '9700'), # HTCondor ('30000', '30100'), # FTP transfer ('42284', '42284')) # CloudMan UI for port in ports: try: if not self.rule_exists(cmsg.rules, from_port=port[0], to_port=port[1]): cmsg.authorize(ip_protocol='tcp', from_port=port[0], to_port=port[1], cidr_ip='0.0.0.0/0') else: bioblend.log.debug("Rule (%s:%s) already exists in the SG" % (port[0], port[1])) except EC2ResponseError, e: bioblend.log.error("A problem with security group authorizations: %s" % e) # Add ICMP (i.e., ping) rule required by HTCondor try: if not self.rule_exists(cmsg.rules, from_port='-1', to_port='-1', ip_protocol='icmp'): cmsg.authorize(ip_protocol='icmp', from_port=-1, to_port=-1, cidr_ip='0.0.0.0/0') else: bioblend.log.debug("ICMP rule already exists in {0} SG".format(sg_name)) except EC2ResponseError, e: bioblend.log.error("A problem with security group authorizations: %s" % e) # Add rule that allows communication between instances in the same SG g_rule_exists = False # Flag to indicate if group rule already exists ci = self._get_cloud_info(self.cloud) cloud_type = ci['cloud_type'] cidr_range = ci.get('cidr_range', '') # AWS allows grants to be named, thus allowing communication within a group. # Other cloud middlewares do now support that functionality so resort to CIDR grant. if cloud_type == 'ec2': for rule in cmsg.rules: for grant in rule.grants: if grant.name == cmsg.name: g_rule_exists = True bioblend.log.debug("Group rule already exists in the SG") if g_rule_exists: break else: for rule in cmsg.rules: for grant in rule.grants: if grant.cidr_ip == cidr_range: g_rule_exists = True bioblend.log.debug("Group rule already exists in the SG") if g_rule_exists: break if g_rule_exists is False: try: if cloud_type == 'ec2': cmsg.authorize(src_group=cmsg) else: cmsg.authorize(ip_protocol='tcp', from_port=1, to_port=65535, cidr_ip=cidr_range) except EC2ResponseError, e: bioblend.log.error("A problem w/ security group authorization: %s" % e) bioblend.log.info("Done configuring '%s' security group" % cmsg.name) return cmsg.name
[docs] def rule_exists(self, rules, from_port, to_port, ip_protocol='tcp', cidr_ip='0.0.0.0/0'): """ A convenience method to check if an authorization rule in a security group already exists. """ for rule in rules: if rule.ip_protocol == ip_protocol and rule.from_port == from_port and \ rule.to_port == to_port and cidr_ip in [ip.cidr_ip for ip in rule.grants]: return True return False
[docs] def create_key_pair(self, key_name='cloudman_key_pair'): """ Create a key pair with the provided ``key_name``. Return the name of the key or ``None`` if there was an error creating the key. """ kp = None # Check if a key pair under the given name already exists. If it does not, # create it, else return. kps = self.ec2_conn.get_all_key_pairs() for akp in kps: if akp.name == key_name: bioblend.log.debug("Key pair '%s' already exists; not creating it again." % key_name) return akp.name, None try: kp = self.ec2_conn.create_key_pair(key_name) except EC2ResponseError, e: bioblend.log.error("Problem creating key pair '%s': %s" % (key_name, e)) return None, None bioblend.log.info("Created key pair '%s'" % kp.name) return kp.name, kp.material
[docs] def get_status(self, instance_id): """ Check on the status of an instance. If ``instance_id`` is not provided, the ID obtained when launching *the most recent* instance is used. Note that this assumes the instance being checked on was launched using this class. Also note that the same class may be used to launch multiple instances but only the most recent ``instance_id`` is kept while any others will need to be explicitly specified. This method also allows the required ``ec2_conn`` connection object to be provided at invocation time. If the object is not provided, credentials defined for the class are used (ability to specify a custom ``ec2_conn`` helps in case of stateless method invocations). Return a ``state`` dict with the current ``instance_state``, ``public_ip``, ``placement``, and ``error`` keys, which capture the current state (the values for those keys default to empty string if no data is available from the cloud). """ ec2_conn = self.ec2_conn rs = None state = {'instance_state': "", 'public_ip': "", 'placement': "", 'error': ""} # Make sure we have an instance ID if instance_id is None: err = "Missing instance ID, cannot check the state." bioblend.log.error(err) state['error'] = err return state try: rs = ec2_conn.get_all_instances([instance_id]) if rs is not None: inst_state = rs[0].instances[0].update() public_ip = rs[0].instances[0].ip_address if inst_state == 'running': cm_url = "http://{dns}/cloud".format(dns=public_ip) # Wait until the CloudMan URL is accessible to return the data if self._checkURL(cm_url) is True: state['public_ip'] = public_ip state['instance_state'] = inst_state state['placement'] = rs[0].instances[0].placement else: state['instance_state'] = 'booting' else: state['instance_state'] = inst_state except Exception, e: err = "Problem updating instance '%s' state: %s" % (instance_id, e) bioblend.log.error(err) state['error'] = err return state
[docs] def connect_ec2(self, a_key, s_key, cloud=None): """ Create and return an EC2-compatible connection object for the given cloud. See ``_get_cloud_info`` method for more details on the requirements for the ``cloud`` parameter. If no value is provided, the class field is used. """ if cloud is None: cloud = self.cloud ci = self._get_cloud_info(cloud) r = RegionInfo(name=ci['region_name'], endpoint=ci['region_endpoint']) ec2_conn = boto.connect_ec2(aws_access_key_id=a_key, aws_secret_access_key=s_key, # api_version is needed for availability zone support for EC2 api_version='2012-06-01' if ci['cloud_type'] == 'ec2' else None, is_secure=ci['is_secure'], region=r, port=ci['ec2_port'], path=ci['ec2_conn_path'], validate_certs=False) return ec2_conn
def _compose_user_data(self, user_provided_data): """ A convenience method used to compose and properly format the user data required when requesting an instance. ``user_provided_data`` is the data provided by a user required to identify a cluster and user other user requirements. """ form_data = {} # Do not include the following fields in the user data but do include # any 'advanced startup fields' that might be added in the future excluded_fields = ['sg_name', 'image_id', 'instance_id', 'kp_name', 'cloud', 'cloud_type', 'public_dns', 'cidr_range', 'kp_material', 'placement'] for key, value in user_provided_data.iteritems(): if key not in excluded_fields: form_data[key] = value # If the following user data keys are empty, do not include them in the request user data udkeys = ['post_start_script_url', 'worker_post_start_script_url', 'bucket_default', 'share_string'] for udkey in udkeys: if udkey in form_data and form_data[udkey] == '': del form_data[udkey] # Check if bucket_default is defined for the given cloud and, if it was not # provided by the user, add it to the user data. However, do so only if the # the value is not blank - blank conflicts with CloudMan's ec2autorun.py if 'bucket_default' not in form_data and self.cloud.bucket_default != '': form_data['bucket_default'] = self.cloud.bucket_default # Reuse the ``password`` for the ``freenxpass`` user data option if 'freenxpass' not in form_data and 'password' in form_data: form_data['freenxpass'] = form_data['password'] # Convert form_data into the YAML format ud = yaml.dump(form_data, default_flow_style=False, allow_unicode=False) # Also include connection info about the selected cloud ci = self._get_cloud_info(self.cloud, as_str=True) return ud + "\n" + ci def _get_cloud_info(self, cloud, as_str=False): """ Get connection information about a given cloud """ ci = {} ci['cloud_type'] = cloud.cloud_type ci['region_name'] = cloud.region_name ci['region_endpoint'] = cloud.region_endpoint ci['is_secure'] = cloud.is_secure ci['ec2_port'] = cloud.ec2_port if cloud.ec2_port != '' else None ci['ec2_conn_path'] = cloud.ec2_conn_path # Include cidr_range only if not empty if cloud.cidr_range != '': ci['cidr_range'] = cloud.cidr_range ci['s3_host'] = cloud.s3_host ci['s3_port'] = cloud.s3_port if cloud.s3_port != '' else None ci['s3_conn_path'] = cloud.s3_conn_path if as_str: ci = yaml.dump(ci, default_flow_style=False, allow_unicode=False) return ci def _find_placements(self, ec2_conn, instance_type, cloud_type): """ Find an EC2 region zone that supports the requested instance type. We do this by checking the spot prices in the potential availability zones for support before deciding on a region: http://blog.piefox.com/2011/07/ec2-availability-zones-and-instance.html If instance_type is None, finds all zones that are currently available. Note that, currently, this only applies to AWS. For other clouds, all the available zones are returned. """ zones = [] in_the_past = datetime.datetime.now() - datetime.timedelta(hours=2) back_compatible_zone = "us-east-1e" for zone in ec2_conn.get_all_zones(): if zone.state in ["available"]: # Non EC2 clouds may not support get_spot_price_history if instance_type is not None and cloud_type == 'ec2': if (len(ec2_conn.get_spot_price_history(instance_type=instance_type, end_time=in_the_past.isoformat(), availability_zone=zone.name)) > 0): zones.append(zone.name) else: zones.append(zone.name) zones.sort(reverse=True) # Higher-lettered zones seem to have more availability currently if back_compatible_zone in zones: zones = [back_compatible_zone] + [z for z in zones if z != back_compatible_zone] if len(zones) == 0: bioblend.log.error("Did not find availabilty zone for {1}".format(instance_type)) zones.append(back_compatible_zone) return zones def _checkURL(self, url): """ Check if the ``url`` is *alive* (i.e., remote server returns code 200(OK) or 401 (unauthorized)). """ try: p = urlparse(url) h = HTTP(p[1]) h.putrequest('HEAD', p[2]) h.endheaders() r = h.getreply() if r[0] == 200 or r[0] == 401: # CloudMan UI is pwd protected so include 401 return True except Exception: # No response or no good response pass return False

Project Versions

This Page