richid · January 24, 2019 21:25 · richid · Jan 24, 2019
diff --git a/_spot_instance_termination_handler.py b/_spot_instance_termination_handler.py
 from dataclasses import dataclass
 from typing import Optional
 import boto3


 @dataclass(frozen=True)
 class EcsInstanceInfo:
    """Immutable record linking an EC2 instance id to its ECS registration.

    Holds the EC2 instance id, the ECS container instance ARN and the ARN of
    the ECS cluster the container instance was found in.
    """

    ec2_id: str
    ecs_instance_arn: str
    ecs_cluster_arn: str

    def ecs_cluster_name(self) -> str:
        """Return whatever follows the last '/' in the cluster ARN."""
        _, _, name = self.ecs_cluster_arn.rpartition('/')
        return name

 # Try the named 'local' profile first; if a session cannot be built from it,
 # fall back to boto3's default credential resolution.
 try:
    session = boto3.Session(profile_name='local')
 except Exception:  # not a bare except, so KeyboardInterrupt/SystemExit still propagate
    session = boto3.Session()

 # Module-level ECS client shared by the functions in this file.
 ecs = session.client('ecs')


 def main(event: dict, context: Optional[dict]) -> bool:
    """Handle one spot interruption event by draining the matching ECS instance.

    Reads event['detail']['instance-id'] and event['detail']['instance-action'].
    The context argument is accepted but never used.

    Returns False when the action is not 'terminate' (compared
    case-insensitively) or when no ECS container instance matches the EC2 id;
    otherwise returns whatever drain_ecs_instance() returns.
    """
    detail = event['detail']
    ec2_instance_id = detail['instance-id']
    instance_action = detail['instance-action']

    # Guard: anything other than a termination notice is ignored.
    is_terminate = instance_action.lower() == 'terminate'
    if not is_terminate:
        print(f'Unexpected instance action ("{instance_action}"), skipping')
        return False

    print(f'Received spot instance termination notice for instance {ec2_instance_id}')

    found = get_instance_info(ec2_instance_id)
    if not found:
        print('Unable to determine ECS cluster, skipping')
        return False

    cluster_name = found.ecs_cluster_name()
    print(f'Instance {found.ec2_id} is part of the {cluster_name} ECS cluster, will drain')

    return drain_ecs_instance(found)


 def get_instance_info(ec2_instance_id: str) -> Optional[EcsInstanceInfo]:
    """Find the ECS container instance backed by the given EC2 instance.

    Walks every cluster returned by list_clusters and every container
    instance ARN returned by list_container_instances, describing the ARNs
    in batches instead of issuing one describe request per instance.

    Returns an EcsInstanceInfo for the first container instance whose
    'ec2InstanceId' equals ec2_instance_id, or None when nothing matches.
    """
    # Function-scope import keeps this block self-contained.
    from itertools import islice

    # DescribeContainerInstances accepts at most 100 container instances per call.
    batch_size = 100

    for cluster_arn in paginate(ecs.list_clusters):
        inst_arns = paginate(ecs.list_container_instances, cluster=cluster_arn)

        while True:
            # paginate() is a generator, so each islice call pulls the next chunk.
            batch = list(islice(inst_arns, batch_size))
            if not batch:
                break

            described = ecs.describe_container_instances(cluster=cluster_arn, containerInstances=batch)

            for inst in described.get('containerInstances', []):
                # .get() tolerates an entry that has no 'ec2InstanceId' key.
                if inst.get('ec2InstanceId') == ec2_instance_id:
                    return EcsInstanceInfo(ec2_instance_id, inst['containerInstanceArn'], cluster_arn)

    return None


 def drain_ecs_instance(instance_info: EcsInstanceInfo) -> bool:
    """Ask ECS to move the given container instance to DRAINING.

    Returns True when the response's 'failures' list is empty; otherwise
    prints the failures and returns False.
    """
    request = {
        'cluster': instance_info.ecs_cluster_arn,
        'containerInstances': [instance_info.ecs_instance_arn],
        'status': 'DRAINING',
    }
    failures = ecs.update_container_instances_state(**request)['failures']

    if failures:
        print(f'Error draining {instance_info.ec2_id}: {failures}')
        return False

    print(f'Instance {instance_info.ec2_id} successfully set to DRAINING')
    return True


 def paginate(method, **kwargs):
    """Yield every result item across all pages of a paginated client call.

    method must be a bound client method: its owner (method.__self__) is asked
    for the paginator registered under method.__name__. kwargs are forwarded
    unchanged to that paginator's paginate().
    """
    owner = method.__self__
    pages = owner.get_paginator(method.__name__).paginate(**kwargs)
    for key_iter in pages.result_key_iters():
        yield from key_iter


 if __name__ == '__main__':
    # Manual run: hand main() a hard-coded spot interruption warning event.
    instance_id = 'i-0938587f3cc35d73f'
    instance_arn = f"arn:aws:ec2:us-east-2:123456789012:instance/{instance_id}"
    detail = {"instance-id": instance_id, "instance-action": "terminate"}

    event = {
        "version": "0",
        "id": "12345678-1234-1234-1234-123456789012",
        "detail-type": "EC2 Spot Instance Interruption Warning",
        "source": "aws.ec2",
        "account": "123456789012",
        "time": "yyyy-mm-ddThh:mm:ssZ",
        "region": "us-east-2",
        "resources": [instance_arn],
        "detail": detail,
    }

    main(event, None)
diff --git a/cloudwatch.tf b/cloudwatch.tf
 # CloudWatch Events rule matching events whose detail-type is
 # "EC2 Spot Instance Interruption Warning" and whose source is "aws.ec2".
 resource "aws_cloudwatch_event_rule" "spot_instance_termination_rule" {
  name        = "SpotInstanceTerminationRule"
  description = "Events rule for Spot Instance termination notices"

  event_pattern = <<PATTERN
 {
  "detail-type": [
    "EC2 Spot Instance Interruption Warning"
  ],  
  "source": [
    "aws.ec2"
  ]
 }
 PATTERN
 }

 # Routes events matched by the spot termination rule to the Lambda function.
 resource "aws_cloudwatch_event_target" "spot_instance_termination_rule_target_lambda" {
  rule      = "${aws_cloudwatch_event_rule.spot_instance_termination_rule.name}"
  target_id = "InvokeLambda"
  arn       = "${aws_lambda_function.spot_instance_termination_lambda.arn}"
 }

 # Second target on the same rule: forwards matched events to the SNS topic.
 resource "aws_cloudwatch_event_target" "spot_instance_termination_rule_target_sns" {
  rule      = "${aws_cloudwatch_event_rule.spot_instance_termination_rule.name}"
  target_id = "SendToSNS"
  arn       = "${aws_sns_topic.spot_instance_termination_sns.arn}"
 }

 # Log group named /aws/lambda/<function name>, with 60-day retention.
 resource "aws_cloudwatch_log_group" "spot_instance_termination_lambda_log_group" {
  name              = "/aws/lambda/${aws_lambda_function.spot_instance_termination_lambda.function_name}"
  retention_in_days = 60
 }
diff --git a/iam.tf b/iam.tf
 # Execution role for the Lambda; the trust policy lets lambda.amazonaws.com
 # assume it.
 # NOTE(review): the description mentions EC2, but the inline policy attached
 # to this role in this file only lists ecs:* actions — confirm which is intended.
 resource "aws_iam_role" "spot_instance_termination_lambda_role" {
  name        = "SpotInstanceTerminationLambda"
  description = "Allows the Spot Instance termination Lambda access to EC2 and ECS."

  assume_role_policy = <<POLICY
 {
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Service": "lambda.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
 }
 POLICY
 }

 # Attaches the AWS-managed AWSLambdaBasicExecutionRole policy to the Lambda role.
 resource "aws_iam_role_policy_attachment" "spot_instance_termination_lambda_basic_attachment" {
  role       = "${aws_iam_role.spot_instance_termination_lambda_role.name}"
  policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
 }

 # Inline policy for the Lambda role: ECS Describe*/List* for discovery plus
 # ecs:UpdateContainerInstancesState, the action behind the handler's
 # update_container_instances_state() call that sets an instance to DRAINING.
 resource "aws_iam_role_policy" "spot_instance_termination_lambda_policy_attach_inline" {
  name = "SpotInstanceTerminationLambda"
  role = "${aws_iam_role.spot_instance_termination_lambda_role.name}"

  policy = <<EOF
 {
  "Version": "2012-10-17",
  "Statement": [
    {
      "Action": [
        "ecs:Describe*",
        "ecs:List*",
        "ecs:UpdateContainerInstancesState"
      ],
      "Effect": "Allow",
      "Resource": [
        "*"
      ]
    }
  ]
 }
 EOF
 }
diff --git a/lambda.tf b/lambda.tf
 # Lambda function whose handler is _spot_instance_termination_handler.main,
 # on the python3.7 runtime, deployed from the zip built by the archive_file
 # data source; source_code_hash is taken from that archive's hash output.
 # NOTE(review): data.aws_kms_key.lambda_kms is not declared in the visible
 # files — presumably defined elsewhere; confirm.
 resource "aws_lambda_function" "spot_instance_termination_lambda" {
  function_name = "SpotInstanceTermination"
  description   = "Handles Spot Instance termination messages by gracefully shutting down the instance."
  role          = "${aws_iam_role.spot_instance_termination_lambda_role.arn}"

  handler     = "_spot_instance_termination_handler.main"
  runtime     = "python3.7"
  timeout     = 30
  memory_size = 128
  kms_key_arn = "${data.aws_kms_key.lambda_kms.arn}"

  filename         = "${data.archive_file.spot_instance_termination_lambda_zip.output_path}"
  source_code_hash = "${data.archive_file.spot_instance_termination_lambda_zip.output_base64sha256}"
 }

 # Zips the single handler source file into dist/ as the Lambda deployment package.
 data "archive_file" "spot_instance_termination_lambda_zip" {
  type        = "zip"
  source_file = "_spot_instance_termination_handler.py"
  output_path = "dist/spot_instance_termination_lambda.zip"
 }

 # Allows CloudWatch Events (events.amazonaws.com) to invoke the Lambda.
 # source_arn must be the ARN of the event rule: the event target's "arn"
 # attribute is the ARN of the target (the Lambda itself), not of the rule.
 resource "aws_lambda_permission" "spot_instance_termination_lambda_allow_cloudwatch" {
  statement_id  = "AllowInvokeFromCloudWatch"
  action        = "lambda:InvokeFunction"
  function_name = "${aws_lambda_function.spot_instance_termination_lambda.function_name}"
  principal     = "events.amazonaws.com"
  source_arn    = "${aws_cloudwatch_event_rule.spot_instance_termination_rule.arn}"
 }
diff --git a/sns.tf b/sns.tf
 # SNS topic that receives the spot termination events; encrypted with the
 # alias/aws/sns KMS key.
 # NOTE(review): CloudWatch Events may be unable to publish to a topic
 # encrypted with the AWS-managed alias/aws/sns key, since that key's policy
 # cannot be edited to allow events.amazonaws.com — confirm delivery works or
 # switch to a customer-managed key.
 resource "aws_sns_topic" "spot_instance_termination_sns" {
  name              = "SpotInstanceTerminations"
  kms_master_key_id = "alias/aws/sns"
 }

 # Topic policy allowing the events.amazonaws.com service principal to
 # sns:Publish to the spot termination topic.
 resource "aws_sns_topic_policy" "spot_instance_termination_sns_policy" {
  arn = "${aws_sns_topic.spot_instance_termination_sns.arn}"

  policy = <<POLICY
 {
  "Version": "2012-10-17",
  "Id": "snspolicy",
  "Statement": [{
    "Sid": "AllowPublishFromCloudWatchEvents",
    "Effect": "Allow",
    "Principal": {
      "Service": "events.amazonaws.com"
    },
    "Action": "sns:Publish",
    "Resource": "${aws_sns_topic.spot_instance_termination_sns.arn}"
  }]
 }
 POLICY
 }
	from dataclasses import dataclass
	from typing import Optional
	import boto3


	@dataclass(frozen=True)
	class EcsInstanceInfo:
	    """Immutable record linking an EC2 instance id to its ECS registration.

	    Holds the EC2 instance id, the ECS container instance ARN and the ARN
	    of the ECS cluster the container instance was found in.
	    """

	    ec2_id: str
	    ecs_instance_arn: str
	    ecs_cluster_arn: str

	    def ecs_cluster_name(self) -> str:
	        """Return whatever follows the last '/' in the cluster ARN."""
	        return self.ecs_cluster_arn.split('/')[-1]

	# Try the named 'local' profile first; if a session cannot be built from it,
	# fall back to boto3's default credential resolution.
	try:
	    session = boto3.Session(profile_name='local')
	except Exception:  # not a bare except, so KeyboardInterrupt/SystemExit still propagate
	    session = boto3.Session()

	# Module-level ECS client shared by the functions in this file.
	ecs = session.client('ecs')


	def main(event: dict, context: Optional[dict]) -> bool:
	    """Handle one spot interruption event by draining the matching ECS instance.

	    Reads event['detail']['instance-id'] and event['detail']['instance-action'].
	    The context argument is accepted but never used.

	    Returns False when the action is not 'terminate' (compared
	    case-insensitively) or when no ECS container instance matches the EC2
	    id; otherwise returns whatever drain_ecs_instance() returns.
	    """
	    ec2_instance_id = event['detail']['instance-id']
	    instance_action = event['detail']['instance-action']

	    # Guard: anything other than a termination notice is ignored.
	    if instance_action.lower() != 'terminate':
	        print(f'Unexpected instance action ("{instance_action}"), skipping')
	        return False

	    print(f'Received spot instance termination notice for instance {ec2_instance_id}')

	    instance_info = get_instance_info(ec2_instance_id)

	    if not instance_info:
	        print('Unable to determine ECS cluster, skipping')
	        return False

	    print(f'Instance {instance_info.ec2_id} is part of the {instance_info.ecs_cluster_name()} ECS cluster, will drain')

	    return drain_ecs_instance(instance_info)


	def get_instance_info(ec2_instance_id: str) -> Optional[EcsInstanceInfo]:
	    """Find the ECS container instance backed by the given EC2 instance.

	    Walks every cluster returned by list_clusters and every container
	    instance ARN returned by list_container_instances, describing the ARNs
	    in batches instead of issuing one describe request per instance.

	    Returns an EcsInstanceInfo for the first container instance whose
	    'ec2InstanceId' equals ec2_instance_id, or None when nothing matches.
	    """
	    # Function-scope import keeps this block self-contained.
	    from itertools import islice

	    # DescribeContainerInstances accepts at most 100 container instances per call.
	    batch_size = 100

	    for cluster_arn in paginate(ecs.list_clusters):
	        inst_arns = paginate(ecs.list_container_instances, cluster=cluster_arn)

	        while True:
	            # paginate() is a generator, so each islice call pulls the next chunk.
	            batch = list(islice(inst_arns, batch_size))
	            if not batch:
	                break

	            described = ecs.describe_container_instances(cluster=cluster_arn, containerInstances=batch)

	            for inst in described.get('containerInstances', []):
	                # .get() tolerates an entry that has no 'ec2InstanceId' key.
	                if inst.get('ec2InstanceId') == ec2_instance_id:
	                    return EcsInstanceInfo(ec2_instance_id, inst['containerInstanceArn'], cluster_arn)

	    return None


	def drain_ecs_instance(instance_info: EcsInstanceInfo) -> bool:
	    """Ask ECS to move the given container instance to DRAINING.

	    Returns True when the response's 'failures' list is empty; otherwise
	    prints the failures and returns False.
	    """
	    response = ecs.update_container_instances_state(
	        cluster=instance_info.ecs_cluster_arn,
	        containerInstances=[instance_info.ecs_instance_arn],
	        status='DRAINING'
	    )

	    if len(response['failures']) < 1:
	        print(f'Instance {instance_info.ec2_id} successfully set to DRAINING')
	        return True
	    else:
	        print(f'Error draining {instance_info.ec2_id}: {response["failures"]}')
	        return False


	def paginate(method, **kwargs):
	    """Yield every result item across all pages of a paginated client call.

	    method must be a bound client method: its owner (method.__self__) is
	    asked for the paginator registered under method.__name__. kwargs are
	    forwarded unchanged to that paginator's paginate().
	    """
	    client = method.__self__
	    paginator = client.get_paginator(method.__name__)
	    for page in paginator.paginate(**kwargs).result_key_iters():
	        for result in page:
	            yield result


	if __name__ == '__main__':
	    # Manual run: hand main() a hard-coded spot interruption warning event.
	    instance_id = 'i-0938587f3cc35d73f'
	    event = {
	        "version": "0",
	        "id": "12345678-1234-1234-1234-123456789012",
	        "detail-type": "EC2 Spot Instance Interruption Warning",
	        "source": "aws.ec2",
	        "account": "123456789012",
	        "time": "yyyy-mm-ddThh:mm:ssZ",
	        "region": "us-east-2",
	        "resources": ["arn:aws:ec2:us-east-2:123456789012:instance/" + instance_id],
	        "detail": {
	            "instance-id": instance_id,
	            "instance-action": "terminate"
	        }
	    }

	    main(event, None)
	# CloudWatch Events rule matching events whose detail-type is
	# "EC2 Spot Instance Interruption Warning" and whose source is "aws.ec2".
	resource "aws_cloudwatch_event_rule" "spot_instance_termination_rule" {
	name = "SpotInstanceTerminationRule"
	description = "Events rule for Spot Instance termination notices"

	event_pattern = <<PATTERN
	{
	"detail-type": [
	"EC2 Spot Instance Interruption Warning"
	],
	"source": [
	"aws.ec2"
	]
	}
	PATTERN
	}

	# Routes events matched by the spot termination rule to the Lambda function.
	resource "aws_cloudwatch_event_target" "spot_instance_termination_rule_target_lambda" {
	rule = "${aws_cloudwatch_event_rule.spot_instance_termination_rule.name}"
	target_id = "InvokeLambda"
	arn = "${aws_lambda_function.spot_instance_termination_lambda.arn}"
	}

	# Second target on the same rule: forwards matched events to the SNS topic.
	resource "aws_cloudwatch_event_target" "spot_instance_termination_rule_target_sns" {
	rule = "${aws_cloudwatch_event_rule.spot_instance_termination_rule.name}"
	target_id = "SendToSNS"
	arn = "${aws_sns_topic.spot_instance_termination_sns.arn}"
	}

	# Log group named /aws/lambda/<function name>, with 60-day retention.
	resource "aws_cloudwatch_log_group" "spot_instance_termination_lambda_log_group" {
	name = "/aws/lambda/${aws_lambda_function.spot_instance_termination_lambda.function_name}"
	retention_in_days = 60
	}
	# Execution role for the Lambda; the trust policy lets lambda.amazonaws.com
	# assume it.
	# NOTE(review): the description mentions EC2, but the inline policy attached
	# to this role in this file only lists ecs:* actions — confirm which is intended.
	resource "aws_iam_role" "spot_instance_termination_lambda_role" {
	name = "SpotInstanceTerminationLambda"
	description = "Allows the Spot Instance termination Lambda access to EC2 and ECS."

	assume_role_policy = <<POLICY
	{
	"Version": "2012-10-17",
	"Statement": [
	{
	"Effect": "Allow",
	"Principal": {
	"Service": "lambda.amazonaws.com"
	},
	"Action": "sts:AssumeRole"
	}
	]
	}
	POLICY
	}

	# Attaches the AWS-managed AWSLambdaBasicExecutionRole policy to the Lambda role.
	resource "aws_iam_role_policy_attachment" "spot_instance_termination_lambda_basic_attachment" {
	role = "${aws_iam_role.spot_instance_termination_lambda_role.name}"
	policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
	}

	# Inline policy for the Lambda role: ECS Describe*/List* for discovery plus
	# ecs:UpdateContainerInstancesState, the action behind the handler's
	# update_container_instances_state() call that sets an instance to DRAINING.
	resource "aws_iam_role_policy" "spot_instance_termination_lambda_policy_attach_inline" {
	name = "SpotInstanceTerminationLambda"
	role = "${aws_iam_role.spot_instance_termination_lambda_role.name}"

	policy = <<EOF
	{
	"Version": "2012-10-17",
	"Statement": [
	{
	"Action": [
	"ecs:Describe*",
	"ecs:List*",
	"ecs:UpdateContainerInstancesState"
	],
	"Effect": "Allow",
	"Resource": [
	"*"
	]
	}
	]
	}
	EOF
	}
	# Lambda function whose handler is _spot_instance_termination_handler.main,
	# on the python3.7 runtime, deployed from the zip built by the archive_file
	# data source; source_code_hash is taken from that archive's hash output.
	# NOTE(review): data.aws_kms_key.lambda_kms is not declared in the visible
	# files — presumably defined elsewhere; confirm.
	resource "aws_lambda_function" "spot_instance_termination_lambda" {
	function_name = "SpotInstanceTermination"
	description = "Handles Spot Instance termination messages by gracefully shutting down the instance."
	role = "${aws_iam_role.spot_instance_termination_lambda_role.arn}"

	handler = "_spot_instance_termination_handler.main"
	runtime = "python3.7"
	timeout = 30
	memory_size = 128
	kms_key_arn = "${data.aws_kms_key.lambda_kms.arn}"

	filename = "${data.archive_file.spot_instance_termination_lambda_zip.output_path}"
	source_code_hash = "${data.archive_file.spot_instance_termination_lambda_zip.output_base64sha256}"
	}

	# Zips the single handler source file into dist/ as the Lambda deployment package.
	data "archive_file" "spot_instance_termination_lambda_zip" {
	type = "zip"
	source_file = "_spot_instance_termination_handler.py"
	output_path = "dist/spot_instance_termination_lambda.zip"
	}

	# Allows CloudWatch Events (events.amazonaws.com) to invoke the Lambda.
	# source_arn must be the ARN of the event rule: the event target's "arn"
	# attribute is the ARN of the target (the Lambda itself), not of the rule.
	resource "aws_lambda_permission" "spot_instance_termination_lambda_allow_cloudwatch" {
	statement_id = "AllowInvokeFromCloudWatch"
	action = "lambda:InvokeFunction"
	function_name = "${aws_lambda_function.spot_instance_termination_lambda.function_name}"
	principal = "events.amazonaws.com"
	source_arn = "${aws_cloudwatch_event_rule.spot_instance_termination_rule.arn}"
	}
	# SNS topic that receives the spot termination events; encrypted with the
	# alias/aws/sns KMS key.
	# NOTE(review): CloudWatch Events may be unable to publish to a topic
	# encrypted with the AWS-managed alias/aws/sns key, since that key's policy
	# cannot be edited to allow events.amazonaws.com — confirm delivery works or
	# switch to a customer-managed key.
	resource "aws_sns_topic" "spot_instance_termination_sns" {
	name = "SpotInstanceTerminations"
	kms_master_key_id = "alias/aws/sns"
	}

	# Topic policy allowing the events.amazonaws.com service principal to
	# sns:Publish to the spot termination topic.
	resource "aws_sns_topic_policy" "spot_instance_termination_sns_policy" {
	arn = "${aws_sns_topic.spot_instance_termination_sns.arn}"

	policy = <<POLICY
	{
	"Version": "2012-10-17",
	"Id": "snspolicy",
	"Statement": [{
	"Sid": "AllowPublishFromCloudWatchEvents",
	"Effect": "Allow",
	"Principal": {
	"Service": "events.amazonaws.com"
	},
	"Action": "sns:Publish",
	"Resource": "${aws_sns_topic.spot_instance_termination_sns.arn}"
	}]
	}
	POLICY
	}