CloudFormation Custom Resource - 01 Example Update Custom Resource

Keywords: AWS, Amazon, CloudFormation

本文是 AWS CloudFormation Custom Resources 的后续进阶博文. 给出了详细的最佳实践.

下面 lambda_function_example.py 的例子是用来 handle custom resource 的 Lambda Function 的最佳实现. 里面的 process() 特意留空, 以便用户自行实现具体的业务逻辑, 并给出了一些建议. 其他地方都是一些通用的标准最佳实践.

lambda_function_example.py
  1# -*- coding: utf-8 -*-
  2
  3"""
  4Sample CloudFormation Custom Resource Handler lambda function code.
  5"""
  6
  7import typing as T
  8import json
  9import traceback
 10import dataclasses
 11
 12import urllib3  # this is not a standard library, but this is available as a boto3 dependency
 13
 14# if you need to make HTTP request, you can use the standard library ``urllib.request``
 15# import urllib.request
 16
 17
 18# ------------------------------------------------------------------------------
 19# Helpers
 20# ------------------------------------------------------------------------------
# Module-level urllib3 connection pool, created once at import time.
# ``send_to_cloudformation`` uses it for the HTTP PUT to the response URL.
http = urllib3.PoolManager()
 22
 23T_DATA = T.Dict[str, T.Any]
 24
 25
 26@dataclasses.dataclass
 27class Base:
 28    @classmethod
 29    def from_dict(cls, dct: T_DATA):
 30        kwargs = {}
 31        for field in dataclasses.fields(cls):
 32            if field.name in dct:
 33                kwargs[field.name] = dct[field.name]
 34        return cls(**kwargs)
 35
 36    def to_dict(self) -> T_DATA:
 37        return dataclasses.asdict(self)
 38
 39
 40class RequestTypeEnum:
 41    Create = "Create"
 42    Update = "Update"
 43    Delete = "Delete"
 44
 45
 46@dataclasses.dataclass
 47class Request(Base):
 48    """
 49    The request object for CFN custom
 50
 51    Reference:
 52
 53    - Request: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/crpg-ref-responses.html
 54    """
 55
 56    RequestType: str = dataclasses.field()
 57    ResponseURL: str = dataclasses.field()
 58    StackId: str = dataclasses.field()
 59    RequestId: str = dataclasses.field()
 60    ResourceType: str = dataclasses.field()
 61    LogicalResourceId: str = dataclasses.field()
 62    PhysicalResourceId: T.Optional[str] = dataclasses.field(default=None)
 63    ResourceProperties: T.Optional[T_DATA] = dataclasses.field(default=None)
 64    OldResourceProperties: T.Optional[T_DATA] = dataclasses.field(default=None)
 65
 66    def is_create_type(self) -> bool:
 67        return self.RequestType == RequestTypeEnum.Create
 68
 69    def is_update_type(self) -> bool:
 70        return self.RequestType == RequestTypeEnum.Update
 71
 72    def is_delete_type(self) -> bool:
 73        return self.RequestType == RequestTypeEnum.Delete
 74
 75
 76class StatusEnum:
 77    SUCCESS = "SUCCESS"
 78    FAILED = "FAILED"
 79
 80
 81@dataclasses.dataclass
 82class Response(Base):
 83    """
 84    The response object for CFN custom resource.
 85
 86    Reference:
 87
 88    - Response https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/crpg-ref-requests.html
 89    """
 90
 91    Status: str = dataclasses.field()
 92    PhysicalResourceId: str = dataclasses.field()
 93    StackId: str = dataclasses.field()
 94    RequestId: str = dataclasses.field()
 95    LogicalResourceId: str = dataclasses.field()
 96    NoEcho: bool = dataclasses.field(default=False)
 97    Data: T_DATA = dataclasses.field(default_factory=dict)
 98    Reason: str = dataclasses.field(default=None)
 99
100
def send_to_cloudformation(
    request: Request,
    context,
    response_status: str,
    response_data: T.Dict[str, T.Any],
    physical_resource_id: T.Optional[str] = None,
    no_echo: bool = False,
):
    """
    Send the CFN custom resource response back to CFN.

    The response is serialized to JSON and uploaded with an HTTP PUT to
    ``request.ResponseURL``. The response ``Reason`` always points at the
    current CloudWatch log stream.

    Reference:

    - Request: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/crpg-ref-requests.html
    - Response: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/crpg-ref-responses.html

    :param request: the original lambda event object, which is a :class:`Request` object
    :param context: the original lambda context object
    :param response_status: SUCCESS or FAILED, you decide
    :param response_data: user data you want to return as Custom Resource attribute
    :param physical_resource_id: This value should be an identifier unique to the
        custom resource vendor, and can be up to 1 KB in size. The value must be
        a non-empty string and must be identical for all responses for the same resource.
        When it is empty or ``None``, ``context.log_stream_name`` is used instead.
    :param no_echo: Optional. Indicates whether to mask the output of the
        custom resource when retrieved by using the Fn::GetAtt function.
        If set to true, all returned values are masked with asterisks (*****),
        except for those stored in the Metadata section of the template.

    :raises Exception: whatever the HTTP client raises while sending is logged
        and re-raised unchanged.
    """
    response = Response(
        Status=response_status,
        PhysicalResourceId=physical_resource_id or context.log_stream_name,
        StackId=request.StackId,
        RequestId=request.RequestId,
        LogicalResourceId=request.LogicalResourceId,
        NoEcho=no_echo,
        Data=response_data,
        Reason="See the details in CloudWatch Log Stream: " + context.log_stream_name,
    )
    # This looks nice in cloudwatch log
    print("---------- Response body ----------")
    json_response_body = json.dumps(response.to_dict())
    print(f"{json_response_body = }")

    # Encode once, so that content-length is the size of the bytes actually sent.
    encoded_body = json_response_body.encode("utf-8")
    headers = {
        "content-type": "application/json",
        "content-length": str(len(encoded_body)),
    }
    try:
        http_response = http.request(
            "PUT",
            request.ResponseURL,
            body=encoded_body,
            headers=headers,
        )
    except Exception as e:
        print("send(...) failed executing http.request(...): " + str(e))
        raise

    # ``reason`` may be None, so format it instead of concatenating strings.
    print(f"Status code: {http_response.status} {http_response.reason}")
    if not 200 <= http_response.status < 300:
        # The HTTP client returns non-2xx responses instead of raising, so make
        # a rejected upload visible in the log; otherwise the only symptom is
        # CloudFormation waiting until its timeout.
        print("WARNING: CloudFormation response URL did not accept the response")
159
160
def lambda_handler(event, context):
    """
    Lambda entry point for the custom resource.

    Logs the event, converts it to a :class:`Request`, runs :func:`process`
    and reports the outcome to CloudFormation with
    :func:`send_to_cloudformation`.

    :param event: the custom resource request, as a dictionary
    :param context: the lambda context object

    :return: ``{"statusCode": 200}`` when the request was processed and
        reported, ``{"statusCode": 400}`` when an exception was caught and a
        FAILED response was sent instead.
    """
    print("========== Start Lambda Function ==========")
    print("---------- Event body ----------")
    # the variable name is part of the ``{event_body = }`` log output
    event_body = json.dumps(event)
    print(f"{event_body = }")

    request = Request.from_dict(event)
    try:
        outcome = process(request=request, context=context)
        send_to_cloudformation(
            request=request,
            context=context,
            response_status=outcome.response_status,
            response_data=outcome.data,
            physical_resource_id=outcome.physical_resource_id,
            no_echo=outcome.no_echo,
        )
    # Your custom request processing logic may have bugs. It is very important
    # to catch all exceptions and send a FAILED signal back to CFN, otherwise
    # CFN will wait for the response for one hour (by default).
    except Exception as error:
        print("---------- failed to process request, see traceback below ----------")
        print(traceback.format_exc(limit=20))
        # send a failed signal back to CFN
        send_to_cloudformation(
            request=request,
            context=context,
            response_status=StatusEnum.FAILED,
            response_data={"error": str(error)},
            physical_resource_id=request.PhysicalResourceId,
            no_echo=False,
        )
        status_code = 400
    else:
        status_code = 200

    print("========== End Lambda Function ==========")
    return {"statusCode": status_code}
199
200
201# ------------------------------------------------------------------------------
202# Implement this
203# ------------------------------------------------------------------------------
@dataclasses.dataclass
class Result:
    """
    The result of the :func:`process` function.

    ``lambda_handler`` forwards these values to ``send_to_cloudformation``.
    """

    # user data returned as the custom resource attributes
    data: T_DATA
    # physical resource id to report back
    physical_resource_id: str
    # ``StatusEnum.SUCCESS`` unless stated otherwise
    response_status: str = StatusEnum.SUCCESS
    # forwarded as the ``no_echo`` argument
    no_echo: bool = False
214
215
def process(
    request: Request,
    context,
) -> Result:
    """
    Put your custom resource request processing logic here.

    This template version is intentionally left unimplemented.

    :param request: the original lambda event object, which is a :class:`Request` object
    :param context: the original lambda context object.

    :return: a :class:`Result` object, which provides the information needed to
        create the :class:`Response` object.

    :raises NotImplementedError: always, until you replace the body.

    Here are some hints:

    - if you believe the cloudformation deployment should not proceed, set
        ``Result.response_status`` to ``StatusEnum.FAILED``.
    - include logic to handle ``if request.is_create_type():``,
        ``if request.is_update_type():``, ``if request.is_delete_type():``.
    - when request type is Create, you should use a deterministic physical resource id,
        such as ``context.log_stream_name`` or a hard coded value.
    - when request type is Update, if you believe your custom resource update logic
        should be a simple update, then just set physical_resource_id to the same value
        you used in the Create logic branch.
    - when request type is Update, if you believe your custom resource update logic
        should be an update followed by deleting the old one, then you should set a
        different physical_resource_id value than the one you used in the Create
        logic branch, for example, you can use ``uuid.uuid4().hex``.
    """
    raise NotImplementedError

lambda_function.py 则是一个具体的实现, 用于演示在 Create, Update, Delete 三种 Request 的情况下分别如何进行处理, 以及演示返回不同的 PhysicalResourceId 会造成什么影响. 这里的重要结论是: 如果 Update 的时候返回的 PhysicalResourceId 和之前的不一样, CloudFormation 会认为旧的资源被替换了, 因此在 Update 完成之后还会针对旧的 PhysicalResourceId 多发送一个 Delete Request.

lambda_function.py
  1# -*- coding: utf-8 -*-
  2
  3"""
  4Sample CloudFormation Custom Resource Handler lambda function code.
  5"""
  6
  7import typing as T
  8import json
  9import traceback
 10import dataclasses
 11
 12import urllib3  # this is not a standard library, but this is available as a boto3 dependency
 13
 14# if you need to make HTTP request, you can use the standard library ``urllib.request``
 15# import urllib.request
 16
 17# ------------------------------------------------------------------------------
 18# Helpers
 19# ------------------------------------------------------------------------------
# Module-level urllib3 connection pool, created once at import time.
# ``send_to_cloudformation`` uses it for the HTTP PUT to the response URL.
http = urllib3.PoolManager()
 21
 22T_DATA = T.Dict[str, T.Any]
 23
 24
 25@dataclasses.dataclass
 26class Base:
 27    @classmethod
 28    def from_dict(cls, dct: T_DATA):
 29        kwargs = {}
 30        for field in dataclasses.fields(cls):
 31            if field.name in dct:
 32                kwargs[field.name] = dct[field.name]
 33        return cls(**kwargs)
 34
 35    def to_dict(self) -> T_DATA:
 36        return dataclasses.asdict(self)
 37
 38
 39class RequestTypeEnum:
 40    Create = "Create"
 41    Update = "Update"
 42    Delete = "Delete"
 43
 44
 45@dataclasses.dataclass
 46class Request(Base):
 47    """
 48    The request object for CFN custom
 49
 50    Reference:
 51
 52    - Request: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/crpg-ref-responses.html
 53    """
 54
 55    RequestType: str = dataclasses.field()
 56    ResponseURL: str = dataclasses.field()
 57    StackId: str = dataclasses.field()
 58    RequestId: str = dataclasses.field()
 59    ResourceType: str = dataclasses.field()
 60    LogicalResourceId: str = dataclasses.field()
 61    PhysicalResourceId: T.Optional[str] = dataclasses.field(default=None)
 62    ResourceProperties: T.Optional[T_DATA] = dataclasses.field(default=None)
 63    OldResourceProperties: T.Optional[T_DATA] = dataclasses.field(default=None)
 64
 65    def is_create_type(self) -> bool:
 66        return self.RequestType == RequestTypeEnum.Create
 67
 68    def is_update_type(self) -> bool:
 69        return self.RequestType == RequestTypeEnum.Update
 70
 71    def is_delete_type(self) -> bool:
 72        return self.RequestType == RequestTypeEnum.Delete
 73
 74
 75class StatusEnum:
 76    SUCCESS = "SUCCESS"
 77    FAILED = "FAILED"
 78
 79
 80@dataclasses.dataclass
 81class Response(Base):
 82    """
 83    The response object for CFN custom resource.
 84
 85    Reference:
 86
 87    - Response https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/crpg-ref-requests.html
 88    """
 89
 90    Status: str = dataclasses.field()
 91    PhysicalResourceId: str = dataclasses.field()
 92    StackId: str = dataclasses.field()
 93    RequestId: str = dataclasses.field()
 94    LogicalResourceId: str = dataclasses.field()
 95    NoEcho: bool = dataclasses.field(default=False)
 96    Data: T_DATA = dataclasses.field(default_factory=dict)
 97    Reason: str = dataclasses.field(default=None)
 98
 99
def send_to_cloudformation(
    request: Request,
    context,
    response_status: str,
    response_data: T.Dict[str, T.Any],
    physical_resource_id: T.Optional[str] = None,
    no_echo: bool = False,
):
    """
    Send the CFN custom resource response back to CFN.

    The response is serialized to JSON and uploaded with an HTTP PUT to
    ``request.ResponseURL``. The response ``Reason`` always points at the
    current CloudWatch log stream.

    Reference:

    - Request: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/crpg-ref-requests.html
    - Response: https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/crpg-ref-responses.html

    :param request: the original lambda event object, which is a :class:`Request` object
    :param context: the original lambda context object
    :param response_status: SUCCESS or FAILED, you decide
    :param response_data: user data you want to return as Custom Resource attribute
    :param physical_resource_id: This value should be an identifier unique to the
        custom resource vendor, and can be up to 1 KB in size. The value must be
        a non-empty string and must be identical for all responses for the same resource.
        When it is empty or ``None``, ``context.log_stream_name`` is used instead.
    :param no_echo: Optional. Indicates whether to mask the output of the
        custom resource when retrieved by using the Fn::GetAtt function.
        If set to true, all returned values are masked with asterisks (*****),
        except for those stored in the Metadata section of the template.

    :raises Exception: whatever the HTTP client raises while sending is logged
        and re-raised unchanged.
    """
    response = Response(
        Status=response_status,
        PhysicalResourceId=physical_resource_id or context.log_stream_name,
        StackId=request.StackId,
        RequestId=request.RequestId,
        LogicalResourceId=request.LogicalResourceId,
        NoEcho=no_echo,
        Data=response_data,
        Reason="See the details in CloudWatch Log Stream: " + context.log_stream_name,
    )
    # This looks nice in cloudwatch log
    print("---------- Response body ----------")
    json_response_body = json.dumps(response.to_dict())
    print(f"{json_response_body = }")

    # Encode once, so that content-length is the size of the bytes actually sent.
    encoded_body = json_response_body.encode("utf-8")
    headers = {
        "content-type": "application/json",
        "content-length": str(len(encoded_body)),
    }
    try:
        http_response = http.request(
            "PUT",
            request.ResponseURL,
            body=encoded_body,
            headers=headers,
        )
    except Exception as e:
        print("send(...) failed executing http.request(...): " + str(e))
        raise

    # ``reason`` may be None, so format it instead of concatenating strings.
    print(f"Status code: {http_response.status} {http_response.reason}")
    if not 200 <= http_response.status < 300:
        # The HTTP client returns non-2xx responses instead of raising, so make
        # a rejected upload visible in the log; otherwise the only symptom is
        # CloudFormation waiting until its timeout.
        print("WARNING: CloudFormation response URL did not accept the response")
158
159
def lambda_handler(event, context):
    """
    Lambda entry point for the custom resource.

    Logs the event, converts it to a :class:`Request`, runs :func:`process`
    and reports the outcome to CloudFormation with
    :func:`send_to_cloudformation`.

    :param event: the custom resource request, as a dictionary
    :param context: the lambda context object

    :return: ``{"statusCode": 200}`` when the request was processed and
        reported, ``{"statusCode": 400}`` when an exception was caught and a
        FAILED response was sent instead.
    """
    print("========== Start Lambda Function ==========")
    print("---------- Event body ----------")
    # the variable name is part of the ``{event_body = }`` log output
    event_body = json.dumps(event)
    print(f"{event_body = }")

    request = Request.from_dict(event)
    try:
        outcome = process(request=request, context=context)
        send_to_cloudformation(
            request=request,
            context=context,
            response_status=outcome.response_status,
            response_data=outcome.data,
            physical_resource_id=outcome.physical_resource_id,
            no_echo=outcome.no_echo,
        )
    # Your custom request processing logic may have bugs. It is very important
    # to catch all exceptions and send a FAILED signal back to CFN, otherwise
    # CFN will wait for the response for one hour (by default).
    except Exception as error:
        print("---------- failed to process request, see traceback below ----------")
        print(traceback.format_exc(limit=20))
        # send a failed signal back to CFN
        send_to_cloudformation(
            request=request,
            context=context,
            response_status=StatusEnum.FAILED,
            response_data={"error": str(error)},
            physical_resource_id=request.PhysicalResourceId,
            no_echo=False,
        )
        status_code = 400
    else:
        status_code = 200

    print("========== End Lambda Function ==========")
    return {"statusCode": status_code}
198
199
200# ------------------------------------------------------------------------------
201# Implement this
202# ------------------------------------------------------------------------------
@dataclasses.dataclass
class Result:
    """
    The result of the :func:`process` function.

    ``lambda_handler`` forwards these values to ``send_to_cloudformation``.
    """

    # user data returned as the custom resource attributes
    data: T_DATA
    # physical resource id to report back
    physical_resource_id: str
    # ``StatusEnum.SUCCESS`` unless stated otherwise
    response_status: str = StatusEnum.SUCCESS
    # forwarded as the ``no_echo`` argument
    no_echo: bool = False
213
214
215import random
216from datetime import datetime
217import boto3
218
# Region used both for the boto session and for the debug bucket name that
# ``process`` builds (``{aws_account_id}-{aws_region}-data``).
aws_region = "us-east-1"
boto_ses = boto3.Session(region_name=aws_region)
# Resolved once at import time with an STS call.
aws_account_id = boto_ses.client("sts").get_caller_identity()["Account"]
# Create the S3 client from the same session, so the client is pinned to the
# region that appears in the bucket name rather than the ambient default.
s3_client = boto_ses.client("s3")
223
224
def process(
    request: Request,
    context,
) -> Result:
    """
    Demo request processing logic for the custom resource.

    Every request is dumped to S3 for debugging, then a randomly named
    ``{"name": "CustomResourcePocIamGroupNNN"}`` payload is returned together
    with a physical resource id that depends on the request type:

    - Create: a fixed initial id.
    - Update: a *different* fixed id, which demonstrates that CloudFormation
      treats the update as a replacement and sends an extra Delete request
      for the old id afterwards.
    - Delete: the id that came with the request, because the physical
      resource id must be identical for all responses for the same resource.

    :param request: the original lambda event object, which is a :class:`Request` object
    :param context: the original lambda context object.

    :return: a :class:`Result` object, which provides the information needed to
        create the :class:`Response` object.

    :raises ValueError: when ``request.RequestType`` is not Create, Update or Delete.

    Here are some hints for your own implementation:

    - if you believe the cloudformation deployment should not proceed, set
        ``Result.response_status`` to ``StatusEnum.FAILED``.
    - when request type is Create, you should use a deterministic physical resource id,
        such as ``context.log_stream_name`` or a hard coded value.
    - when request type is Update, if you believe your custom resource update logic
        should be a simple update, then just set physical_resource_id to the same value
        you used in the Create logic branch.
    - when request type is Update, if you believe your custom resource update logic
        should be an update followed by deleting the old one, then you should set a
        different physical_resource_id value than the one you used in the Create
        logic branch, for example, you can use ``uuid.uuid4().hex``.
    """
    # create s3 object for debug
    s3_client.put_object(
        Bucket=f"{aws_account_id}-{aws_region}-data",
        Key=(
            "projects/aws-lambda-backed-custom-resources-poc/01-Example-Update-Custom-Resource/requests/"
            f"{str(datetime.now())}.json"
        ),
        Body=json.dumps(request.to_dict(), indent=4),
    )
    data = {"name": f"CustomResourcePocIamGroup{random.randint(100, 999)}"}
    print("---------- Response data ----------")
    # the variable name is part of the ``{data_body = }`` log output
    data_body = json.dumps(data)
    print(f"{data_body = }")

    if request.is_create_type():
        physical_resource_id = "the-initial-physical-resource-id"
    elif request.is_update_type():
        physical_resource_id = "the-update-physical-resource-id"
    elif request.is_delete_type():
        # Echo the id of the resource being deleted instead of inventing a new
        # one; the literal is kept only as a fallback for a request that
        # carries no PhysicalResourceId.
        physical_resource_id = (
            request.PhysicalResourceId or "the-delete-physical-resource-id"
        )
    else:  # pragma: no cover
        raise ValueError(f"Unknown RequestType: {request.RequestType}")

    return Result(
        data=data,
        physical_resource_id=physical_resource_id,
        response_status=StatusEnum.SUCCESS,
        no_echo=False,
    )

deploy_cf.py 则是一个部署脚本, 里面定义了一个真实的 CloudFormation Template 并将其部署到 AWS. 其中的关键点有两个:

  1. 用 ServiceTimeout 来设置超时时间, 以防止 CloudFormation 卡死.

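  2. 用 client_token 来保证 Update Stack 的时候同时也 Update Custom Resource: 每次部署都会生成一个随机的 client_token 属性值, 使得 Custom Resource 的 Properties 发生变化, 从而让 CloudFormation 向 Custom Resource 发送 Update Request.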

deploy_cf.py
  1# -*- coding: utf-8 -*-
  2
  3"""
  4This script deploy the CloudFormation to AWS.
  5"""
  6
import hashlib
import json
import uuid

import aws_cloudformation as aws_cf
from boto_session_manager import BotoSesManager
from pathlib_mate import Path
from s3pathlib import S3Path, context
 13
 14aws_profile = "bmt_app_dev_us_east_1"
 15iam_role_arn = "arn:aws:iam::{aws_account_id}:role/lambda-power-user-role"
 16name_prefix = "custom_resource_poc_example_02"
 17
 18# ------------------------------------------------------------------------------
 19# Don't change the code below
 20# ------------------------------------------------------------------------------
 21bsm = BotoSesManager(profile_name=aws_profile)
 22context.attach_boto_session(bsm.boto_ses)
 23iam_role_arn = iam_role_arn.format(aws_account_id=bsm.aws_account_id)
 24
 25dir_here = Path.dir_here(__file__)
 26path_lbd_func = dir_here / "lambda_function.py"
 27path_lbd_deploy_package = dir_here / "lambda.zip"
 28path_lbd_func.make_zip_archive(path_lbd_deploy_package, overwrite=True)
 29
 30s3dir = S3Path(
 31    f"s3://{bsm.aws_account_id}-{bsm.aws_region}-data"
 32    f"/projects/aws-lambda-backed-custom-resources-poc/01-Example-Update-Custom-Resource/"
 33).to_dir()
 34s3path_lbd_deploy_package = s3dir / "lambda.zip"
 35s3path_lbd_deploy_package.upload_file(path_lbd_deploy_package, overwrite=True)
 36
 37custom_resource_request_handler_lbd_func_name = (
 38    f"{name_prefix}-custom-resource-request-handler"
 39)
 40template = {
 41    "AWSTemplateFormatVersion": "2010-09-09",
 42    "Resources": {
 43        "LambdaFunctionCustomerResourceHandler": {
 44            "Type": "AWS::Lambda::Function",
 45            "Properties": {
 46                "FunctionName": custom_resource_request_handler_lbd_func_name,
 47                "Handler": "lambda_function.lambda_handler",
 48                "Role": iam_role_arn,
 49                "Code": {
 50                    "S3Bucket": s3path_lbd_deploy_package.bucket,
 51                    "S3Key": s3path_lbd_deploy_package.key,
 52                },
 53                "Runtime": "python3.11",
 54                "MemorySize": 128,
 55                "Timeout": 30,
 56                "Environment": {
 57                    "Variables": {"CODE_HASH": str(hash(path_lbd_func.read_text()))}
 58                },
 59            },
 60        },
 61        # expected response object
 62        # :param name: the name of the IAM group
 63        "IamGroupInfo": {
 64            "Type": "Custom::IamGroupInfo",
 65            "Properties": {
 66                "ServiceToken": f"arn:aws:lambda:{bsm.aws_region}:{bsm.aws_account_id}:function:{custom_resource_request_handler_lbd_func_name}",
 67                # don't forget to set ServiceTimeout to a shorter value
 68                # otherwise you will wait for an hour if you made a mistake in your request lambda handler
 69                "ServiceTimeout": 30,
 70                # use a random generated value to send a update request to the request lambda handler
 71                # this value should comes from the CloudFormation Parameter
 72                "client_token": uuid.uuid4().hex,
 73            },
 74            # make sure this depends on the lambda function handler
 75            "DependsOn": "LambdaFunctionCustomerResourceHandler",
 76        },
 77        "IamGroup": {
 78            "Type": "AWS::IAM::Group",
 79            "Properties": {"GroupName": {"Fn::GetAtt": ["IamGroupInfo", "name"]}},
 80            # make sure this depends on the custom resource
 81            "DependsOn": "IamGroupInfo",
 82        },
 83    },
 84}
 85stack_name = name_prefix.replace("_", "-")
 86
 87
 88def deploy_stack():
 89    aws_cf.deploy_stack(
 90        bsm=bsm,
 91        stack_name=stack_name,
 92        template=json.dumps(template),
 93        include_named_iam=True,
 94        skip_prompt=True,
 95    )
 96
 97
 98def delete_stack():
 99    aws_cf.remove_stack(
100        bsm=bsm,
101        stack_name=stack_name,
102        skip_prompt=True,
103    )
104
105
if __name__ == "__main__":
    # Deploys by default. To tear the stack down instead, comment out
    # deploy_stack() and uncomment delete_stack().
    deploy_stack()
    # delete_stack()