Clean Up Old Image Tag

AWS ECR 自带一个 Lifecycle Policy 功能, 能让没有被 Tag 的 image 在一定时间后自动删除. 但用户对 Lifecycle Policy 的需求是多种多样的, 我们希望能够更加精确地控制什么时候执行删除, 以及删除哪些 Image.

这里我提供了一个脚本, 用于按自定义规则清理 ECR 中的旧 Image:

clean_up_old_image.py
  1# -*- coding: utf-8 -*-
  2
  3"""
  4Create at: 2024-09-06
  5Author: Sanhe Hu
  6
  7This script is used to clean up old images in ECR.
  8"""
  9
 10import typing as T
 11import dataclasses
 12from datetime import datetime, timezone
 13
 14import boto3
 15
 16
 17@dataclasses.dataclass
 18class Image:
 19    digest: str
 20    tags: T.List[str]
 21    pushed_at: datetime
 22
 23    def get_elapse(self, utc_now: datetime) -> int:
 24        return int((utc_now - self.pushed_at).total_seconds())
 25
 26
 27def get_images(
 28    ecr_client,
 29    repo_name: str,
 30) -> T.List[Image]:
 31
 32    paginator = ecr_client.get_paginator("describe_images")
 33    response_iterator = paginator.paginate(
 34        repositoryName=repo_name,
 35        filter={"tagStatus": "ANY"},
 36    )
 37    image_list = list()
 38    for response in response_iterator:
 39        image_detail_list = response.get("imageDetails", [])
 40        for image_detail in image_detail_list:
 41            image_digest = image_detail["imageDigest"]
 42            image_tags = image_detail.get("imageTags", [])
 43            image_pushed_at = image_detail["imagePushedAt"]
 44            if len(image_tags) == 0:
 45                continue
 46            image = Image(
 47                digest=image_digest,
 48                tags=image_tags,
 49                pushed_at=image_pushed_at,
 50            )
 51            image_list.append(image)
 52    return image_list
 53
 54
 55def do_we_delete_it(
 56    image: Image,
 57    utc_now: datetime,
 58    untagged_ttl: int,
 59    general_ttl: int,
 60) -> bool:
 61    """
 62    - Delete all image created long ago (based on TTL).
 63    - Delete all untagged images.
 64    """
 65    if "latest" in image.tags:
 66        return False
 67    elapse = image.get_elapse(utc_now)
 68    if len(image.tags) == 0:
 69        return elapse > untagged_ttl
 70    else:
 71        return elapse > general_ttl
 72
 73
 74def delete_old_image(
 75    ecr_client,
 76    repo_name: str,
 77    utc_now: datetime,
 78    untagged_ttl: int,
 79    general_ttl: int,
 80):
 81    image_list = get_images(ecr_client, repo_name)
 82    to_delete = list()
 83    for image in image_list:
 84        if do_we_delete_it(
 85            image=image,
 86            utc_now=utc_now,
 87            untagged_ttl=untagged_ttl,
 88            general_ttl=general_ttl,
 89        ):
 90            if image.tags:
 91                uri = "{}:{}".format(repo_name, "|".join(image.tags))
 92            else:
 93                uri = f"{repo_name}:{image.digest}"
 94            print(f"delete: {uri}")
 95            to_delete.append({"imageDigest": image.digest})
 96    if len(to_delete):
 97        ecr_client.batch_delete_image(
 98            repositoryName=repo_name,
 99            imageIds=to_delete,
100        )
101
102
if __name__ == "__main__":
    # Example run: edit the profile, repository and TTL values as needed.
    aws_profile = "bmt_app_devops_us_east_1"
    repo_name = "simple_lbd_container"
    general_ttl = 30 * 24 * 60 * 60  # 30 days, in seconds
    untagged_ttl = 1 * 24 * 60 * 60  # 1 day, in seconds
    boto_ses = boto3.Session(profile_name=aws_profile)
    ecr_client = boto_ses.client("ecr")
    # datetime.utcnow() is deprecated since Python 3.12; this yields the same
    # timezone-aware UTC timestamp directly.
    utc_now = datetime.now(timezone.utc)

    delete_old_image(
        ecr_client=ecr_client,
        repo_name=repo_name,
        utc_now=utc_now,
        untagged_ttl=untagged_ttl,
        general_ttl=general_ttl,
    )