ci: Add a scheduled action that purges the custom CI cache

Adds a new Python script and a scheduled GitHub Actions workflow that
clean up the custom CI cache bucket.

Needed to prevent the cache bucket from growing infinitely large.
We currently use Cloudflare R2, which does not support any kind of
automatic object retention lifecycle.
This commit is contained in:
Christoph Herzog
2023-02-15 12:25:35 +01:00
committed by Christoph Herzog
parent 8696abd29e
commit abd03ec5a5
2 changed files with 73 additions and 0 deletions

44
.github/s3-cache-cleanup.py vendored Executable file
View File

@ -0,0 +1,44 @@
#!/usr/bin/env python3
# Deletes all objects in an S3 bucket that are older than a given number of days.
# Used for cleaning up the custom GitHub Actions cache.
import boto3
import datetime
import os
# Number of days an object may stay in the bucket before it is purged.
DAYS_TO_RETAIN = 7


def compute_cutoff_date(days, now=None):
    """Return the instant `days` days before `now`.

    Args:
        days: Retention period in days.
        now: Reference instant as a timezone-aware datetime. Defaults to the
            current time in UTC.

    Returns:
        A datetime `days` days earlier than `now`. With the default `now`
        the result is timezone-aware and expressed in UTC.
    """
    if now is None:
        # Ask for the current time directly in UTC. Taking the naive local
        # time and stamping it with tzinfo=UTC afterwards would shift the
        # cutoff by the local UTC offset on any host not running in UTC.
        now = datetime.datetime.now(datetime.timezone.utc)
    return now - datetime.timedelta(days=days)


def main():
    """Delete every object in the bucket older than DAYS_TO_RETAIN days.

    Connection settings are read from the environment variables
    AWS_BUCKET_NAME, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and
    AWS_ENDPOINT.

    Raises:
        KeyError: If any of the required environment variables is missing.
    """
    bucket_name = os.environ['AWS_BUCKET_NAME']
    access_key = os.environ['AWS_ACCESS_KEY_ID']
    secret_key = os.environ['AWS_SECRET_ACCESS_KEY']
    endpoint = os.environ['AWS_ENDPOINT']

    # Create a connection to the S3-compatible service at the given endpoint.
    s3 = boto3.resource(
        's3',
        endpoint_url=endpoint,
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
        region_name='auto',
    )
    bucket = s3.Bucket(bucket_name)

    cutoff_date = compute_cutoff_date(DAYS_TO_RETAIN)
    print(f'Deleting all objects in bucket {bucket_name} older than {cutoff_date}...')

    total_count = 0
    deleted_count = 0
    for obj in bucket.objects.all():
        total_count += 1
        # last_modified is compared against a timezone-aware cutoff, so both
        # sides of the comparison must be aware datetimes.
        if obj.last_modified < cutoff_date:
            print(f'Deleting {obj.key}...')
            obj.delete()
            deleted_count += 1

    print(f'Complete! Deleted {deleted_count} objects out of a total {total_count}.')


if __name__ == '__main__':
    main()

View File

@ -0,0 +1,29 @@
# Scheduled workflow that purges stale objects from the custom GitHub Actions
# cache bucket by running .github/s3-cache-cleanup.py.
name: Actions Cache Bucket Cleanup
on:
schedule:
# Run once a day, at minute 0 of hour 3.
- cron: "0 3 * * *"
# The workflow consists of a single job.
jobs:
# Single job, named "cron", that performs the cleanup.
cron:
# The type of runner that the job will run on
runs-on: ubuntu-latest
steps:
# The cleanup script imports boto3, which has to be installed first.
- name: Install boto3 library
run: pip install boto3
# The cleanup script lives in this repository, so it must be checked out.
- name: Clone repository
uses: actions/checkout@v3
- name: Run cleanup
# Connection settings that the cleanup script reads from its environment.
env:
AWS_ENDPOINT: https://1541b1e8a3fc6ad155ce67ef38899700.r2.cloudflarestorage.com
# NOTE: the access key id is supplied by the secret named ..._ACCESS_TOKEN,
# and the secret access key by the secret named ..._ACCESS_KEY.
AWS_ACCESS_KEY_ID: ${{ secrets.CLOUDFLARE_ARTIFACTS_CACHE_ACCESS_TOKEN }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.CLOUDFLARE_ARTIFACTS_CACHE_ACCESS_KEY }}
AWS_BUCKET_NAME: wasmer-rust-artifacts-cache
run: |
./.github/s3-cache-cleanup.py