From 41186600c57d4a3fcdd436abb0ccbd3e46aeb21f Mon Sep 17 00:00:00 2001 From: philmcmahon Date: Thu, 25 Jan 2024 00:10:38 +0000 Subject: [PATCH 01/11] Add whisper Dockerfile and associated github action to publish to github container registry --- .github/workflows/build-whisper-docker.yml | 48 ++++++++++++++++++++++ whisper_container/Dockerfile | 9 ++++ 2 files changed, 57 insertions(+) create mode 100644 .github/workflows/build-whisper-docker.yml create mode 100644 whisper_container/Dockerfile diff --git a/.github/workflows/build-whisper-docker.yml b/.github/workflows/build-whisper-docker.yml new file mode 100644 index 00000000..a203887d --- /dev/null +++ b/.github/workflows/build-whisper-docker.yml @@ -0,0 +1,48 @@ +# +name: Build and publish whisper docker image + +# Configures this workflow to run every time a change is pushed to the branch called `release`. +on: + push: + branches: ['whisper-docker'] + +# Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu. +jobs: + build-and-push-image: + runs-on: ubuntu-latest + # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. + permissions: + contents: read + packages: write + # + steps: + - name: Checkout repository + uses: actions/checkout@v4 + # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. + - name: Log in to the Container registry + uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels. + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages. + # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository. + # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step. + - name: Build and push Docker image + uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 + with: + context: whisper_container/ + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/whisper_container/Dockerfile b/whisper_container/Dockerfile new file mode 100644 index 00000000..11fa7c35 --- /dev/null +++ b/whisper_container/Dockerfile @@ -0,0 +1,9 @@ +FROM ubuntu:22.04 +WORKDIR /opt + +RUN apt-get update +RUN apt-get install -y ffmpeg curl wget git build-essential +RUN git clone https://github.com/ggerganov/whisper.cpp +RUN cd whisper.cpp && make +RUN bash /opt/whisper.cpp/models/download-ggml-model.sh medium +RUN bash /opt/whisper.cpp/models/download-ggml-model.sh large-v2 \ No newline at end of file From b616667f8a51eac9de42edbcf2e4f6722a43fd67 Mon Sep 17 00:00:00 2001 From: philmcmahon Date: Thu, 25 Jan 2024 00:43:49 +0000 Subject: [PATCH 02/11] push x86 and arm64 versions --- .github/workflows/build-whisper-docker.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-whisper-docker.yml b/.github/workflows/build-whisper-docker.yml index a203887d..c3640ac6 100644 --- a/.github/workflows/build-whisper-docker.yml +++ b/.github/workflows/build-whisper-docker.yml @@ -40,9 +40,10 @@ jobs: # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository. # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step. - name: Build and push Docker image - uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 + uses: docker/build-push-action@v5 with: context: whisper_container/ + platforms: linux/amd64,linux/arm64 push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} From db0d7e6e8ab65a263f0776d278d99fd15d8d1707 Mon Sep 17 00:00:00 2001 From: philmcmahon Date: Thu, 25 Jan 2024 00:57:23 +0000 Subject: [PATCH 03/11] add buildx step to try get multi platform to work --- .github/workflows/build-whisper-docker.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-whisper-docker.yml b/.github/workflows/build-whisper-docker.yml index c3640ac6..6ff9e16c 100644 --- a/.github/workflows/build-whisper-docker.yml +++ b/.github/workflows/build-whisper-docker.yml @@ -23,9 +23,12 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. - name: Log in to the Container registry - uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 + uses: docker/login-action@v3 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} @@ -33,7 +36,7 @@ jobs: # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels. - name: Extract metadata (tags, labels) for Docker id: meta - uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 + uses: docker/metadata-action@v5 with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages. From 4b9d5578bb10af2dd56af8efbbaa6c229bf10a7f Mon Sep 17 00:00:00 2001 From: philmcmahon Date: Thu, 25 Jan 2024 16:17:20 +0000 Subject: [PATCH 04/11] Remove large model, improve documentation, only build when Dockerfile changes --- .github/workflows/build-whisper-docker.yml | 10 +++++++--- whisper_container/Dockerfile | 5 ++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-whisper-docker.yml b/.github/workflows/build-whisper-docker.yml index 6ff9e16c..c7d59219 100644 --- a/.github/workflows/build-whisper-docker.yml +++ b/.github/workflows/build-whisper-docker.yml @@ -3,8 +3,11 @@ name: Build and publish whisper docker image # Configures this workflow to run every time a change is pushed to the branch called `release`. on: + workflow_dispatch: push: - branches: ['whisper-docker'] + branches: ['whisper-docker', 'main'] + paths: + - "whisper_container/Dockerfile" # Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. env: @@ -19,7 +22,7 @@ jobs: permissions: contents: read packages: write - # + steps: - name: Checkout repository uses: actions/checkout@v4 @@ -46,7 +49,8 @@ jobs: uses: docker/build-push-action@v5 with: context: whisper_container/ - platforms: linux/amd64,linux/arm64 + # to add x86: linux/amd64 + platforms: linux/arm64 push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} diff --git a/whisper_container/Dockerfile b/whisper_container/Dockerfile index 11fa7c35..27d421f8 100644 --- a/whisper_container/Dockerfile +++ b/whisper_container/Dockerfile @@ -1,9 +1,12 @@ FROM ubuntu:22.04 WORKDIR /opt +LABEL com.theguardian.transcription-service.whisper-container="Whisper.cpp container with models downloaded, including ffmpeg" RUN apt-get update RUN apt-get install -y ffmpeg curl wget git build-essential RUN git clone https://github.com/ggerganov/whisper.cpp RUN cd whisper.cpp && make RUN bash /opt/whisper.cpp/models/download-ggml-model.sh medium -RUN bash /opt/whisper.cpp/models/download-ggml-model.sh large-v2 \ No newline at end of file + +# Large model not currently in use - but we might want to add it as an option at some point +#RUN bash /opt/whisper.cpp/models/download-ggml-model.sh large-v2 \ No newline at end of file From 1e81f26177d65bfed1345dfd1cdbcd073365edc9 Mon Sep 17 00:00:00 2001 From: philmcmahon Date: Thu, 25 Jan 2024 16:46:42 +0000 Subject: [PATCH 05/11] add workflow file to trigger --- .github/workflows/build-whisper-docker.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-whisper-docker.yml b/.github/workflows/build-whisper-docker.yml index c7d59219..4cf03de8 100644 --- a/.github/workflows/build-whisper-docker.yml +++ b/.github/workflows/build-whisper-docker.yml @@ -5,9 +5,10 @@ name: Build and publish whisper docker image on: workflow_dispatch: push: - branches: ['whisper-docker', 'main'] + branches: ['main'] paths: - "whisper_container/Dockerfile" + - ".github/workflows/build-whisper-docker.yml" # Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. env: From dc7a74cbc5fb92cfe2380357f332bd8c0fb6e702 Mon Sep 17 00:00:00 2001 From: philmcmahon Date: Mon, 29 Jan 2024 13:17:12 +0000 Subject: [PATCH 06/11] Add ECR repository and attempt to publish to it in workflow --- .github/workflows/build-whisper-docker.yml | 22 ++++++++++-- packages/cdk/bin/cdk.ts | 8 +++++ packages/cdk/lib/repository.ts | 42 ++++++++++++++++++++++ 3 files changed, 69 insertions(+), 3 deletions(-) create mode 100644 packages/cdk/lib/repository.ts diff --git a/.github/workflows/build-whisper-docker.yml b/.github/workflows/build-whisper-docker.yml index 4cf03de8..5d6bd366 100644 --- a/.github/workflows/build-whisper-docker.yml +++ b/.github/workflows/build-whisper-docker.yml @@ -5,7 +5,7 @@ name: Build and publish whisper docker image on: workflow_dispatch: push: - branches: ['main'] + branches: ['main', 'whisper-docker'] paths: - "whisper_container/Dockerfile" - ".github/workflows/build-whisper-docker.yml" @@ -23,6 +23,7 @@ jobs: permissions: contents: read packages: write + id-token: write steps: - name: Checkout repository @@ -30,7 +31,7 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. + # Uses the `docker/login-action` action to log in to the Container registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. - name: Log in to the Container registry uses: docker/login-action@v3 with: @@ -42,7 +43,20 @@ jobs: id: meta uses: docker/metadata-action@v5 with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + images: | + ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + ${{ secrets.TRANSCRIPTION_SERVICE_ECR_URI }} + tags: | + type=sha + type=raw,value=latest + - uses: aws-actions/configure-aws-credentials@v4 + name: Configure AWS credentials for pushing to ECR + with: + role-to-assume: ${{ secrets.INVESTIGATIONS_GITHUB_ACTIONS_ROLE_ARN }} + aws-region: eu-west-1 + - name: Login to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages. # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository. # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step. @@ -55,3 +69,5 @@ jobs: push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} + + diff --git a/packages/cdk/bin/cdk.ts b/packages/cdk/bin/cdk.ts index 8ab432b1..9e9f456a 100644 --- a/packages/cdk/bin/cdk.ts +++ b/packages/cdk/bin/cdk.ts @@ -1,5 +1,6 @@ import 'source-map-support/register'; import { GuRoot } from '@guardian/cdk/lib/constructs/root'; +import {TranscriptionServiceRepository} from "../lib/repository"; import { TranscriptionService } from '../lib/transcription-service'; const app = new GuRoot(); @@ -13,3 +14,10 @@ new TranscriptionService(app, 'TranscriptionService-PROD', { stage: 'PROD', env: { region: 'eu-west-1' }, }); + +// repository will be shared between CODE and PROD so needs to be a separate stack +new TranscriptionServiceRepository(app, 'TranscriptionServiceRepository', { + stack: 'investigations', + stage: 'PROD', + env: { region: 'eu-west-1' }, +}) \ No newline at end of file diff --git a/packages/cdk/lib/repository.ts b/packages/cdk/lib/repository.ts new file mode 100644 index 00000000..8d6c7450 --- /dev/null +++ b/packages/cdk/lib/repository.ts @@ -0,0 +1,42 @@ +import type {GuStackProps} from "@guardian/cdk/lib/constructs/core"; +import {GuStack, GuStringParameter} from "@guardian/cdk/lib/constructs/core"; +import type {App} from "aws-cdk-lib"; +import {RemovalPolicy} from "aws-cdk-lib"; +import {Repository, TagMutability} from "aws-cdk-lib/aws-ecr"; +import {ArnPrincipal, Effect, PolicyStatement} from "aws-cdk-lib/aws-iam"; + +export class TranscriptionServiceRepository extends GuStack { + constructor(scope: App, id: string, props: GuStackProps) { + super(scope, id, props); + const githubActionsIAMRoleArn = new GuStringParameter(this, "GithubActionsIAMRoleArn", { + description: "IAM role for role used by github actions workflows" + }) + const repository = new Repository(this, "TranscriptionServiceRepository", { + repositoryName: `transcription-service`, + lifecycleRules: [{ + maxImageCount: 5 + }], + imageTagMutability: TagMutability.IMMUTABLE, + removalPolicy: RemovalPolicy.DESTROY, + imageScanOnPush: true, + }) + repository.addToResourcePolicy(new PolicyStatement({ + principals: [new ArnPrincipal(githubActionsIAMRoleArn.valueAsString)], + actions: [ + "ecr:GetAuthorizationToken", + "ecr:BatchCheckLayerAvailability", + "ecr:GetDownloadUrlForLayer", + "ecr:GetRepositoryPolicy", + "ecr:DescribeRepositories", + "ecr:ListImages", + "ecr:DescribeImages", + "ecr:BatchGetImage", + "ecr:InitiateLayerUpload", + "ecr:UploadLayerPart", + "ecr:CompleteLayerUpload", + "ecr:PutImage" + ], + effect: Effect.ALLOW + })) + } +} \ No newline at end of file From f4dfab731a15749a329518b83bccba34e72b898a Mon Sep 17 00:00:00 2001 From: philmcmahon Date: Tue, 30 Jan 2024 10:50:22 +0000 Subject: [PATCH 07/11] Add role for AMIgo to fetch containers from private ECR --- packages/cdk/lib/repository.ts | 42 +++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/packages/cdk/lib/repository.ts b/packages/cdk/lib/repository.ts index 8d6c7450..903f84de 100644 --- a/packages/cdk/lib/repository.ts +++ b/packages/cdk/lib/repository.ts @@ -1,9 +1,10 @@ import type {GuStackProps} from "@guardian/cdk/lib/constructs/core"; import {GuStack, GuStringParameter} from "@guardian/cdk/lib/constructs/core"; import type {App} from "aws-cdk-lib"; -import {RemovalPolicy} from "aws-cdk-lib"; -import {Repository, TagMutability} from "aws-cdk-lib/aws-ecr"; -import {ArnPrincipal, Effect, PolicyStatement} from "aws-cdk-lib/aws-iam"; +import {CfnOutput, RemovalPolicy} from "aws-cdk-lib"; +import {CfnPublicRepository, Repository, TagMutability} from "aws-cdk-lib/aws-ecr"; +import {AccountPrincipal, ArnPrincipal, Effect, PolicyDocument, PolicyStatement, Role} from "aws-cdk-lib/aws-iam"; +import {GuAssumeRolePolicy, GuRole} from "@guardian/cdk/lib/constructs/iam"; export class TranscriptionServiceRepository extends GuStack { constructor(scope: App, id: string, props: GuStackProps) { @@ -11,6 +12,9 @@ export class TranscriptionServiceRepository extends GuStack { const githubActionsIAMRoleArn = new GuStringParameter(this, "GithubActionsIAMRoleArn", { description: "IAM role for role used by github actions workflows" }) + const deployToolsAccountNumber = new GuStringParameter(this, "DeployToolsAccount", { + description: "Deploy tools account id - needed to give AMIgo access to this repository" + }) const repository = new Repository(this, "TranscriptionServiceRepository", { repositoryName: `transcription-service`, lifecycleRules: [{ @@ -38,5 +42,37 @@ export class TranscriptionServiceRepository extends GuStack { ], effect: Effect.ALLOW })) + + const repoAccessRole = new Role(this, "RepoAccessRole", { + roleName: "TranscriptionServiceRepoAccessRole", + assumedBy: new AccountPrincipal(deployToolsAccountNumber.valueAsString), + inlinePolicies: { + "TranscriptionServiceRepoAccessPolicy": new PolicyDocument({ + statements: [ + new PolicyStatement({ + actions: ["ecr:GetAuthorizationToken"], + resources: ["*"], + effect: Effect.ALLOW + }), + new PolicyStatement({ + actions: [ + "ecr:GetDownloadUrlForLayer", + "ecr:BatchGetImage", + "ecr:BatchCheckLayerAvailability", + "ecr:DescribeImages", + "ecr:ListImages", + "ecr:GetDownloadUrlForLayer" + ], + resources: [repository.repositoryArn], + effect: Effect.ALLOW + }) + ] + }) + } + }) + + new CfnOutput(this, "AccessRoleArn", { + value: repoAccessRole.roleArn + }) } } \ No newline at end of file From 63a655d669ff588d5b85ae8f24de810f2815d212 Mon Sep 17 00:00:00 2001 From: philmcmahon Date: Tue, 30 Jan 2024 11:15:32 +0000 Subject: [PATCH 08/11] Prettier, fix package-lock --- package-lock.json | 68 +++++++------- packages/cdk/bin/cdk.ts | 4 +- packages/cdk/lib/repository.ts | 167 ++++++++++++++++++--------------- 3 files changed, 129 insertions(+), 110 deletions(-) diff --git a/package-lock.json b/package-lock.json index b40bac9e..be7461d1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -3860,13 +3860,13 @@ }, "node_modules/aws-cdk-lib/node_modules/@balena/dockerignore": { "version": "1.0.2", - "extraneous": true, + "dev": true, "inBundle": true, "license": "Apache-2.0" }, "node_modules/aws-cdk-lib/node_modules/ajv": { "version": "8.12.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "dependencies": { @@ -3882,7 +3882,7 @@ }, "node_modules/aws-cdk-lib/node_modules/ansi-regex": { "version": "5.0.1", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "engines": { @@ -3891,7 +3891,7 @@ }, "node_modules/aws-cdk-lib/node_modules/ansi-styles": { "version": "4.3.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "dependencies": { @@ -3906,7 +3906,7 @@ }, "node_modules/aws-cdk-lib/node_modules/astral-regex": { "version": "2.0.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "engines": { @@ -3915,13 +3915,13 @@ }, "node_modules/aws-cdk-lib/node_modules/balanced-match": { "version": "1.0.2", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT" }, "node_modules/aws-cdk-lib/node_modules/brace-expansion": { "version": "1.1.11", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "dependencies": { @@ -3931,7 +3931,7 @@ }, "node_modules/aws-cdk-lib/node_modules/case": { "version": "1.6.3", - "extraneous": true, + "dev": true, "inBundle": true, "license": "(MIT OR GPL-3.0-or-later)", "engines": { @@ -3940,7 +3940,7 @@ }, "node_modules/aws-cdk-lib/node_modules/color-convert": { "version": "2.0.1", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "dependencies": { @@ -3952,31 +3952,31 @@ }, "node_modules/aws-cdk-lib/node_modules/color-name": { "version": "1.1.4", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT" }, "node_modules/aws-cdk-lib/node_modules/concat-map": { "version": "0.0.1", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT" }, "node_modules/aws-cdk-lib/node_modules/emoji-regex": { "version": "8.0.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT" }, "node_modules/aws-cdk-lib/node_modules/fast-deep-equal": { "version": "3.1.3", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT" }, "node_modules/aws-cdk-lib/node_modules/fs-extra": { "version": "11.2.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "dependencies": { @@ -3990,13 +3990,13 @@ }, "node_modules/aws-cdk-lib/node_modules/graceful-fs": { "version": "4.2.11", - "extraneous": true, + "dev": true, "inBundle": true, "license": "ISC" }, "node_modules/aws-cdk-lib/node_modules/ignore": { "version": "5.3.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "engines": { @@ -4005,7 +4005,7 @@ }, "node_modules/aws-cdk-lib/node_modules/is-fullwidth-code-point": { "version": "3.0.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "engines": { @@ -4014,13 +4014,13 @@ }, "node_modules/aws-cdk-lib/node_modules/json-schema-traverse": { "version": "1.0.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT" }, "node_modules/aws-cdk-lib/node_modules/jsonfile": { "version": "6.1.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "dependencies": { @@ -4032,7 +4032,7 @@ }, "node_modules/aws-cdk-lib/node_modules/jsonschema": { "version": "1.4.1", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "engines": { @@ -4041,13 +4041,13 @@ }, "node_modules/aws-cdk-lib/node_modules/lodash.truncate": { "version": "4.4.2", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT" }, "node_modules/aws-cdk-lib/node_modules/lru-cache": { "version": "6.0.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "ISC", "dependencies": { @@ -4059,7 +4059,7 @@ }, "node_modules/aws-cdk-lib/node_modules/minimatch": { "version": "3.1.2", - "extraneous": true, + "dev": true, "inBundle": true, "license": "ISC", "dependencies": { @@ -4071,7 +4071,7 @@ }, "node_modules/aws-cdk-lib/node_modules/punycode": { "version": "2.3.1", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "engines": { @@ -4080,7 +4080,7 @@ }, "node_modules/aws-cdk-lib/node_modules/require-from-string": { "version": "2.0.2", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "engines": { @@ -4089,7 +4089,7 @@ }, "node_modules/aws-cdk-lib/node_modules/semver": { "version": "7.5.4", - "extraneous": true, + "dev": true, "inBundle": true, "license": "ISC", "dependencies": { @@ -4104,7 +4104,7 @@ }, "node_modules/aws-cdk-lib/node_modules/slice-ansi": { "version": "4.0.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "dependencies": { @@ -4121,7 +4121,7 @@ }, "node_modules/aws-cdk-lib/node_modules/string-width": { "version": "4.2.3", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "dependencies": { @@ -4135,7 +4135,7 @@ }, "node_modules/aws-cdk-lib/node_modules/strip-ansi": { "version": "6.0.1", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "dependencies": { @@ -4147,7 +4147,7 @@ }, "node_modules/aws-cdk-lib/node_modules/table": { "version": "6.8.1", - "extraneous": true, + "dev": true, "inBundle": true, "license": "BSD-3-Clause", "dependencies": { @@ -4163,7 +4163,7 @@ }, "node_modules/aws-cdk-lib/node_modules/universalify": { "version": "2.0.1", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "engines": { @@ -4172,7 +4172,7 @@ }, "node_modules/aws-cdk-lib/node_modules/uri-js": { "version": "4.4.1", - "extraneous": true, + "dev": true, "inBundle": true, "license": "BSD-2-Clause", "dependencies": { @@ -4181,13 +4181,13 @@ }, "node_modules/aws-cdk-lib/node_modules/yallist": { "version": "4.0.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "ISC" }, "node_modules/aws-cdk-lib/node_modules/yaml": { "version": "1.10.2", - "extraneous": true, + "dev": true, "inBundle": true, "license": "ISC", "engines": { diff --git a/packages/cdk/bin/cdk.ts b/packages/cdk/bin/cdk.ts index 9e9f456a..2003384e 100644 --- a/packages/cdk/bin/cdk.ts +++ b/packages/cdk/bin/cdk.ts @@ -1,6 +1,6 @@ import 'source-map-support/register'; import { GuRoot } from '@guardian/cdk/lib/constructs/root'; -import {TranscriptionServiceRepository} from "../lib/repository"; +import { TranscriptionServiceRepository } from '../lib/repository'; import { TranscriptionService } from '../lib/transcription-service'; const app = new GuRoot(); @@ -20,4 +20,4 @@ new TranscriptionServiceRepository(app, 'TranscriptionServiceRepository', { stack: 'investigations', stage: 'PROD', env: { region: 'eu-west-1' }, -}) \ No newline at end of file +}); diff --git a/packages/cdk/lib/repository.ts b/packages/cdk/lib/repository.ts index 903f84de..f5e70846 100644 --- a/packages/cdk/lib/repository.ts +++ b/packages/cdk/lib/repository.ts @@ -1,78 +1,97 @@ -import type {GuStackProps} from "@guardian/cdk/lib/constructs/core"; -import {GuStack, GuStringParameter} from "@guardian/cdk/lib/constructs/core"; -import type {App} from "aws-cdk-lib"; -import {CfnOutput, RemovalPolicy} from "aws-cdk-lib"; -import {CfnPublicRepository, Repository, TagMutability} from "aws-cdk-lib/aws-ecr"; -import {AccountPrincipal, ArnPrincipal, Effect, PolicyDocument, PolicyStatement, Role} from "aws-cdk-lib/aws-iam"; -import {GuAssumeRolePolicy, GuRole} from "@guardian/cdk/lib/constructs/iam"; +import type { GuStackProps } from '@guardian/cdk/lib/constructs/core'; +import { GuStack, GuStringParameter } from '@guardian/cdk/lib/constructs/core'; +import type { App } from 'aws-cdk-lib'; +import { CfnOutput, RemovalPolicy } from 'aws-cdk-lib'; +import { Repository, TagMutability } from 'aws-cdk-lib/aws-ecr'; +import { + AccountPrincipal, + ArnPrincipal, + Effect, + PolicyDocument, + PolicyStatement, + Role, +} from 'aws-cdk-lib/aws-iam'; export class TranscriptionServiceRepository extends GuStack { - constructor(scope: App, id: string, props: GuStackProps) { - super(scope, id, props); - const githubActionsIAMRoleArn = new GuStringParameter(this, "GithubActionsIAMRoleArn", { - description: "IAM role for role used by github actions workflows" - }) - const deployToolsAccountNumber = new GuStringParameter(this, "DeployToolsAccount", { - description: "Deploy tools account id - needed to give AMIgo access to this repository" - }) - const repository = new Repository(this, "TranscriptionServiceRepository", { - repositoryName: `transcription-service`, - lifecycleRules: [{ - maxImageCount: 5 - }], - imageTagMutability: TagMutability.IMMUTABLE, - removalPolicy: RemovalPolicy.DESTROY, - imageScanOnPush: true, - }) - repository.addToResourcePolicy(new PolicyStatement({ - principals: [new ArnPrincipal(githubActionsIAMRoleArn.valueAsString)], - actions: [ - "ecr:GetAuthorizationToken", - "ecr:BatchCheckLayerAvailability", - "ecr:GetDownloadUrlForLayer", - "ecr:GetRepositoryPolicy", - "ecr:DescribeRepositories", - "ecr:ListImages", - "ecr:DescribeImages", - "ecr:BatchGetImage", - "ecr:InitiateLayerUpload", - "ecr:UploadLayerPart", - "ecr:CompleteLayerUpload", - "ecr:PutImage" - ], - effect: Effect.ALLOW - })) + constructor(scope: App, id: string, props: GuStackProps) { + super(scope, id, props); + const githubActionsIAMRoleArn = new GuStringParameter( + this, + 'GithubActionsIAMRoleArn', + { + description: 'IAM role for role used by github actions workflows', + }, + ); + const deployToolsAccountNumber = new GuStringParameter( + this, + 'DeployToolsAccount', + { + description: + 'Deploy tools account id - needed to give AMIgo access to this repository', + }, + ); + const repository = new Repository(this, 'TranscriptionServiceRepository', { + repositoryName: `transcription-service`, + lifecycleRules: [ + { + maxImageCount: 5, + }, + ], + imageTagMutability: TagMutability.IMMUTABLE, + removalPolicy: RemovalPolicy.DESTROY, + imageScanOnPush: true, + }); + repository.addToResourcePolicy( + new PolicyStatement({ + principals: [new ArnPrincipal(githubActionsIAMRoleArn.valueAsString)], + actions: [ + 'ecr:GetAuthorizationToken', + 'ecr:BatchCheckLayerAvailability', + 'ecr:GetDownloadUrlForLayer', + 'ecr:GetRepositoryPolicy', + 'ecr:DescribeRepositories', + 'ecr:ListImages', + 'ecr:DescribeImages', + 'ecr:BatchGetImage', + 'ecr:InitiateLayerUpload', + 'ecr:UploadLayerPart', + 'ecr:CompleteLayerUpload', + 'ecr:PutImage', + ], + effect: Effect.ALLOW, + }), + ); - const repoAccessRole = new Role(this, "RepoAccessRole", { - roleName: "TranscriptionServiceRepoAccessRole", - assumedBy: new AccountPrincipal(deployToolsAccountNumber.valueAsString), - inlinePolicies: { - "TranscriptionServiceRepoAccessPolicy": new PolicyDocument({ - statements: [ - new PolicyStatement({ - actions: ["ecr:GetAuthorizationToken"], - resources: ["*"], - effect: Effect.ALLOW - }), - new PolicyStatement({ - actions: [ - "ecr:GetDownloadUrlForLayer", - "ecr:BatchGetImage", - "ecr:BatchCheckLayerAvailability", - "ecr:DescribeImages", - "ecr:ListImages", - "ecr:GetDownloadUrlForLayer" - ], - resources: [repository.repositoryArn], - effect: Effect.ALLOW - }) - ] - }) - } - }) + const repoAccessRole = new Role(this, 'RepoAccessRole', { + roleName: 'TranscriptionServiceRepoAccessRole', + assumedBy: new AccountPrincipal(deployToolsAccountNumber.valueAsString), + inlinePolicies: { + TranscriptionServiceRepoAccessPolicy: new PolicyDocument({ + statements: [ + new PolicyStatement({ + actions: ['ecr:GetAuthorizationToken'], + resources: ['*'], + effect: Effect.ALLOW, + }), + new PolicyStatement({ + actions: [ + 'ecr:GetDownloadUrlForLayer', + 'ecr:BatchGetImage', + 'ecr:BatchCheckLayerAvailability', + 'ecr:DescribeImages', + 'ecr:ListImages', + 'ecr:GetDownloadUrlForLayer', + ], + resources: [repository.repositoryArn], + effect: Effect.ALLOW, + }), + ], + }), + }, + }); - new CfnOutput(this, "AccessRoleArn", { - value: repoAccessRole.roleArn - }) - } -} \ No newline at end of file + new CfnOutput(this, 'AccessRoleArn', { + value: repoAccessRole.roleArn, + }); + } +} From ed611cdf08498f3360af396830c56c84fe168aec Mon Sep 17 00:00:00 2001 From: philmcmahon Date: Wed, 31 Jan 2024 11:19:49 +0000 Subject: [PATCH 09/11] Add RiffRaff deploy step for repository. Fix tags --- .github/workflows/build-whisper-docker.yml | 8 +++++--- .github/workflows/ci.yaml | 10 +++++++++- packages/cdk/lib/repository.ts | 2 +- packages/cdk/riff-raff-repository.yaml | 14 ++++++++++++++ 4 files changed, 29 insertions(+), 5 deletions(-) create mode 100644 packages/cdk/riff-raff-repository.yaml diff --git a/.github/workflows/build-whisper-docker.yml b/.github/workflows/build-whisper-docker.yml index 5d6bd366..dcef7fb5 100644 --- a/.github/workflows/build-whisper-docker.yml +++ b/.github/workflows/build-whisper-docker.yml @@ -12,8 +12,9 @@ on: # Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. env: - REGISTRY: ghcr.io IMAGE_NAME: ${{ github.repository }} + BUILD_NUMBER: ${{ github.run_number }} + GITHUB_REGISTRY: ghcr.io # There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu. jobs: @@ -35,16 +36,17 @@ jobs: - name: Log in to the Container registry uses: docker/login-action@v3 with: - registry: ${{ env.REGISTRY }} + registry: ${{ env.GITHUB_REGISTRY }} username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels. + # Note - this step is the thing where we are indicating which repositories we want to push the container to - name: Extract metadata (tags, labels) for Docker id: meta uses: docker/metadata-action@v5 with: images: | - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + ${{ env.GITHUB_REGISTRY }}/${{ env.IMAGE_NAME }} ${{ secrets.TRANSCRIPTION_SERVICE_ECR_URI }} tags: | type=sha diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 558c16c1..ae758413 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -51,5 +51,13 @@ jobs: - packages/cdk/cdk.out/TranscriptionService-PROD.template.json transcription-service: - packages/api/target/api.zip - transcription-service-worker: - packages/worker/target/worker.zip + - name: Upload repository project to riff-raff + uses: guardian/actions-riff-raff@v3 + with: + githubToken: ${{ secrets.GITHUB_TOKEN }} + configPath: packages/cdk/riff-raff-repository.yaml + projectName: investigations::transcription-service-repository + contentDirectories: | + cdk.out: + - packages/cdk/cdk.out/TranscriptionServiceRepository.template.json diff --git a/packages/cdk/lib/repository.ts b/packages/cdk/lib/repository.ts index f5e70846..6ce105ba 100644 --- a/packages/cdk/lib/repository.ts +++ b/packages/cdk/lib/repository.ts @@ -37,7 +37,7 @@ export class TranscriptionServiceRepository extends GuStack { maxImageCount: 5, }, ], - imageTagMutability: TagMutability.IMMUTABLE, + imageTagMutability: TagMutability.MUTABLE, removalPolicy: RemovalPolicy.DESTROY, imageScanOnPush: true, }); diff --git a/packages/cdk/riff-raff-repository.yaml b/packages/cdk/riff-raff-repository.yaml new file mode 100644 index 00000000..e8f25c36 --- /dev/null +++ b/packages/cdk/riff-raff-repository.yaml @@ -0,0 +1,14 @@ +allowedStages: + - PROD +deployments: + cfn-eu-west-1-investigations-transcription-service-repository: + type: cloud-formation + regions: + - eu-west-1 + stacks: + - investigations + app: transcription-service-repository + contentDirectory: cdk.out + parameters: + templateStagePaths: + PROD: TranscriptionServiceRepository.template.json From f5be08ce907de9b2eca237aa372b276d15c0799c Mon Sep 17 00:00:00 2001 From: philmcmahon Date: Wed, 31 Jan 2024 13:14:58 +0000 Subject: [PATCH 10/11] Remove curl from docker image --- whisper_container/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whisper_container/Dockerfile b/whisper_container/Dockerfile index 27d421f8..97dd13b2 100644 --- a/whisper_container/Dockerfile +++ b/whisper_container/Dockerfile @@ -3,7 +3,7 @@ WORKDIR /opt LABEL com.theguardian.transcription-service.whisper-container="Whisper.cpp container with models downloaded, including ffmpeg" RUN apt-get update -RUN apt-get install -y ffmpeg curl wget git build-essential +RUN apt-get install -y ffmpeg wget git build-essential RUN git clone https://github.com/ggerganov/whisper.cpp RUN cd whisper.cpp && make RUN bash /opt/whisper.cpp/models/download-ggml-model.sh medium From 99f4b706772d520e0ab8b6d657249cd73d192ee4 Mon Sep 17 00:00:00 2001 From: philmcmahon Date: Wed, 31 Jan 2024 13:15:33 +0000 Subject: [PATCH 11/11] Remove branch restriction --- .github/workflows/build-whisper-docker.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build-whisper-docker.yml b/.github/workflows/build-whisper-docker.yml index dcef7fb5..75362e6f 100644 --- a/.github/workflows/build-whisper-docker.yml +++ b/.github/workflows/build-whisper-docker.yml @@ -5,7 +5,6 @@ name: Build and publish whisper docker image on: workflow_dispatch: push: - branches: ['main', 'whisper-docker'] paths: - "whisper_container/Dockerfile" - ".github/workflows/build-whisper-docker.yml"