diff --git a/.github/workflows/build-whisper-docker.yml b/.github/workflows/build-whisper-docker.yml
new file mode 100644
index 00000000..75362e6f
--- /dev/null
+++ b/.github/workflows/build-whisper-docker.yml
@@ -0,0 +1,82 @@
+# Builds the whisper docker image and publishes it to the GitHub Container registry and to ECR.
+name: Build and publish whisper docker image
+
+# Configures this workflow to run when triggered manually (workflow_dispatch), or when a push to any branch changes the whisper Dockerfile or this workflow file.
+on:
+  workflow_dispatch:
+  push:
+    paths:
+      - "whisper_container/Dockerfile"
+      - ".github/workflows/build-whisper-docker.yml"
+
+# Defines three custom environment variables for the workflow: the name of the Docker image that this workflow builds, the run number of this build, and the Container registry domain. BUILD_NUMBER is not currently referenced by any step in this workflow.
+env:
+  IMAGE_NAME: ${{ github.repository }}
+  BUILD_NUMBER: ${{ github.run_number }}
+  GITHUB_REGISTRY: ghcr.io
+
+# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu.
+jobs:
+  build-and-push-image:
+    runs-on: ubuntu-latest
+    # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job.
+    # `id-token: write` allows the job to request an OIDC token, which the AWS credentials step uses to assume its role (no static AWS keys are configured).
+    permissions:
+      contents: read
+      packages: write
+      id-token: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      # The image is built for linux/arm64 but this job runs on an x86_64 runner (ubuntu-latest), so QEMU emulation
+      # has to be registered before Buildx can execute the Dockerfile's RUN steps for the target platform.
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+        with:
+          platforms: arm64
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      # Uses the `docker/login-action` action to log in to the Container registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.GITHUB_REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels.
+      # Note: the `images` list in this step determines which repositories the container is pushed to (the GitHub Container registry and ECR).
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: |
+            ${{ env.GITHUB_REGISTRY }}/${{ env.IMAGE_NAME }}
+            ${{ secrets.TRANSCRIPTION_SERVICE_ECR_URI }}
+          tags: |
+            type=sha
+            type=raw,value=latest
+      - name: Configure AWS credentials for pushing to ECR
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ secrets.INVESTIGATIONS_GITHUB_ACTIONS_ROLE_ARN }}
+          aws-region: eu-west-1
+      - name: Login to Amazon ECR
+        id: login-ecr
+        uses: aws-actions/amazon-ecr-login@v2
+      # This step uses the `docker/build-push-action` action to build the image, based on the `Dockerfile` in the build context. If the build succeeds, it pushes the image to every repository listed in the "meta" step (GitHub Packages and ECR).
+      # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository.
+      # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step.
+ - name: Build and push Docker image + uses: docker/build-push-action@v5 + with: + context: whisper_container/ + # to add x86: linux/amd64 + platforms: linux/arm64 + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + + diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 558c16c1..ae758413 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -51,5 +51,13 @@ jobs: - packages/cdk/cdk.out/TranscriptionService-PROD.template.json transcription-service: - packages/api/target/api.zip - transcription-service-worker: - packages/worker/target/worker.zip + - name: Upload repository project to riff-raff + uses: guardian/actions-riff-raff@v3 + with: + githubToken: ${{ secrets.GITHUB_TOKEN }} + configPath: packages/cdk/riff-raff-repository.yaml + projectName: investigations::transcription-service-repository + contentDirectories: | + cdk.out: + - packages/cdk/cdk.out/TranscriptionServiceRepository.template.json diff --git a/package-lock.json b/package-lock.json index b40bac9e..be7461d1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -3860,13 +3860,13 @@ }, "node_modules/aws-cdk-lib/node_modules/@balena/dockerignore": { "version": "1.0.2", - "extraneous": true, + "dev": true, "inBundle": true, "license": "Apache-2.0" }, "node_modules/aws-cdk-lib/node_modules/ajv": { "version": "8.12.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "dependencies": { @@ -3882,7 +3882,7 @@ }, "node_modules/aws-cdk-lib/node_modules/ansi-regex": { "version": "5.0.1", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "engines": { @@ -3891,7 +3891,7 @@ }, "node_modules/aws-cdk-lib/node_modules/ansi-styles": { "version": "4.3.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "dependencies": { @@ -3906,7 +3906,7 @@ }, "node_modules/aws-cdk-lib/node_modules/astral-regex": { "version": "2.0.0", - "extraneous": true, + "dev": true, 
"inBundle": true, "license": "MIT", "engines": { @@ -3915,13 +3915,13 @@ }, "node_modules/aws-cdk-lib/node_modules/balanced-match": { "version": "1.0.2", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT" }, "node_modules/aws-cdk-lib/node_modules/brace-expansion": { "version": "1.1.11", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "dependencies": { @@ -3931,7 +3931,7 @@ }, "node_modules/aws-cdk-lib/node_modules/case": { "version": "1.6.3", - "extraneous": true, + "dev": true, "inBundle": true, "license": "(MIT OR GPL-3.0-or-later)", "engines": { @@ -3940,7 +3940,7 @@ }, "node_modules/aws-cdk-lib/node_modules/color-convert": { "version": "2.0.1", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "dependencies": { @@ -3952,31 +3952,31 @@ }, "node_modules/aws-cdk-lib/node_modules/color-name": { "version": "1.1.4", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT" }, "node_modules/aws-cdk-lib/node_modules/concat-map": { "version": "0.0.1", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT" }, "node_modules/aws-cdk-lib/node_modules/emoji-regex": { "version": "8.0.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT" }, "node_modules/aws-cdk-lib/node_modules/fast-deep-equal": { "version": "3.1.3", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT" }, "node_modules/aws-cdk-lib/node_modules/fs-extra": { "version": "11.2.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "dependencies": { @@ -3990,13 +3990,13 @@ }, "node_modules/aws-cdk-lib/node_modules/graceful-fs": { "version": "4.2.11", - "extraneous": true, + "dev": true, "inBundle": true, "license": "ISC" }, "node_modules/aws-cdk-lib/node_modules/ignore": { "version": "5.3.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "engines": { @@ -4005,7 +4005,7 @@ }, 
"node_modules/aws-cdk-lib/node_modules/is-fullwidth-code-point": { "version": "3.0.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "engines": { @@ -4014,13 +4014,13 @@ }, "node_modules/aws-cdk-lib/node_modules/json-schema-traverse": { "version": "1.0.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT" }, "node_modules/aws-cdk-lib/node_modules/jsonfile": { "version": "6.1.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "dependencies": { @@ -4032,7 +4032,7 @@ }, "node_modules/aws-cdk-lib/node_modules/jsonschema": { "version": "1.4.1", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "engines": { @@ -4041,13 +4041,13 @@ }, "node_modules/aws-cdk-lib/node_modules/lodash.truncate": { "version": "4.4.2", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT" }, "node_modules/aws-cdk-lib/node_modules/lru-cache": { "version": "6.0.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "ISC", "dependencies": { @@ -4059,7 +4059,7 @@ }, "node_modules/aws-cdk-lib/node_modules/minimatch": { "version": "3.1.2", - "extraneous": true, + "dev": true, "inBundle": true, "license": "ISC", "dependencies": { @@ -4071,7 +4071,7 @@ }, "node_modules/aws-cdk-lib/node_modules/punycode": { "version": "2.3.1", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "engines": { @@ -4080,7 +4080,7 @@ }, "node_modules/aws-cdk-lib/node_modules/require-from-string": { "version": "2.0.2", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "engines": { @@ -4089,7 +4089,7 @@ }, "node_modules/aws-cdk-lib/node_modules/semver": { "version": "7.5.4", - "extraneous": true, + "dev": true, "inBundle": true, "license": "ISC", "dependencies": { @@ -4104,7 +4104,7 @@ }, "node_modules/aws-cdk-lib/node_modules/slice-ansi": { "version": "4.0.0", - "extraneous": true, + "dev": true, "inBundle": true, "license": "MIT", "dependencies": { @@ 
-4121,7 +4121,7 @@
     },
     "node_modules/aws-cdk-lib/node_modules/string-width": {
       "version": "4.2.3",
-      "extraneous": true,
+      "dev": true,
       "inBundle": true,
       "license": "MIT",
       "dependencies": {
@@ -4135,7 +4135,7 @@
     },
     "node_modules/aws-cdk-lib/node_modules/strip-ansi": {
       "version": "6.0.1",
-      "extraneous": true,
+      "dev": true,
       "inBundle": true,
       "license": "MIT",
       "dependencies": {
@@ -4147,7 +4147,7 @@
     },
     "node_modules/aws-cdk-lib/node_modules/table": {
       "version": "6.8.1",
-      "extraneous": true,
+      "dev": true,
       "inBundle": true,
       "license": "BSD-3-Clause",
       "dependencies": {
@@ -4163,7 +4163,7 @@
     },
     "node_modules/aws-cdk-lib/node_modules/universalify": {
       "version": "2.0.1",
-      "extraneous": true,
+      "dev": true,
       "inBundle": true,
       "license": "MIT",
       "engines": {
@@ -4172,7 +4172,7 @@
     },
     "node_modules/aws-cdk-lib/node_modules/uri-js": {
       "version": "4.4.1",
-      "extraneous": true,
+      "dev": true,
       "inBundle": true,
       "license": "BSD-2-Clause",
       "dependencies": {
@@ -4181,13 +4181,13 @@
     },
     "node_modules/aws-cdk-lib/node_modules/yallist": {
       "version": "4.0.0",
-      "extraneous": true,
+      "dev": true,
       "inBundle": true,
       "license": "ISC"
     },
     "node_modules/aws-cdk-lib/node_modules/yaml": {
       "version": "1.10.2",
-      "extraneous": true,
+      "dev": true,
       "inBundle": true,
       "license": "ISC",
       "engines": {
diff --git a/packages/cdk/bin/cdk.ts b/packages/cdk/bin/cdk.ts
index 8ab432b1..2003384e 100644
--- a/packages/cdk/bin/cdk.ts
+++ b/packages/cdk/bin/cdk.ts
@@ -1,5 +1,6 @@
 import 'source-map-support/register';
 import { GuRoot } from '@guardian/cdk/lib/constructs/root';
+import { TranscriptionServiceRepository } from '../lib/repository';
 import { TranscriptionService } from '../lib/transcription-service';
 
 const app = new GuRoot();
@@ -13,3 +14,10 @@ new TranscriptionService(app, 'TranscriptionService-PROD', {
   stage: 'PROD',
   env: { region: 'eu-west-1' },
 });
+
+// The repository is shared between CODE and PROD, so it needs to be a separate stack (with a single PROD stage).
+new TranscriptionServiceRepository(app, 'TranscriptionServiceRepository', {
+  stack: 'investigations',
+  stage: 'PROD',
+  env: { region: 'eu-west-1' },
+});
diff --git a/packages/cdk/lib/repository.ts b/packages/cdk/lib/repository.ts
new file mode 100644
index 00000000..6ce105ba
--- /dev/null
+++ b/packages/cdk/lib/repository.ts
@@ -0,0 +1,109 @@
+import type { GuStackProps } from '@guardian/cdk/lib/constructs/core';
+import { GuStack, GuStringParameter } from '@guardian/cdk/lib/constructs/core';
+import type { App } from 'aws-cdk-lib';
+import { CfnOutput, RemovalPolicy } from 'aws-cdk-lib';
+import { Repository, TagMutability } from 'aws-cdk-lib/aws-ecr';
+import {
+  AccountPrincipal,
+  ArnPrincipal,
+  Effect,
+  PolicyDocument,
+  PolicyStatement,
+  Role,
+} from 'aws-cdk-lib/aws-iam';
+
+/**
+ * Stack containing the ECR repository for the transcription service image, a
+ * repository policy that lets the GitHub Actions role push and pull, and a
+ * role that the deploy tools account can assume to pull images.
+ */
+export class TranscriptionServiceRepository extends GuStack {
+  constructor(scope: App, id: string, props: GuStackProps) {
+    super(scope, id, props);
+    // ARN of the role used by the GitHub Actions workflows; it is the principal of the repository policy below.
+    const githubActionsIAMRoleArn = new GuStringParameter(
+      this,
+      'GithubActionsIAMRoleArn',
+      {
+        description: 'IAM role for role used by github actions workflows',
+      },
+    );
+    const deployToolsAccountNumber = new GuStringParameter(
+      this,
+      'DeployToolsAccount',
+      {
+        description:
+          'Deploy tools account id - needed to give AMIgo access to this repository',
+      },
+    );
+    const repository = new Repository(this, 'TranscriptionServiceRepository', {
+      repositoryName: 'transcription-service',
+      // Lifecycle rule: retain at most 5 images.
+      lifecycleRules: [
+        {
+          maxImageCount: 5,
+        },
+      ],
+      // Mutable tags, so that a tag such as `latest` can be re-pushed.
+      imageTagMutability: TagMutability.MUTABLE,
+      removalPolicy: RemovalPolicy.DESTROY,
+      imageScanOnPush: true,
+    });
+    // Allow the GitHub Actions role to push images to, and read images from, this repository.
+    // NOTE(review): ecr:GetAuthorizationToken is not a per-repository action, so it
+    // presumably has no effect in a repository policy - confirm.
+    repository.addToResourcePolicy(
+      new PolicyStatement({
+        principals: [new ArnPrincipal(githubActionsIAMRoleArn.valueAsString)],
+        actions: [
+          'ecr:GetAuthorizationToken',
+          'ecr:BatchCheckLayerAvailability',
+          'ecr:GetDownloadUrlForLayer',
+          'ecr:GetRepositoryPolicy',
+          'ecr:DescribeRepositories',
+          'ecr:ListImages',
+          'ecr:DescribeImages',
+          'ecr:BatchGetImage',
+          'ecr:InitiateLayerUpload',
+          'ecr:UploadLayerPart',
+          'ecr:CompleteLayerUpload',
+          'ecr:PutImage',
+        ],
+        effect: Effect.ALLOW,
+      }),
+    );
+
+    // Role assumable from the deploy tools account, with pull-only access to the repository.
+    const repoAccessRole = new Role(this, 'RepoAccessRole', {
+      roleName: 'TranscriptionServiceRepoAccessRole',
+      assumedBy: new AccountPrincipal(deployToolsAccountNumber.valueAsString),
+      inlinePolicies: {
+        TranscriptionServiceRepoAccessPolicy: new PolicyDocument({
+          statements: [
+            // ecr:GetAuthorizationToken cannot be scoped to a single repository, hence the wildcard.
+            new PolicyStatement({
+              actions: ['ecr:GetAuthorizationToken'],
+              resources: ['*'],
+              effect: Effect.ALLOW,
+            }),
+            new PolicyStatement({
+              actions: [
+                'ecr:GetDownloadUrlForLayer',
+                'ecr:BatchGetImage',
+                'ecr:BatchCheckLayerAvailability',
+                'ecr:DescribeImages',
+                'ecr:ListImages',
+              ],
+              resources: [repository.repositoryArn],
+              effect: Effect.ALLOW,
+            }),
+          ],
+        }),
+      },
+    });
+
+    new CfnOutput(this, 'AccessRoleArn', {
+      value: repoAccessRole.roleArn,
+    });
+  }
+}
diff --git a/packages/cdk/riff-raff-repository.yaml b/packages/cdk/riff-raff-repository.yaml
new file mode 100644
index 00000000..e8f25c36
--- /dev/null
+++ b/packages/cdk/riff-raff-repository.yaml
@@ -0,0 +1,14 @@
+allowedStages:
+  - PROD
+deployments:
+  cfn-eu-west-1-investigations-transcription-service-repository:
+    type: cloud-formation
+    regions:
+      - eu-west-1
+    stacks:
+      - investigations
+    app: transcription-service-repository
+    contentDirectory: cdk.out
+    parameters:
+      templateStagePaths:
+        PROD: TranscriptionServiceRepository.template.json
diff --git a/whisper_container/Dockerfile b/whisper_container/Dockerfile
new file mode 100644
index 00000000..97dd13b2
--- /dev/null
+++ b/whisper_container/Dockerfile
@@ -0,0 +1,17 @@
+FROM ubuntu:22.04
+WORKDIR /opt
+LABEL com.theguardian.transcription-service.whisper-container="Whisper.cpp container with models downloaded, including ffmpeg"
+
+# Update the package index and install in the same layer, so that a cached `apt-get update` layer can never be
+# paired with a later install; the index is removed afterwards because nothing else in the build needs it.
+RUN apt-get update \
+    && apt-get install -y ffmpeg wget git build-essential \
+    && rm -rf /var/lib/apt/lists/*
+# NOTE(review): the clone is not pinned to a tag or commit, so each rebuild uses whatever is on the default branch.
+RUN git clone https://github.com/ggerganov/whisper.cpp
+RUN cd whisper.cpp && make
+# Fetch the medium model at build time so that it is included in the image.
+RUN bash /opt/whisper.cpp/models/download-ggml-model.sh medium
+
+# Large model not currently in use - but we might want to add it as an option at some point
+#RUN bash /opt/whisper.cpp/models/download-ggml-model.sh large-v2