diff --git a/.github/workflows/build-whisper-docker.yml b/.github/workflows/build-whisper-docker.yml index 75362e6f..a0d10adc 100644 --- a/.github/workflows/build-whisper-docker.yml +++ b/.github/workflows/build-whisper-docker.yml @@ -6,8 +6,8 @@ on: workflow_dispatch: push: paths: - - "whisper_container/Dockerfile" - - ".github/workflows/build-whisper-docker.yml" + - 'whisper_container/Dockerfile' + - '.github/workflows/build-whisper-docker.yml' # Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. env: @@ -70,5 +70,5 @@ jobs: push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - - + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/packages/worker/src/index.ts b/packages/worker/src/index.ts index 6df24287..464636ae 100644 --- a/packages/worker/src/index.ts +++ b/packages/worker/src/index.ts @@ -89,6 +89,7 @@ const main = async () => { ffmpegResult.wavPath, fileToTranscribe, numberOfThreads, + config.app.stage === 'PROD' ? 'medium' : 'tiny', ); const transcriptionOutput: TranscriptionOutput = { diff --git a/packages/worker/src/transcribe.ts b/packages/worker/src/transcribe.ts index 2e4f2105..86c039f6 100644 --- a/packages/worker/src/transcribe.ts +++ b/packages/worker/src/transcribe.ts @@ -13,6 +13,8 @@ interface FfmpegResult { duration?: number; } +export type WhisperModel = 'medium' | 'tiny'; + const CONTAINER_FOLDER = '/input'; const runSpawnCommand = ( @@ -144,8 +146,14 @@ export const getTranscriptionText = async ( wavPath: string, file: string, numberOfThreads: number, + model: WhisperModel, ) => { - const resultFile = await transcribe(containerId, wavPath, numberOfThreads); + const resultFile = await transcribe( + containerId, + wavPath, + numberOfThreads, + model, + ); const transcriptText = readFile( path.resolve(path.parse(file).dir, resultFile), ); @@ -156,6 +164,7 @@ const transcribe = async ( containerId: string, file: string, numberOfThreads: number, + model: WhisperModel, ) => { const outputFile = path.resolve(CONTAINER_FOLDER, path.parse(file).name); console.log(`transcribe outputFile: ${outputFile}`); @@ -166,7 +175,7 @@ const transcribe = async ( containerId, 'whisper.cpp/main', '--model', - 'whisper.cpp/models/ggml-medium.bin', + `whisper.cpp/models/ggml-${model}.bin`, '--threads', numberOfThreads.toString(), '--file', diff --git a/whisper_container/Dockerfile b/whisper_container/Dockerfile index f8197569..5474cf5f 100644 --- a/whisper_container/Dockerfile +++ b/whisper_container/Dockerfile @@ -5,7 +5,8 @@ LABEL com.theguardian.transcription-service.whisper-container="Whisper.cpp conta RUN apt-get update RUN apt-get install -y ffmpeg wget git build-essential RUN git clone https://github.com/ggerganov/whisper.cpp -RUN cd whisper.cpp && make +RUN cd whisper.cpp && git reset --hard v1.5.4 && make +RUN bash /opt/whisper.cpp/models/download-ggml-model.sh tiny RUN bash /opt/whisper.cpp/models/download-ggml-model.sh medium # Large model not currently in use - but we might want to add it as an option at some point