From c8f5bce9700e58b11501b8fb9b7990a7dd88e414 Mon Sep 17 00:00:00 2001
From: younesselrag
Date: Mon, 9 Sep 2024 03:16:36 +0100
Subject: [PATCH] Dockerize the server side

---
 DockerFile              | 13 +++++++++
 DockerFile.fastapi.cuda | 21 +++++++++++++
 README.md               | 65 ++++-------------------------------
 3 files changed, 39 insertions(+), 60 deletions(-)
 create mode 100644 DockerFile.fastapi.cuda

diff --git a/DockerFile b/DockerFile
index e69de29..7ddb4de 100644
--- a/DockerFile
+++ b/DockerFile
@@ -0,0 +1,13 @@
+FROM pytorch/pytorch:2.4.0-cuda12.1-cudnn9-devel
+
+ENV PYTHONUNBUFFERED 1
+
+WORKDIR /usr/src/app
+
+# Install packages
+RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
\ No newline at end of file
diff --git a/DockerFile.fastapi.cuda b/DockerFile.fastapi.cuda
new file mode 100644
index 0000000..1a2a6ed
--- /dev/null
+++ b/DockerFile.fastapi.cuda
@@ -0,0 +1,21 @@
+# Copyright (C) 2024 Youness Elbrag
+
+FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu20.04
+
+RUN apt-get update && apt-get install -y software-properties-common
+RUN add-apt-repository ppa:deadsnakes/ppa
+RUN apt-get update && apt-get install -y python3.10 python3.10-venv python3-pip ffmpeg
+
+WORKDIR /Server-fastapi
+COPY . .
+
+RUN python3.10 -m venv venv
+RUN /bin/bash -c "source venv/bin/activate && pip install --upgrade pip"
+RUN /bin/bash -c "source venv/bin/activate && pip install -e ."
+RUN /bin/bash -c "source venv/bin/activate && pip install -r fastapi/requirements-fastapi-cuda.txt"
+
+WORKDIR /Server-fastapi/backend
+
+EXPOSE 8000
+
+CMD ["../venv/bin/uvicorn", "api.app:app", "--host", "0.0.0.0", "--port", "8000"]
\ No newline at end of file
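The two images above target different bases: a PyTorch/CUDA 12.1 devel image for the pipeline, and a lighter CUDA 11.8 runtime image that serves the FastAPI backend. A minimal build-and-run sketch follows; the image tags and the repo root as build context are illustrative assumptions, and `--gpus all` presumes the NVIDIA Container Toolkit setup covered in the README changes below.

```bash
# Build the pipeline image (the file is named "DockerFile", so -f is required).
docker build -f DockerFile -t speech-ora:latest .

# Build the FastAPI server image; its CMD runs uvicorn on port 8000 (see EXPOSE above).
docker build -f DockerFile.fastapi.cuda -t speech-ora-fastapi:cuda .

# Run the server with GPU access, publishing port 8000 on the host.
docker run --rm --gpus all -p 8000:8000 speech-ora-fastapi:cuda
```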
diff --git a/README.md b/README.md
index 2bb715c..ace185a 100644
--- a/README.md
+++ b/README.md
@@ -1,31 +1,20 @@
 # 🎤 Real-Time Speech-to-Speech (S2S) Conversion using Hugging Face Models
-**Speech-Ora** is an open-source web app for real-time speech-to-speech conversion, leveraging state-of-the-art models from the Hugging Face ecosystem to create interactions similar to GPT-like conversations.
-
-## 📖 Table of Contents
-* [Overview](#overview)
-  - [System Structure](#system-structure)
-  - [Modularity](#modularity)
-* [Setup Instructions](#setup-instructions)
-* [Usage](#usage)
-  - [Server/Client Approach](#serverclient-approach)
-  - [Local Approach](#local-approach)
-* [Command-Line Usage](#command-line-usage)
-  - [Model Parameters](#model-parameters)
-  - [Generation Parameters](#generation-parameters)
-  - [Key Parameters](#key-parameters)
-* [Docker Setup](#docker-setup)
+**Speech-OraAgent** is an open-source, agent-driven backend for real-time speech-to-speech conversion, leveraging state-of-the-art models from the Hugging Face ecosystem to create interactions similar to GPT-like conversations.
+
 ## Overview
 ### System Structure
 This project implements a speech-to-speech server-side/client-side backend, exposed through APIs and built from the following open-source components:
 1. **Voice Activity Detection (VAD)**: Powered by [Silero VAD v5](https://github.com/snakers4/silero-vad).
 2. **Speech to Text (STT)**: Uses Whisper models from the Hugging Face hub.
 3. **Language Model (LM)**: Any Hugging Face instruct model can be used.
 4. **Text to Speech (TTS)**: Uses [Parler-TTS](https://github.com/huggingface/parler-tts) for speech synthesis.
+**Note**: you can load a different open-source model, or use provider-hosted models via their API keys.
+
 ### Modularity
 The pipeline is modular and flexible, allowing customization at each stage:
 - **VAD**: Integrates the [Silero VAD](https://github.com/snakers4/silero-vad).
@@ -48,48 +37,6 @@ All components are implemented as independent classes for easy customization.
 pip install -r requirements.txt
 ```
-## Usage
-
-The pipeline supports two primary modes of operation:
-1. **Server/Client Approach**: Models run on a server, and audio input/output are streamed between client and server.
-2. **Local Approach**: Uses a client-server architecture on the same machine using the loopback address.
-
-### Server/Client Approach
-
-1. Start the pipeline on the server:
-
-
-2. Run the client to send audio input and receive the generated audio:
-
-
-### Local Approach
-
-Run the pipeline locally by using the loopback address:
-
-
-## Command-Line Usage
-
-The pipeline includes customizable parameters for running the models.
-
-### Model Parameters
-
-- VAD Model: The voice activity detection model.
-- STT Model: Whisper model checkpoint from Hugging Face.
-- LM Model: Any Hugging Face instruct model.
-
-### Generation Parameters
-
-- Sampling Rate: Adjustable sampling rate for audio.
-- Chunk Size: Chunk size for audio processing.
-
-
-
-## Docker Setup
-
-To run the pipeline in a Docker container with GPU access, follow these steps:
-
-
-
 ### Setup the Environment
 
 Set up GPU access for Docker; the steps below configure the container runtime so the full project image can run inside a container.
 
 ```bash
 sudo apt-get install -y nvidia-container-toolkit
 ```
 ```bash
 sudo nvidia-ctk runtime configure --runtime=docker
 ```
-Now you're ready to run the project in a container with GPU support.
-
-### Build and Run the Docker Container
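After `nvidia-ctk runtime configure`, the Docker daemon usually needs a restart before `--gpus` takes effect; a quick sanity check is to run `nvidia-smi` in a stock CUDA container. A sketch, assuming the same CUDA 11.8 base family as the FastAPI image:

```bash
# Restart Docker so the NVIDIA runtime configuration is picked up.
sudo systemctl restart docker

# Confirm that containers can see the GPU.
docker run --rm --gpus all nvidia/cuda:11.8.0-base-ubuntu20.04 nvidia-smi
```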