From b10375acfe78d9ae39a764249ae26a844bba61d3 Mon Sep 17 00:00:00 2001
From: Artur
Date: Sun, 14 May 2023 00:39:13 +0500
Subject: [PATCH 1/6] infrastructure layer

---
 solution/infrastructure/models.py | 52 +++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 solution/infrastructure/models.py

diff --git a/solution/infrastructure/models.py b/solution/infrastructure/models.py
new file mode 100644
index 0000000..d63f8ed
--- /dev/null
+++ b/solution/infrastructure/models.py
@@ -0,0 +1,52 @@
+from abc import ABC, abstractmethod
+from collections.abc import Callable
+from dataclasses import dataclass
+
+import torch
+from transformers import pipeline
+
+
+@dataclass
+class TextClassificationModelData:  # one prediction, tagged with the name of the model that made it
+    model_name: str
+    label: str
+    score: float
+
+
+class BaseTextClassificationModel(ABC):
+    """Abstract classifier: stores its settings, picks a device, then loads the model."""
+    def __init__(self, name: str, model_path: str, tokenizer: str):
+        self.name = name
+        self.model_path = model_path
+        self.tokenizer = tokenizer
+        self.device = 0 if torch.cuda.is_available() else -1  # index 0 when CUDA is available, otherwise -1
+        self.model = self._load_model()  # loading is delegated to the concrete subclass
+
+    @abstractmethod
+    def _load_model(self) -> Callable:  # subclasses return the callable stored in self.model
+        ...
+
+    @abstractmethod
+    def __call__(self, input_text: str) -> TextClassificationModelData:  # classify a single text
+        ...
+
+
+class TransformerTextClassificationModel(BaseTextClassificationModel):
+    """Classifier backed by a transformers "sentiment-analysis" pipeline."""
+    def _load_model(self):
+        """Build the pipeline, honouring the separately configured tokenizer (not model_path)."""
+        return pipeline(
+            "sentiment-analysis",
+            model=self.model_path,
+            tokenizer=self.tokenizer,
+            device=self.device
+        )
+
+    def __call__(self, input_text: str) -> TextClassificationModelData:
+        """Classify one string; raise TypeError for any non-str input."""
+        if not isinstance(input_text, str):
+            raise TypeError("Model input text must be str type")
+        # The first item of the pipeline output is unpacked into the result's keyword fields.
+        prediction = self.model(input_text)[0]
+        return TextClassificationModelData(self.name, **prediction)
+
From 558714ab138392fde703e3ae5c98a23ac9102dc1 Mon Sep 17 00:00:00 2001
From: Artur
Date: Sun, 14 May 2023 00:40:26 +0500
Subject: [PATCH 2/6] service layer

---
 solution/service/recognition.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 solution/service/recognition.py

diff --git a/solution/service/recognition.py b/solution/service/recognition.py
new file mode 100644
index 0000000..47adbba
--- /dev/null
+++ b/solution/service/recognition.py
@@ -0,0 +1,16 @@
+from abc import ABC, abstractmethod
+from typing import List
+from dataclasses import dataclass
+
+from infrastructure.models import BaseTextClassificationModel, TextClassificationModelData
+
+
+class TextClassificationService:
+    """Runs one input text through every configured classification model."""
+    def __init__(self, models: List[BaseTextClassificationModel]):
+        self.service_models = models
+
+    def get_results(self, input_text: str) -> List[TextClassificationModelData]:
+        """Return one prediction per model, in the order the models were supplied."""
+        return [model(input_text) for model in self.service_models]
+
From 5923b45499bffd8b2e1533af657acfcc94c2b3da Mon Sep 17 00:00:00 2001
From: Artur
Date: Sun, 14 May 2023 00:44:15 +0500
Subject: [PATCH 3/6] handlers, configuration and transport app layer

---
 solution/app.py                  | 86 ++++++++++++++++++++++++++++++++
 solution/configs/app_config.yaml | 19 +++++++
 solution/configs/config.py       | 18 +++++++
 solution/handlers/data_models.py | 17 +++++++
 solution/handlers/recognition.py | 28 +++++++++++
 5 files changed, 168
insertions(+)
 create mode 100644 solution/app.py
 create mode 100644 solution/configs/app_config.yaml
 create mode 100644 solution/configs/config.py
 create mode 100644 solution/handlers/data_models.py
 create mode 100644 solution/handlers/recognition.py

diff --git a/solution/app.py b/solution/app.py
new file mode 100644
index 0000000..d084f61
--- /dev/null
+++ b/solution/app.py
@@ -0,0 +1,86 @@
+from typing import List
+from configs.config import AppConfig, ModelConfig
+
+import uvicorn
+from fastapi import FastAPI, APIRouter
+from fastapi.openapi.docs import get_swagger_ui_html
+from fastapi.openapi.utils import get_openapi
+from fastapi.responses import HTMLResponse
+from starlette.requests import Request
+
+from infrastructure.models import TransformerTextClassificationModel
+from service.recognition import TextClassificationService
+from handlers.recognition import PredictionHandler
+from handlers.data_models import ResponseSchema
+
+
+def build_models(model_configs: List[ModelConfig]) -> List[TransformerTextClassificationModel]:
+    """Instantiate one transformer classifier per config entry, keeping config order."""
+    return [
+        TransformerTextClassificationModel(conf.model, conf.model_path, conf.tokenizer)
+        for conf in model_configs
+    ]
+
+
+config = AppConfig.parse_file("./configs/app_config.yaml")  # path is relative to the working directory
+models = build_models(config.models)
+
+recognition_service = TextClassificationService(models)
+recognition_handler = PredictionHandler(recognition_service)
+
+app = FastAPI()
+router = APIRouter()
+
+
+@router.post("/process", response_model=ResponseSchema)
+async def process(request: Request):
+    """Classify the raw request body, decoded as UTF-8 (the bytes.decode() default)."""
+    text = (await request.body()).decode()
+    # NOTE(review): handle() is synchronous, so inference runs inside this coroutine.
+    return recognition_handler.handle(text)
+
+
+app.include_router(router)
+
+
+@app.get("/healthcheck")
+async def main():  # liveness probe; always returns the same payload
+    return {"message": "I am alive"}
+
+
+def custom_openapi():
+    """Return the OpenAPI schema, generating it on first use and caching it on the app."""
+    if app.openapi_schema:
+        return app.openapi_schema
+    app.openapi_schema = get_openapi(
+        title="NLP Model Service",
+        version="0.1.0",
+        description="Inca test task",
+        routes=app.routes,
+    )
+    return app.openapi_schema
+
+
+@app.get(
+    "/documentation/swagger-ui/",
+    response_class=HTMLResponse,
+)
+async def swagger_ui_html():
+    return get_swagger_ui_html(  # UI page that loads the schema served by the route below
+        openapi_url="/documentation/openapi.json",
+        title="API documentation"
+    )
+
+
+@app.get(
+    "/documentation/openapi.json",
+    response_model_exclude_unset=True,
+    response_model_exclude_none=True,
+)
+async def openapi_endpoint():
+    return custom_openapi()
+
+
+if __name__ == "__main__":
+    uvicorn.run("app:app", host="0.0.0.0", port=config.port, workers=config.workers)
+
diff --git a/solution/configs/app_config.yaml b/solution/configs/app_config.yaml
new file mode 100644
index 0000000..987f42c
--- /dev/null
+++ b/solution/configs/app_config.yaml
@@ -0,0 +1,19 @@
+models:
+  - model: "cardiffnlp"
+    model_path: "cardiffnlp/twitter-xlm-roberta-base-sentiment"
+    tokenizer: "cardiffnlp/twitter-xlm-roberta-base-sentiment"
+  - model: "ivanlau"
+    model_path: "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base"
+    tokenizer: "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base"
+  - model: "svalabs"
+    model_path: "svalabs/twitter-xlm-roberta-crypto-spam"
+    tokenizer: "svalabs/twitter-xlm-roberta-crypto-spam"
+  - model: "EIStakovskii"
+    model_path: "EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus"
+    tokenizer: "EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus"
+  - model: "jy46604790"
+    model_path: "jy46604790/Fake-News-Bert-Detect"
+    tokenizer: "jy46604790/Fake-News-Bert-Detect"
+
+port: 8080
+workers: 1
diff --git a/solution/configs/config.py b/solution/configs/config.py
new file mode 100644
index 0000000..97cc56f
--- /dev/null
+++ b/solution/configs/config.py
@@ -0,0 +1,18 @@
+from typing import List
+
+from pydantic_yaml import YamlModel
+
+
+class ModelConfig(YamlModel):  # one entry of the `models` list in app_config.yaml
+    model: str
+    model_path: str
+    tokenizer: str
+
+
+class AppConfig(YamlModel):  # top-level shape of app_config.yaml
+    # model parameters
+    models: List[ModelConfig]
+    # app parameters
+    port: int
+    workers: int
+
diff --git a/solution/handlers/data_models.py b/solution/handlers/data_models.py
new file mode 100644
index 0000000..a132a27
--- /dev/null
+++ b/solution/handlers/data_models.py
@@ -0,0 +1,17 @@
+from typing import List
+
+from pydantic import BaseModel, validator
+
+
+class RecognitionSchema(BaseModel):  # serialized form of a single model's prediction
+    score: float
+    label: str
+
+
+class ResponseSchema(BaseModel):  # one field per `model` name listed in app_config.yaml
+    cardiffnlp: RecognitionSchema
+    ivanlau: RecognitionSchema
+    svalabs: RecognitionSchema
+    EIStakovskii: RecognitionSchema
+    jy46604790: RecognitionSchema
+
diff --git a/solution/handlers/recognition.py b/solution/handlers/recognition.py
new file mode 100644
index 0000000..d536190
--- /dev/null
+++ b/solution/handlers/recognition.py
@@ -0,0 +1,28 @@
+from typing import List
+
+from pydantic import ValidationError
+
+from infrastructure.models import TextClassificationModelData
+from service.recognition import TextClassificationService
+from handlers.data_models import ResponseSchema, RecognitionSchema
+
+
+class PredictionHandler:
+    """Turns raw request text into the ResponseSchema returned by the API."""
+    def __init__(self, recognition_service: TextClassificationService):
+        self.recognition_service = recognition_service
+
+    def handle(self, body: str) -> ResponseSchema:
+        """Classify *body* with every model and serialize the predictions."""
+        query_results = self.recognition_service.get_results(body)
+        return self.serialize_answer(query_results)
+
+    def serialize_answer(self, results: List[TextClassificationModelData]) -> ResponseSchema:
+        """Key each prediction by its model name and build the ResponseSchema from that mapping."""
+        return ResponseSchema(**{rec.model_name: self._recognitions_to_schema(rec) for rec in results})
+
+    def _recognitions_to_schema(self, recognition: TextClassificationModelData) -> RecognitionSchema:
+        """Upper-case the label (left as-is for "ivanlau") without mutating *recognition*."""
+        label = recognition.label if recognition.model_name == "ivanlau" else recognition.label.upper()
+        return RecognitionSchema(score=recognition.score, label=label)
+
From a9f60051112ef310e0831bd5974856693364ea3f Mon Sep 17 00:00:00 2001
From: Artur
Date: Sun, 14 May 2023 00:44:42 +0500
Subject: [PATCH 4/6] Containerization

---
solution/Dockerfile       | 17 +++++++++++++++++
 solution/requirements.txt |  6 ++++++
 2 files changed, 23 insertions(+)
 create mode 100644 solution/Dockerfile
 create mode 100644 solution/requirements.txt

diff --git a/solution/Dockerfile b/solution/Dockerfile
new file mode 100644
index 0000000..6f621c1
--- /dev/null
+++ b/solution/Dockerfile
@@ -0,0 +1,17 @@
+FROM huggingface/transformers-pytorch-gpu
+ARG DEBIAN_FRONTEND=noninteractive
+# The WORKDIR instruction defines no $WORKDIR variable, so /src paths are written out explicitly.
+WORKDIR /src
+ENV PYTHONPATH="${PYTHONPATH}:/src"
+
+COPY requirements.txt ./
+
+RUN apt-get update && apt upgrade -y && \
+    apt-get install -y libsm6 libxrender1 libfontconfig1 libxext6 libgl1-mesa-glx ffmpeg && \
+    pip install -U pip setuptools && \
+    pip install -U --no-cache-dir -r requirements.txt
+
+COPY . .
+
+# ENTRYPOINT [ "python3", "app.py" ]
+ENTRYPOINT [ "bash" ]
diff --git a/solution/requirements.txt b/solution/requirements.txt
new file mode 100644
index 0000000..7c28cc1
--- /dev/null
+++ b/solution/requirements.txt
@@ -0,0 +1,6 @@
+fastapi[all]==0.95.1
+uvicorn==0.22.0
+numpy==1.23.5
+pydantic==1.10.7
+pydantic-yaml==0.11.2
+
From a9659cad4bad7798f2f9fc9c70ce6fdf76035bd8 Mon Sep 17 00:00:00 2001
From: Artur
Date: Sun, 14 May 2023 00:45:05 +0500
Subject: [PATCH 5/6] helm chart updates

---
 autotests/helm/values.yaml             |  4 ++--
 solution/helm/envs/electriclizard.yaml | 13 +++++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)
 create mode 100644 solution/helm/envs/electriclizard.yaml

diff --git a/autotests/helm/values.yaml b/autotests/helm/values.yaml
index cda6a5e..0b053f6 100644
--- a/autotests/helm/values.yaml
+++ b/autotests/helm/values.yaml
@@ -25,8 +25,8 @@ global:
   activeDeadlineSeconds: 3600 # 1h
 
   env:
-    PARTICIPANT_NAME:
-    api_host: http://inca-smc-mlops-challenge-solution.default.svc.cluster.local/
+    PARTICIPANT_NAME: electriclizard
+    api_host: http://inca-smc-mlops-challenge-solution.default.svc.cluster.local/process
 
     # K6, do not edit!
     K6_PROMETHEUS_RW_SERVER_URL: http://kube-prometheus-stack-prometheus.monitoring.svc.cluster.local:9090/api/v1/write
diff --git a/solution/helm/envs/electriclizard.yaml b/solution/helm/envs/electriclizard.yaml
new file mode 100644
index 0000000..6d3c75c
--- /dev/null
+++ b/solution/helm/envs/electriclizard.yaml
@@ -0,0 +1,13 @@
+global:
+  # add any variables you need in format `key: value`
+  # variables will be available in the container as environment variables
+
+  # change 8000 to your application target port
+  pod:
+    ports:
+      - name: http
+        containerPort: 8080
+        protocol: TCP
+  service:
+    targetPort: 8080
+
From 3bae4aa259ad602f98ea0b494992b0d0d1611177 Mon Sep 17 00:00:00 2001
From: Artur
Date: Sun, 14 May 2023 00:49:30 +0500
Subject: [PATCH 6/6] entrypoint fix

---
 solution/Dockerfile | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/solution/Dockerfile b/solution/Dockerfile
index 6f621c1..ebb9413 100644
--- a/solution/Dockerfile
+++ b/solution/Dockerfile
@@ -13,5 +13,4 @@ RUN apt-get update && apt upgrade -y && \
 
 COPY . .
 
-# ENTRYPOINT [ "python3", "app.py" ]
-ENTRYPOINT [ "bash" ]
+ENTRYPOINT [ "python3", "app.py" ]