1712n · electriclizard · May 13, 2023 · May 13, 2023 · May 13, 2023 · May 13, 2023
@@ -25,8 +25,8 @@ global:
     activeDeadlineSeconds: 3600 # 1h
 
   env:
-    PARTICIPANT_NAME: <REPLACE_WITH_USERNAME>
-    api_host: http://inca-smc-mlops-challenge-solution.default.svc.cluster.local/<REPLACE_WITH_ENDPOINT>
+    PARTICIPANT_NAME: electriclizard
+    api_host: http://inca-smc-mlops-challenge-solution.default.svc.cluster.local/process
 
     # K6, do not edit!
     K6_PROMETHEUS_RW_SERVER_URL: http://kube-prometheus-stack-prometheus.monitoring.svc.cluster.local:9090/api/v1/write

@@ -0,0 +1,16 @@
+FROM huggingface/transformers-pytorch-gpu
+ARG DEBIAN_FRONTEND=noninteractive
+
+WORKDIR /src
+# WORKDIR is an instruction and does not define a ${WORKDIR} variable, so the
+# application directory is spelled out explicitly instead of relying on an
+# empty expansion.
+ENV PYTHONPATH="${PYTHONPATH}:/src"
+
+# Copy the dependency list on its own first so the install layer below can be
+# reused when only application code changes.
+COPY requirements.txt ./
+
+RUN apt-get update && apt-get upgrade -y && \
+    apt-get install -y libsm6 libxrender1 libfontconfig1 libxext6 libgl1-mesa-glx ffmpeg && \
+    pip install -U pip setuptools && \
+    pip install -U --no-cache-dir -r requirements.txt
+
+# Relative destinations resolve against WORKDIR (/src).
+COPY . ./
+
+ENTRYPOINT [ "python3", "app.py" ]
@@ -0,0 +1,86 @@
+from typing import List
+from configs.config import AppConfig, ModelConfig
+
+import uvicorn
+from fastapi import FastAPI, APIRouter
+from fastapi.openapi.docs import get_swagger_ui_html
+from fastapi.openapi.utils import get_openapi
+from fastapi.responses import HTMLResponse
+from starlette.requests import Request
+
+from infrastructure.models import TransformerTextClassificationModel
+from service.recognition import TextClassificationService
+from handlers.recognition import PredictionHandler
+from handlers.data_models import ResponseSchema
+
+
+def build_models(model_configs: List[ModelConfig]) -> List[TransformerTextClassificationModel]:
+    models = [
+            TransformerTextClassificationModel(conf.model, conf.model_path, conf.tokenizer)
+            for conf in model_configs
+        ]
+    return models
+
+
+# Read the application settings from the YAML file. The path is relative, so
+# it is resolved against the process working directory.
+config = AppConfig.parse_file("./configs/app_config.yaml")
+# Construct every configured model once, when this module is imported.
+models = build_models(config.models)
+
+# Wire the layers together: models -> service -> HTTP-facing handler.
+recognition_service = TextClassificationService(models)
+recognition_handler = PredictionHandler(recognition_service)
+
+app = FastAPI()
+router = APIRouter()
+
+
+@router.post("/process", response_model=ResponseSchema)
+async def process(request: Request):
+    text = (await request.body()).decode()
+    # call handler
+    result = recognition_handler.handle(text)
+    return result
+
+
+app.include_router(router)
+
+
+@app.get("/healthcheck")
+async def main():
+    return {"message": "I am alive"}
+
+
+def custom_openapi():
+    if app.openapi_schema:
+        return app.openapi_schema
+    openapi_schema = get_openapi(
+        title="NLP Model Service",
+        version="0.1.0",
+        description="Inca test task",
+        routes=app.routes,
+    )
+    app.openapi_schema = openapi_schema
+    return app.openapi_schema
+
+
+@app.get(
+    "/documentation/swagger-ui/",
+    response_class=HTMLResponse,
+)
+async def swagger_ui_html():
+    return get_swagger_ui_html(
+        openapi_url="/documentation/openapi.json",
+        title="API documentation"
+    )
+
+
+@app.get(
+    "/documentation/openapi.json",
+    response_model_exclude_unset=True,
+    response_model_exclude_none=True,
+)
+async def openapi_endpoint():
+    return custom_openapi()
+
+
+if __name__ == "__main__":
+    # Start uvicorn only when executed as a script. The app is passed as an
+    # import string ("app:app"); port and worker count come from the config.
+    uvicorn.run("app:app", host="0.0.0.0", port=config.port, workers=config.workers)
+
@@ -0,0 +1,19 @@
+# One entry per classifier. For each entry:
+#   model      - short model name; also the field name of its result in the response
+#   model_path - model identifier handed to the transformers pipeline
+#   tokenizer  - tokenizer identifier for the model
+models:
+  - model: "cardiffnlp"
+    model_path: "cardiffnlp/twitter-xlm-roberta-base-sentiment"
+    tokenizer: "cardiffnlp/twitter-xlm-roberta-base-sentiment"
+  - model: "ivanlau"
+    model_path: "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base"
+    tokenizer: "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base"
+  - model: "svalabs"
+    model_path: "svalabs/twitter-xlm-roberta-crypto-spam"
+    tokenizer: "svalabs/twitter-xlm-roberta-crypto-spam"
+  - model: "EIStakovskii"
+    model_path: "EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus"
+    tokenizer: "EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus"
+  - model: "jy46604790"
+    model_path: "jy46604790/Fake-News-Bert-Detect"
+    tokenizer: "jy46604790/Fake-News-Bert-Detect"
+
+# HTTP server settings passed to uvicorn when the app is started as a script.
+port: 8080
+workers: 1
@@ -0,0 +1,18 @@
+from typing import List
+
+from pydantic_yaml import YamlModel
+
+
+class ModelConfig(YamlModel):
+    """Settings for a single classification model, as listed under ``models``."""
+
+    # Short model name.
+    model: str
+    # Identifier/path of the model weights.
+    model_path: str
+    # Identifier/path of the tokenizer.
+    tokenizer: str
+
+
+class AppConfig(YamlModel):
+    """Top-level application settings: the model list plus server parameters."""
+
+    # model parameters
+    models: List[ModelConfig]
+    # app parameters
+    port: int
+    workers: int
+
@@ -0,0 +1,17 @@
+from typing import List
+
+from pydantic import BaseModel, validator
+
+
+class RecognitionSchema(BaseModel):
+    """A single model's prediction: a label and its score."""
+
+    score: float
+    label: str
+
+
+class ResponseSchema(BaseModel):
+    """Response body: one required prediction per model, keyed by model name."""
+
+    cardiffnlp: RecognitionSchema
+    ivanlau: RecognitionSchema
+    svalabs: RecognitionSchema
+    EIStakovskii: RecognitionSchema
+    jy46604790: RecognitionSchema
+
@@ -0,0 +1,28 @@
+from typing import List
+
+from pydantic import ValidationError
+
+from infrastructure.models import TextClassificationModelData
+from service.recognition import TextClassificationService
+from handlers.data_models import ResponseSchema, RecognitionSchema
+
+
+class PredictionHandler:
+
+    def __init__(self, recognition_service: TextClassificationService):
+        self.recognition_service = recognition_service
+
+    def handle(self, body: str) -> ResponseSchema:
+        query_results = self.recognition_service.get_results(body)
+        result = self.serialize_answer(query_results)
+        return result
+
+    def serialize_answer(self, results: List[TextClassificationModelData]) -> ResponseSchema:
+        results = {rec.model_name: self._recognitions_to_schema(rec) for rec in results}
+        return ResponseSchema(**results)
+
+    def _recognitions_to_schema(self, recognition: TextClassificationModelData) -> RecognitionSchema:
+        if recognition.model_name != "ivanlau":
+            recognition.label = recognition.label.upper()
+        return RecognitionSchema(score=recognition.score, label=recognition.label)
+
@@ -0,0 +1,13 @@
+global:
+  # add any variables you need in format `key: value`
+  # variables will be available in the container as environment variables
+
+  # change 8000 to your application target port
+  pod:
+    ports:
+      - name: http
+        containerPort: 8080
+        protocol: TCP
+  service:
+    targetPort: 8080
+
@@ -0,0 +1,52 @@
+from abc import ABC, abstractmethod
+from collections.abc import Callable
+from dataclasses import dataclass
+
+import torch
+from transformers import pipeline
+
+
+@dataclass
+class TextClassificationModelData:
+    """One prediction tagged with the name of the model that produced it."""
+
+    model_name: str
+    label: str
+    score: float
+
+
+class BaseTextClassificationModel(ABC):
+    """Common interface for text classifiers: loaded on construction, invoked by calling."""
+
+    def __init__(self, name: str, model_path: str, tokenizer: str):
+        self.name = name
+        self.model_path = model_path
+        self.tokenizer = tokenizer
+        # Device index: 0 when CUDA is available, otherwise -1.
+        self.device = 0 if torch.cuda.is_available() else -1
+        # Loading is eager: constructing an instance calls _load_model().
+        self.model = self._load_model()
+
+    @abstractmethod
+    def _load_model(self) -> Callable:
+        """Create and return the callable used for inference."""
+        ...
+
+    @abstractmethod
+    def __call__(self, input_text: str) -> TextClassificationModelData:
+        """Classify ``input_text`` and return the prediction."""
+        ...
+
+
+class TransformerTextClassificationModel(BaseTextClassificationModel):
+
+    def _load_model(self):
+        sentiment_task = pipeline(
+                "sentiment-analysis",
+                model=self.model_path,
+                tokenizer=self.model_path,
+                device=self.device
+                )
+        return sentiment_task
+
+    def __call__(self, input_text: str) -> TextClassificationModelData:
+        if isinstance(input_text, str):
+            prediction = self.model(input_text)[0]
+            prediction = TextClassificationModelData(self.name, **prediction)
+            return prediction
+        else:
+            raise TypeError("Model input text must be str type")
+
@@ -0,0 +1,6 @@
+fastapi[all]==0.95.1
+uvicorn==0.22.0
+numpy==1.23.5
+pydantic==1.10.7
+pydantic-yaml==0.11.2
+
@@ -0,0 +1,16 @@
+from abc import ABC, abstractmethod
+from typing import List
+from dataclasses import dataclass
+
+from infrastructure.models import BaseTextClassificationModel, TextClassificationModelData
+
+
+class TextClassificationService:
+
+    def __init__(self, models: List[BaseTextClassificationModel]):
+        self.service_models = models
+
+    def get_results(self, input_text: str) -> List[TextClassificationModelData]:
+        results = [model(input_text) for model in self.service_models]
+        return results
+