Skip to content
This repository has been archived by the owner on Jun 25, 2023. It is now read-only.

Electriclizard solution #14

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions autotests/helm/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ global:
activeDeadlineSeconds: 3600 # 1h

env:
PARTICIPANT_NAME: <REPLACE_WITH_USERNAME>
api_host: http://inca-smc-mlops-challenge-solution.default.svc.cluster.local/<REPLACE_WITH_ENDPOINT>
PARTICIPANT_NAME: electriclizard
api_host: http://inca-smc-mlops-challenge-solution.default.svc.cluster.local/process

# K6, do not edit!
K6_PROMETHEUS_RW_SERVER_URL: http://kube-prometheus-stack-prometheus.monitoring.svc.cluster.local:9090/api/v1/write
Expand Down
16 changes: 16 additions & 0 deletions solution/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
FROM huggingface/transformers-pytorch-gpu
ARG DEBIAN_FRONTEND=noninteractive

WORKDIR /src
# WORKDIR is an instruction, not a variable: ${WORKDIR} expands to an empty
# string, so the application directory is spelled out explicitly here and in
# the COPY destinations below.
ENV PYTHONPATH="${PYTHONPATH}:/src"

# Copy only the dependency list first so the install layer below stays cached
# until requirements.txt itself changes.
COPY requirements.txt /src/

# apt-get is used throughout (apt's CLI is not meant for scripts); the package
# lists are removed afterwards to keep the layer small.
RUN apt-get update && apt-get upgrade -y && \
    apt-get install -y libsm6 libxrender1 libfontconfig1 libxext6 libgl1-mesa-glx ffmpeg && \
    rm -rf /var/lib/apt/lists/* && \
    pip install -U pip setuptools && \
    pip install -U --no-cache-dir -r requirements.txt

COPY . /src/

ENTRYPOINT [ "python3", "app.py" ]
86 changes: 86 additions & 0 deletions solution/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from typing import List
from configs.config import AppConfig, ModelConfig

import uvicorn
from fastapi import FastAPI, APIRouter
from fastapi.openapi.docs import get_swagger_ui_html
from fastapi.openapi.utils import get_openapi
from fastapi.responses import HTMLResponse
from starlette.requests import Request

from infrastructure.models import TransformerTextClassificationModel
from service.recognition import TextClassificationService
from handlers.recognition import PredictionHandler
from handlers.data_models import ResponseSchema


def build_models(model_configs: List[ModelConfig]) -> List[TransformerTextClassificationModel]:
    """Instantiate one classification model per configuration entry.

    Each entry's ``model``, ``model_path`` and ``tokenizer`` values are passed
    positionally to ``TransformerTextClassificationModel``. The returned list
    keeps the order of ``model_configs``.
    """
    built = []
    for entry in model_configs:
        built.append(
            TransformerTextClassificationModel(entry.model, entry.model_path, entry.tokenizer)
        )
    return built


# Module-level wiring: everything below runs once, when this module is imported.
# The configuration path is relative to the current working directory.
config = AppConfig.parse_file("./configs/app_config.yaml")
# One model object is built per entry of the ``models`` list in the config.
models = build_models(config.models)

# The service fans a text out to every model; the handler turns the service's
# results into the response schema.
recognition_service = TextClassificationService(models)
recognition_handler = PredictionHandler(recognition_service)

# ``app`` is the ASGI application; ``router`` collects the /process route and
# is attached to ``app`` further down.
app = FastAPI()
router = APIRouter()


@router.post("/process", response_model=ResponseSchema)
async def process(request: Request):
    """Classify the raw request body with every configured model.

    The body is read as bytes, decoded as UTF-8 (the ``bytes.decode``
    default) and handed to the prediction handler, whose result is returned
    as the response.
    """
    raw_body = await request.body()
    return recognition_handler.handle(raw_body.decode())


# Attach the routes declared on ``router`` (the /process endpoint) to the app.
app.include_router(router)


@app.get("/healthcheck")
async def main():
    """Liveness probe: always answers with a fixed JSON message."""
    alive = {"message": "I am alive"}
    return alive


def custom_openapi():
    """Return the application's OpenAPI schema, generating it on first use.

    The generated schema is stored on ``app.openapi_schema`` so later calls
    return the stored document instead of rebuilding it.
    """
    if not app.openapi_schema:
        app.openapi_schema = get_openapi(
            title="NLP Model Service",
            version="0.1.0",
            description="Inca test task",
            routes=app.routes,
        )
    return app.openapi_schema


@app.get("/documentation/swagger-ui/", response_class=HTMLResponse)
async def swagger_ui_html():
    """Serve the Swagger UI page that reads /documentation/openapi.json."""
    page = get_swagger_ui_html(
        title="API documentation",
        openapi_url="/documentation/openapi.json",
    )
    return page


@app.get(
    "/documentation/openapi.json",
    response_model_exclude_none=True,
    response_model_exclude_unset=True,
)
async def openapi_endpoint():
    """Return the OpenAPI schema produced by ``custom_openapi``."""
    schema = custom_openapi()
    return schema


if __name__ == "__main__":
    # When this file is executed as a script it is loaded as ``__main__``.
    # Handing uvicorn the import string "app:app" makes it import the module a
    # second time, which repeats the module-level model loading. With a single
    # worker the already-built ``app`` object is served directly instead.
    # Multiple workers still need the import string, because uvicorn refuses an
    # application object when ``workers`` is greater than one.
    target = "app:app" if config.workers > 1 else app
    uvicorn.run(target, host="0.0.0.0", port=config.port, workers=config.workers)

19 changes: 19 additions & 0 deletions solution/configs/app_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
models:
- model: "cardiffnlp"
model_path: "cardiffnlp/twitter-xlm-roberta-base-sentiment"
tokenizer: "cardiffnlp/twitter-xlm-roberta-base-sentiment"
- model: "ivanlau"
model_path: "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base"
tokenizer: "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base"
- model: "svalabs"
model_path: "svalabs/twitter-xlm-roberta-crypto-spam"
tokenizer: "svalabs/twitter-xlm-roberta-crypto-spam"
- model: "EIStakovskii"
model_path: "EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus"
tokenizer: "EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus"
- model: "jy46604790"
model_path: "jy46604790/Fake-News-Bert-Detect"
tokenizer: "jy46604790/Fake-News-Bert-Detect"

port: 8080
workers: 1
18 changes: 18 additions & 0 deletions solution/configs/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from typing import List

from pydantic_yaml import YamlModel


class ModelConfig(YamlModel):
    """Configuration of a single model entry in the application config."""

    # Name of the model; the application passes it as the model's ``name``.
    model: str
    # Value passed as the model's ``model_path`` when it is constructed.
    model_path: str
    # Value passed as the model's ``tokenizer`` when it is constructed.
    tokenizer: str


class AppConfig(YamlModel):
    """Top-level application configuration parsed from the YAML config file."""

    # model parameters
    # One entry per model to build.
    models: List[ModelConfig]
    # app parameters
    # Port and worker count handed to ``uvicorn.run`` by the application.
    port: int
    workers: int

17 changes: 17 additions & 0 deletions solution/handlers/data_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from typing import List

from pydantic import BaseModel, validator


# Serialized prediction of one model: the score and the label it produced.
class RecognitionSchema(BaseModel):
    score: float
    label: str


# Response body of the /process endpoint: one required field per model.
# The field names must equal the ``model_name`` values of the predictions,
# because the handler builds this object from a dict keyed by ``model_name``.
class ResponseSchema(BaseModel):
    cardiffnlp: RecognitionSchema
    ivanlau: RecognitionSchema
    svalabs: RecognitionSchema
    EIStakovskii: RecognitionSchema
    jy46604790: RecognitionSchema

28 changes: 28 additions & 0 deletions solution/handlers/recognition.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from typing import List

from pydantic import ValidationError

from infrastructure.models import TextClassificationModelData
from service.recognition import TextClassificationService
from handlers.data_models import ResponseSchema, RecognitionSchema


class PredictionHandler:
    """Turn a raw text into the API response by delegating to the service.

    The handler asks the recognition service for one prediction per model and
    converts those predictions into a ``ResponseSchema``.
    """

    def __init__(self, recognition_service: TextClassificationService):
        self.recognition_service = recognition_service

    def handle(self, body: str) -> ResponseSchema:
        """Classify ``body`` with every model and return the serialized answer."""
        query_results = self.recognition_service.get_results(body)
        return self.serialize_answer(query_results)

    def serialize_answer(self, results: List[TextClassificationModelData]) -> ResponseSchema:
        """Build the response from per-model predictions, keyed by model name."""
        # A separate name is used so the ``results`` parameter keeps its
        # declared list type instead of being rebound to a dict.
        schemas = {rec.model_name: self._recognitions_to_schema(rec) for rec in results}
        return ResponseSchema(**schemas)

    def _recognitions_to_schema(self, recognition: TextClassificationModelData) -> RecognitionSchema:
        """Convert one prediction into its schema without modifying the input.

        Labels are upper-cased for every model except ``"ivanlau"``, whose
        label is passed through unchanged.
        """
        # Work on a local copy of the label: the caller's prediction object
        # must not change as a side effect of serialization.
        label = recognition.label
        if recognition.model_name != "ivanlau":
            label = label.upper()
        return RecognitionSchema(score=recognition.score, label=label)

13 changes: 13 additions & 0 deletions solution/helm/envs/electriclizard.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
global:
# add any variables you need in format `key: value`
# variables will be available in the container as environment variables

# change 8000 to your application target port
pod:
ports:
- name: http
containerPort: 8080
protocol: TCP
service:
targetPort: 8080

52 changes: 52 additions & 0 deletions solution/infrastructure/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from abc import ABC, abstractmethod
from collections.abc import Callable
from dataclasses import dataclass

import torch
from transformers import pipeline


@dataclass
class TextClassificationModelData:
    """Prediction of a single model for a single input text."""

    # Name of the model that produced the prediction.
    model_name: str
    # Predicted label and its score, as returned by the underlying model.
    label: str
    score: float


class BaseTextClassificationModel(ABC):
    """Common base for text-classification models.

    The constructor stores the model's identity, selects a device and calls
    the subclass-provided ``_load_model`` hook, keeping its result in
    ``self.model``.
    """

    def __init__(self, name: str, model_path: str, tokenizer: str):
        self.name = name
        self.model_path = model_path
        self.tokenizer = tokenizer
        # Device index 0 when CUDA is available, -1 otherwise.
        if torch.cuda.is_available():
            self.device = 0
        else:
            self.device = -1
        self.model = self._load_model()

    @abstractmethod
    def _load_model(self) -> Callable:
        """Build and return the callable used for inference."""

    @abstractmethod
    def __call__(self, input_text: str) -> TextClassificationModelData:
        """Classify ``input_text`` and return the prediction."""


class TransformerTextClassificationModel(BaseTextClassificationModel):
    """Text classifier backed by a ``transformers`` pipeline."""

    def _load_model(self):
        """Create the ``sentiment-analysis`` pipeline for this model.

        The tokenizer comes from ``self.tokenizer`` so that the configured
        tokenizer is honoured instead of being replaced by ``model_path``.
        """
        return pipeline(
            "sentiment-analysis",
            model=self.model_path,
            tokenizer=self.tokenizer,
            device=self.device,
        )

    def __call__(self, input_text: str) -> TextClassificationModelData:
        """Classify ``input_text`` and wrap the result with this model's name.

        Raises:
            TypeError: if ``input_text`` is not a ``str``.
        """
        if not isinstance(input_text, str):
            raise TypeError("Model input text must be str type")
        # Only the first entry of the pipeline output is used; its keys are
        # unpacked into the remaining fields of the result dataclass.
        top_prediction = self.model(input_text)[0]
        return TextClassificationModelData(self.name, **top_prediction)

6 changes: 6 additions & 0 deletions solution/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
fastapi[all]==0.95.1
uvicorn==0.22.0
numpy==1.23.5
pydantic==1.10.7
pydantic-yaml==0.11.2

16 changes: 16 additions & 0 deletions solution/service/recognition.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from abc import ABC, abstractmethod
from typing import List
from dataclasses import dataclass

from infrastructure.models import BaseTextClassificationModel, TextClassificationModelData


class TextClassificationService:
    """Run a text through every configured classification model."""

    def __init__(self, models: List[BaseTextClassificationModel]):
        self.service_models = models

    def get_results(self, input_text: str) -> List[TextClassificationModelData]:
        """Return one prediction per model, in the order the models were given."""
        predictions = []
        for classifier in self.service_models:
            predictions.append(classifier(input_text))
        return predictions