Skip to content

Commit

Permalink
feat(cli serve): add a new reranker BCEReranking and github action to…
Browse files Browse the repository at this point in the history
… build core-library-cli

Signed-off-by: bjwswang <bjwswang@gmail.com>
  • Loading branch information
bjwswang committed Mar 1, 2024
1 parent 6b0a893 commit f79e4bf
Show file tree
Hide file tree
Showing 9 changed files with 221 additions and 17 deletions.
61 changes: 61 additions & 0 deletions .github/workflows/image_build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Builds the multi-arch KubeAGI core-library CLI image and pushes it to
# Docker Hub when files under libs/ change on main, or on manual dispatch.
name: Build KubeAGI Core Library CLI Image

on:
  push:
    branches: [main]
    paths:
      - 'libs/**'
  workflow_dispatch:
env:
  # Package index handed to the Docker build as a build arg.
  PYTHON_INDEX_URL: https://pypi.org/simple

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Presumably needed so `git describe --tags` below can see tags.
          fetch-depth: 0
      - name: Set Variable
        id: set-env
        run: |
          # Latest tag matching v*, falling back to v0.0.1 when none exists.
          TAG=$(git describe --tags --abbrev=0 --match 'v*' 2> /dev/null) || true
          if [ -z "$TAG" ]; then
            echo "No tag found, use v0.0.1 as default"
            TAG=v0.0.1
          fi
          echo "TAG=${TAG}" >> "$GITHUB_OUTPUT"
          echo "DATE=$(TZ=Asia/Shanghai date +'%Y%m%d')" >> "$GITHUB_OUTPUT"
      - name: Show Variable
        run: echo "variables ${{ steps.set-env.outputs.TAG }}-${{ steps.set-env.outputs.DATE }}"
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          buildkitd-flags: --debug
          config-inline: |
            [worker.oci]
            max-parallelism = 1
      - name: Login to the dockerhub Registry
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}
      - uses: benjlevesque/short-sha@v2.2
        name: Get short commit sha
        id: short-sha
      - name: Build and push Core Library CLI Image
        id: push-eval
        uses: docker/build-push-action@v5
        with:
          context: .
          file: docker/Dockerfile
          platforms: linux/amd64,linux/arm64
          # Published as latest, <tag>, and <tag>-<date>-<short sha>.
          tags: |
            kubeagi/core-library-cli:latest
            kubeagi/core-library-cli:${{ steps.set-env.outputs.TAG }}
            kubeagi/core-library-cli:${{ steps.set-env.outputs.TAG }}-${{ steps.set-env.outputs.DATE }}-${{ steps.short-sha.outputs.sha }}
          push: true
          build-args: |
            PYTHON_INDEX_URL=${{ env.PYTHON_INDEX_URL }}
42 changes: 42 additions & 0 deletions .github/workflows/image_build_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Pull-request check: builds the core-library CLI image for both target
# platforms without pushing it.
name: Test Build KubeAGI Core Library CLI Image

on:
  pull_request:
    branches: [main]
    paths:
      - 'libs/**'
  workflow_dispatch:
env:
  # Package index handed to the Docker build as a build arg.
  PYTHON_INDEX_URL: https://pypi.org/simple

jobs:
  test_image_build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # Presumably here so the non-native platform can be emulated - confirm.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          buildkitd-flags: --debug
          config-inline: |
            [worker.oci]
            max-parallelism = 1
      - name: Set up GCC
        uses: egor-tensin/setup-gcc@v1
        with:
          version: latest
          platform: x64
      - name: Build core library cli image
        id: push-worker
        uses: docker/build-push-action@v5
        with:
          context: .
          file: docker/Dockerfile
          platforms: linux/amd64,linux/arm64
          # Build only; this workflow never pushes the image.
          push: false
          build-args: |
            PYTHON_INDEX_URL=${{ env.PYTHON_INDEX_URL }}
27 changes: 27 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
ARG PY_VER=3.11

# python environment
FROM python:${PY_VER}-slim AS runner

# Debian package mirror substituted for deb.debian.org in the apt sources.
# Pass --build-arg PACKAGE_REGISTRY=deb.debian.org to keep the official archive.
ARG PACKAGE_REGISTRY="mirrors.tuna.tsinghua.edu.cn"
# Double quotes so the shell expands the build arg inside the sed expression.
RUN sed -i "s/deb.debian.org/${PACKAGE_REGISTRY}/g" /etc/apt/sources.list.d/debian.sources

ENV TZ=Asia/Shanghai
# Install tzdata and point the system clock at Asia/Shanghai; the apt package
# lists are removed in the same layer so they do not end up in the image.
RUN export DEBIAN_FRONTEND=noninteractive \
    && apt-get update \
    && apt-get install -y tzdata \
    && ln -fs /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
    && dpkg-reconfigure --frontend noninteractive tzdata \
    && rm -rf /var/lib/apt/lists/*

# Official: https://pypi.org/simple
ARG PYTHON_INDEX_URL=https://pypi.mirrors.ustc.edu.cn/simple/
COPY libs /libs
RUN python -m pip install ragas langchain==0.0.1 -i ${PYTHON_INDEX_URL}
# Install the core library first, then the CLI, both in editable mode.
WORKDIR /libs/core
RUN pip install -e . -i ${PYTHON_INDEX_URL}

WORKDIR /libs/cli
RUN pip install -e . -i ${PYTHON_INDEX_URL}

# Default reranking model; can be overridden when the container is started.
ENV RERANKING_MODEL_PATH=BAAI/bge-reranker-large

CMD [ "python","kubeagi_cli/cli.py","serve" ]
3 changes: 2 additions & 1 deletion libs/cli/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,4 +51,5 @@ kubeagi-cli evaluate context_precision ~/core-library/examples/testdata/ragas.cs
```

The above command will run the rag evaluation with metrics `context_precision` and test dataset `~/core-library/examples/testdata/ragas.csv` with the help
of OpenAI.

1 change: 0 additions & 1 deletion libs/cli/kubeagi_cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import os
import typer
from typing_extensions import Annotated
from typing import Optional
from kubeagi_core.evaluation.ragas_eval import RagasEval


Expand Down
62 changes: 55 additions & 7 deletions libs/cli/kubeagi_cli/serve/reranking.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,28 +15,76 @@

from typing import List
from FlagEmbedding import FlagReranker
from BCEmbedding import RerankerModel
from abc import ABC, abstractmethod


class BaseReranking(ABC):
    """
    The BaseReranking is the common interface for runners that score text
    pairs with a reranking model.
    """

    @abstractmethod
    def run(self, pairs: List[List[str]]):
        """Run the reranking model over ``pairs``.

        Args:
            pairs: text pairs to score, each given as a list of strings.

        Returns:
            A list of scores, or ``None`` when ``pairs`` is empty.
        """

    @staticmethod
    def _compute_scores(reranker, pairs: List[List[str]]):
        """Score ``pairs`` with ``reranker`` and normalise the result to a list.

        Args:
            reranker: any object exposing ``compute_score(pairs)``.
            pairs: text pairs to score.

        Returns:
            ``None`` when there is nothing to score, otherwise the scores as
            a list.
        """
        if not pairs:
            return None
        result = reranker.compute_score(pairs)
        # compute_score may hand back a bare float; wrap it so callers
        # always receive a list.
        if isinstance(result, float):
            result = [result]
        return result


class BGEReranking(BaseReranking):
    """
    The BGEReranking is used to run reranking models with FlagEmbedding like
    bge-reranker-large(https://huggingface.co/BAAI/bge-reranker-large)
    """

    _reranker: "FlagReranker"

    def __init__(
        self,
        model_name_or_path: str,
    ):
        # Build the reranker once here so every run() call reuses it.
        self._reranker = FlagReranker(
            model_name_or_path=model_name_or_path, use_fp16=False
        )

    def run(self, pairs: List[List[str]]):
        """Score ``pairs`` with the bge reranking model.

        Returns a list of scores, or ``None`` when ``pairs`` is empty.
        """
        return self._compute_scores(self._reranker, pairs)


class BCEReranking(BaseReranking):
    """
    The BCEReranking is used to run reranking models with BCEmbedding from
    https://github.com/netease-youdao/BCEmbedding
    """

    _reranker: "RerankerModel"

    def __init__(
        self,
        model_name_or_path: str,
    ):
        # Build the reranker once here so every run() call reuses it.
        self._reranker = RerankerModel(
            model_name_or_path=model_name_or_path, use_fp16=False
        )

    def run(self, pairs: List[List[str]]):
        """Score ``pairs`` with the bce reranking model.

        Returns a list of scores, or ``None`` when ``pairs`` is empty.
        """
        return self._compute_scores(self._reranker, pairs)


def select_reranking(model_name_or_path: str) -> BaseReranking:
    """Pick the reranking runner that matches ``model_name_or_path``.

    The choice is made by a case-insensitive substring match: names
    containing ``bge`` get a BGEReranking, names containing ``bce`` get a
    BCEReranking. ``bge`` is checked first.

    Args:
        model_name_or_path: model name or path used to choose and build the
            runner.

    Returns:
        A runner constructed with ``model_name_or_path``.

    Raises:
        ValueError: if ``model_name_or_path`` is not a string or matches no
            known runner.
    """
    # Reject None and other non-strings up front, so callers get the same
    # ValueError as for an unknown model instead of an AttributeError.
    if not isinstance(model_name_or_path, str):
        raise ValueError(
            f"model_name_or_path must be a string, got {model_name_or_path!r}"
        )

    lowered = model_name_or_path.lower()
    if "bge" in lowered:
        return BGEReranking(model_name_or_path)
    if "bce" in lowered:
        return BCEReranking(model_name_or_path)

    raise ValueError(f"No valid reranking runner for {model_name_or_path}")
20 changes: 20 additions & 0 deletions libs/cli/kubeagi_cli/serve/whisper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright 2024 KubeAGI.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# TODO: not implemented yet; the class below is only a placeholder.
class Whisper:
    """
    The Whisper is used to run OpenAI's whisper model.

    It currently defines no attributes or methods.
    """
15 changes: 9 additions & 6 deletions libs/cli/kubeagi_cli/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
from typing import Optional, List
from fastapi import FastAPI

from pydantic import BaseModel
from serve.reranking import Reranking
from pydantic import BaseModel, Field
from serve.reranking import select_reranking

app = FastAPI()

Expand All @@ -28,14 +28,17 @@ def health():


class RerankingInput(BaseModel):
    # Model to rerank with. The default is the RERANKING_MODEL_PATH
    # environment variable, read once when this class is defined; it is None
    # when the variable is unset.
    model_name_or_path: Optional[str] = Field(default=os.getenv("RERANKING_MODEL_PATH"))
    # Text that each answer is paired with.
    question: str
    # Candidate texts to pair with the question; may be None.
    answers: Optional[List[str]]


@app.post("/api/v1/reranking")
def reranking(input: RerankingInput):
    """Score every answer in the request against its question.

    Args:
        input: request body carrying the model name or path, the question and
            the candidate answers. The name shadows the builtin ``input`` and
            is kept unchanged for interface stability.

    Returns:
        Whatever the selected runner's ``run`` returns for the
        ``[question, answer]`` pairs.

    Raises:
        HTTPException: 400 when no model is configured or no runner matches
            the requested model.
    """
    # Local import: this block does not rely on HTTPException being imported
    # at module level.
    from fastapi import HTTPException

    if not input.model_name_or_path:
        raise HTTPException(
            status_code=400,
            detail="model_name_or_path is required when RERANKING_MODEL_PATH is not set",
        )

    # select reranking models based on model path
    try:
        reranker = select_reranking(input.model_name_or_path)
    except ValueError as err:
        # Report an unsupported model as a client error, not a 500.
        raise HTTPException(status_code=400, detail=str(err)) from err

    # prepare reranking pairs; answers may be None, so treat that as empty
    pairs = [[input.question, answer] for answer in input.answers or []]
    return reranker.run(pairs)
7 changes: 5 additions & 2 deletions libs/cli/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,11 @@ classifiers = [
]
# Exact pins use `==`; a single `=` is not a valid version specifier.
dependencies = [
    "typer==0.9.0",
    "fastapi==0.109.0",
    "uvicorn==0.27.0",
    "FlagEmbedding==1.2.3",
    "BCEmbedding==0.1.3",
    "kubeagi-core==0.0.1",
]

[project.optional-dependencies]
Expand Down

0 comments on commit f79e4bf

Please sign in to comment.