From 087df444d544c6d6ff20bcc976d1de0c9e6fa24a Mon Sep 17 00:00:00 2001 From: Lanture1064 <34346740+Lanture1064@users.noreply.github.com> Date: Wed, 20 Mar 2024 11:52:45 +0800 Subject: [PATCH 1/3] chore: refactor ragas_once & split dependency --- .github/workflows/image_build.yml | 2 +- .github/workflows/image_build_test.yml | 2 +- .../workflows/{cli_test.yaml => server_test.yaml} | 4 ++-- Makefile | 8 +++++++- docker/{Dockerfile => Dockerfile.server} | 3 ++- libs/cli/README.md | 6 ++++++ libs/cli/kubeagi_cli/cli.py | 6 ++++-- libs/cli/kubeagi_cli/convert.py | 7 +++---- .../kubeagi_cli}/evaluation/__init__.py | 0 .../kubeagi_cli}/evaluation/ragas_eval.py | 0 libs/cli/pyproject.toml | 15 ++++++++++----- libs/core/README.md | 2 ++ libs/core/pyproject.toml | 12 ++++++------ 13 files changed, 44 insertions(+), 23 deletions(-) rename .github/workflows/{cli_test.yaml => server_test.yaml} (96%) rename docker/{Dockerfile => Dockerfile.server} (88%) rename libs/{core/kubeagi_core => cli/kubeagi_cli}/evaluation/__init__.py (100%) rename libs/{core/kubeagi_core => cli/kubeagi_cli}/evaluation/ragas_eval.py (100%) diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index 8858492..8ca1f57 100644 --- a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -60,7 +60,7 @@ jobs: uses: docker/build-push-action@v5 with: context: . - file: docker/Dockerfile + file: docker/Dockerfile.server platforms: linux/amd64,linux/arm64 tags: | kubeagi/core-library-cli:latest diff --git a/.github/workflows/image_build_test.yml b/.github/workflows/image_build_test.yml index 8f3e973..8739c18 100644 --- a/.github/workflows/image_build_test.yml +++ b/.github/workflows/image_build_test.yml @@ -46,7 +46,7 @@ jobs: uses: docker/build-push-action@v5 with: context: . - file: docker/Dockerfile + file: docker/Dockerfile.server platforms: linux/amd64,linux/arm64 push: false build-args: | diff --git a/.github/workflows/cli_test.yaml b/.github/workflows/server_test.yaml similarity index 96% rename from .github/workflows/cli_test.yaml rename to .github/workflows/server_test.yaml index 22efbf9..3c28628 100644 --- a/.github/workflows/cli_test.yaml +++ b/.github/workflows/server_test.yaml @@ -46,7 +46,7 @@ jobs: uses: docker/build-push-action@v5 with: context: . - file: docker/Dockerfile + file: docker/Dockerfile.server load: true push: false tags: test-image @@ -59,7 +59,7 @@ jobs: docker ps - name: Run test script run: | - docker logs cli-container + sleep 10 docker cp tests/example_test.sh cli-container:/tmp/example_test.sh docker exec cli-container bash /tmp/example_test.sh - name: Stop and Remove Container diff --git a/Makefile b/Makefile index 19f93d1..3ab4c02 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,13 @@ lint: .PHONY: install install: @pip install -e libs/core/ - @pip install -e libs/cli/ + @pip install -e 'libs/cli/.[server, core, eval]' + +install-eval: + @pip install -e 'libs/cli/.[eval]' + +install-server: + @pip install -e 'libs/cli/.[server]' .PHONY: server server: install diff --git a/docker/Dockerfile b/docker/Dockerfile.server similarity index 88% rename from docker/Dockerfile rename to docker/Dockerfile.server index eb5fed4..7ce6b99 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile.server @@ -17,12 +17,13 @@ RUN apt-get install -y gcc python3-dev # Official: https://pypi.org/simple ARG PYTHON_INDEX_URL=https://pypi.mirrors.ustc.edu.cn/simple/ COPY libs /libs +RUN python -m pip install --upgrade pip -i ${PYTHON_INDEX_URL} RUN python -m pip install ragas langchain sentencepiece protobuf -i ${PYTHON_INDEX_URL} WORKDIR /libs/core RUN pip install -e . -i ${PYTHON_INDEX_URL} WORKDIR /libs/cli -RUN pip install -e . -i ${PYTHON_INDEX_URL} +RUN pip install -e ".[server]" -i ${PYTHON_INDEX_URL} ENV RERANKING_MODEL_PATH=BAAI/bge-reranker-large diff --git a/libs/cli/README.md b/libs/cli/README.md index a6000a4..7008320 100644 --- a/libs/cli/README.md +++ b/libs/cli/README.md @@ -13,6 +13,12 @@ installation. - Install the Python SDK `pip install kubeagi-cli` +- Install Server `pip install kubeagi-cli[server]` + +- Install Evaluation `pip install kubeagi-cli[eval]` + +- Install Core `pip install kubeagi-cli[core]` + At this point, you should be able to run the following code: ```shell diff --git a/libs/cli/kubeagi_cli/cli.py b/libs/cli/kubeagi_cli/cli.py index b99fff2..069f90a 100644 --- a/libs/cli/kubeagi_cli/cli.py +++ b/libs/cli/kubeagi_cli/cli.py @@ -16,9 +16,8 @@ import typer from typing_extensions import Annotated -from kubeagi_core.evaluation.ragas_eval import RagasEval + from kubeagi_cli import convert -from kubeagi_cli.server import webapp __version__ = "0.0.1" @@ -52,6 +51,7 @@ def serve( ] = "info", ): import uvicorn + from kubeagi_cli.server import webapp uvicorn.run(app=webapp, host=host, port=port, log_level=log_level) @@ -113,6 +113,8 @@ def evaluate( ), ] = None, ): + from kubeagi_cli.evaluation.ragas_eval import RagasEval + print("evaluate RAG(Retrieval Augmented Generation)") # Initialize ragas_once with provided arguments eval = RagasEval( diff --git a/libs/cli/kubeagi_cli/convert.py b/libs/cli/kubeagi_cli/convert.py index b89edb2..8de0715 100644 --- a/libs/cli/kubeagi_cli/convert.py +++ b/libs/cli/kubeagi_cli/convert.py @@ -14,14 +14,10 @@ import typer -import ujson from typing import List from typing_extensions import Annotated -from kubeagi_core.document_transformers.pdf2csv import PDF2CSVTransform - - convert_cli = typer.Typer(no_args_is_help=True, add_completion=False) @@ -56,6 +52,9 @@ def pdf( typer.Option(help="text chunk overlap"), ] = 50, ): + import ujson + from kubeagi_core.document_transformers.pdf2csv import PDF2CSVTransform + """ pdf transformer csv. """ diff --git a/libs/core/kubeagi_core/evaluation/__init__.py b/libs/cli/kubeagi_cli/evaluation/__init__.py similarity index 100% rename from libs/core/kubeagi_core/evaluation/__init__.py rename to libs/cli/kubeagi_cli/evaluation/__init__.py diff --git a/libs/core/kubeagi_core/evaluation/ragas_eval.py b/libs/cli/kubeagi_cli/evaluation/ragas_eval.py similarity index 100% rename from libs/core/kubeagi_core/evaluation/ragas_eval.py rename to libs/cli/kubeagi_cli/evaluation/ragas_eval.py diff --git a/libs/cli/pyproject.toml b/libs/cli/pyproject.toml index f8a7624..8285c13 100644 --- a/libs/cli/pyproject.toml +++ b/libs/cli/pyproject.toml @@ -17,16 +17,21 @@ classifiers = [ ] dependencies = [ "typer==0.9.0", +] + +[project.optional-dependencies] +dev = ["black==23.3.0", "pylint==3.1.0"] +eval = ["langchain>=0.1.0", "ragas>=0.1.0"] +server = [ "fastapi==0.109.0", "uvicorn==0.27.0", "FlagEmbedding==1.2.3", "BCEmbedding==0.1.3", - "kubeagi-core==0.0.1", - "ujson==5.9.0", + "sentencepiece", + "protobuf", + "ujson==5.9.0" ] - -[project.optional-dependencies] -dev = ["black==23.3.0", "pylint==3.1.0"] +core = ["kubeagi-core==0.0.1"] [project.urls] Homepage = "https://github.com/kubeagi/core-library" diff --git a/libs/core/README.md b/libs/core/README.md index a45dede..f64da08 100644 --- a/libs/core/README.md +++ b/libs/core/README.md @@ -13,6 +13,8 @@ installation. - Install the Python SDK `pip install kubeagi_core` +- For experimental features (e.g. reading PDF files) use `pip install kubeagi_core[experiment]` + At this point, you should be able to run the following code: ```python diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index 791a910..e2cc518 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -20,23 +20,23 @@ dependencies = [ "emoji==2.2.0", "ftfy==6.1.1", "kubernetes==25.3.0", - "langchain>=0.1.0", - "ragas>=0.1.0", "opencc-python-reimplemented==0.1.7", "selectolax==0.3.17", "spacy==3.5.4", "zhipuai==1.0.7", +] + +[project.optional-dependencies] +dev = ["black==23.3.0", "pylint==3.1.0"] +experiment = [ "unstructured==0.12.0", "unstructured-inference==0.7.21", "unstructured.pytesseract==0.3.12", "pdf2image==1.17.0", "pdfminer.six==20231228", - "pikepdf==8.13.0", + "pikepdf==8.13.0" ] -[project.optional-dependencies] -dev = ["black==23.3.0", "pylint==3.1.0"] - [project.urls] Homepage = "https://github.com/kubeagi/core-library" Issues = "https://github.com/kubeagi/core-library/issues" \ No newline at end of file From c9ee5591cbb3db80a575552fb1fc215d93bb48c1 Mon Sep 17 00:00:00 2001 From: Lanture1064 <34346740+Lanture1064@users.noreply.github.com> Date: Wed, 20 Mar 2024 14:53:31 +0800 Subject: [PATCH 2/3] Update server_test.yaml --- .github/workflows/server_test.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/server_test.yaml b/.github/workflows/server_test.yaml index 3c28628..3df7df9 100644 --- a/.github/workflows/server_test.yaml +++ b/.github/workflows/server_test.yaml @@ -60,6 +60,7 @@ jobs: - name: Run test script run: | sleep 10 + docker log cli-container docker cp tests/example_test.sh cli-container:/tmp/example_test.sh docker exec cli-container bash /tmp/example_test.sh - name: Stop and Remove Container From fb0677473f4ecb96df23bf92d2c5ade211dec135 Mon Sep 17 00:00:00 2001 From: Lanture1064 <34346740+Lanture1064@users.noreply.github.com> Date: Wed, 20 Mar 2024 15:08:23 +0800 Subject: [PATCH 3/3] Update server_test.yaml --- .github/workflows/server_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/server_test.yaml b/.github/workflows/server_test.yaml index 3df7df9..3ee0fb3 100644 --- a/.github/workflows/server_test.yaml +++ b/.github/workflows/server_test.yaml @@ -60,7 +60,7 @@ jobs: - name: Run test script run: | sleep 10 - docker log cli-container + docker logs cli-container docker cp tests/example_test.sh cli-container:/tmp/example_test.sh docker exec cli-container bash /tmp/example_test.sh - name: Stop and Remove Container