Skip to content

Commit

Permalink
chore: add pylint action and fix lint issues
Browse files Browse the repository at this point in the history
Signed-off-by: bjwswang <bjwswang@gmail.com>
  • Loading branch information
bjwswang committed Jan 22, 2024
1 parent b6f9dfa commit 8bae9aa
Show file tree
Hide file tree
Showing 54 changed files with 3,080 additions and 3,920 deletions.
File renamed without changes.
19 changes: 19 additions & 0 deletions .github/workflows/pypi_lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
name: pylint

on: pull_request

jobs:
gitHubActionForPylint:
name: GitHub Action for pylint
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@master
- name: Pylint on data-processing
uses: cclauss/GitHub-Action-for-pylint@0.7.0
with:
args: pylint --rcfile .pylintrc ./pypi/data-processing/**/*.py
- name: Pylint on ragas_once
uses: cclauss/GitHub-Action-for-pylint@0.7.0
with:
args: pylint --rcfile .pylintrc ./pypi/ragas_once/**/*.py

15 changes: 10 additions & 5 deletions .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ indent-after-paren=4
indent-string=' '

# Maximum number of characters on a single line.
max-line-length=100
max-line-length=200

# Maximum number of lines in a module.
max-module-lines=1000
Expand Down Expand Up @@ -427,10 +427,15 @@ disable=raw-checker-failed,
file-ignored,
suppressed-message,
useless-suppression,
deprecated-pragma,
use-symbolic-message-instead,
use-implicit-booleaness-not-comparison-to-string,
use-implicit-booleaness-not-comparison-to-zero
deprecated-pragma,redefined-outer-name,
use-symbolic-message-instead,missing-class-docstring,
missing-module-docstring,too-many-instance-attributes,
logging-fstring-interpolation,too-few-public-methods,
invalid-character-zero-width-space,missing-function-docstring,
duplicate-value,too-many-lines,dangerous-default-value,deprecated-method,
broad-exception-caught,redefined-builtin,c-extension-no-member,too-many-arguments,
too-many-branches,too-many-locals,too-many-statements,f-string-without-interpolation,
consider-using-enumerate,

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
Expand Down
14 changes: 14 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,20 @@ prepare-push: manifests generate fmt vet gql-gen
@go install github.com/swaggo/swag/cmd/swag@latest
@swag init -o apiserver/docs .

PYTHON_INDEX_URL ?=https://pypi.mirrors.ustc.edu.cn/simple/
.PHONY: prepare-push-pypi
prepare-push-pypi:
@echo "install black"
@pip install pylint black isort -i ${PYTHON_INDEX_URL}
@echo "format python code"
@black ./pypi/**/*.py
@echo "sort python imports"
@isort ./pypi/**/*.py
@echo "run pylint on data-processing"
@pylint --rcfile .pylintrc ./pypi/data-processing/**/*.py
@echo "run pylint on ragas-once"
@pylint --rcfile .pylintrc ./pypi/ragas_once/**/*.py

# Commands for Data-Processing
DATA_PROCESSING_IMAGE ?= kubebb/dp-base

Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@
<a href="https://goreportcard.com/report/github.com/kubeagi/arcadia">
<img alt="Go Report Card" src="https://goreportcard.com/badge/kubeagi/arcadia?style=flat-square" />
</a>
<a href="https://github.com/pylint-dev/pylint">
<img alt="Pylint Card" src="https://github.com/kubeagi/arcadia/actions/workflows/pypi_lint.yml/badge.svg" />
</a>
<a href="https://github.com/psf/black">
<img alt="CodeStyle" src="https://img.shields.io/badge/code%20style-black-000000.svg" />
</a>
</p>
</div>

Expand Down
2 changes: 1 addition & 1 deletion config/crd/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ resources:
- bases/arcadia.kubeagi.k8s.com.cn_applications.yaml
- bases/chain.arcadia.kubeagi.k8s.com.cn_llmchains.yaml
- bases/chain.arcadia.kubeagi.k8s.com.cn_retrievalqachains.yaml
- bases/chain.arcadia.kubeagi.k8s.com.cn_apichains.yaml
- bases/prompt.arcadia.kubeagi.k8s.com.cn_prompts.yaml
- bases/retriever.arcadia.kubeagi.k8s.com.cn_knowledgebaseretrievers.yaml
- bases/evaluation.arcadia.kubeagi.k8s.com.cn_rags.yaml
- bases/chain.kubeagi.k8s.com.cn_apichains.yaml
#+kubebuilder:scaffold:crdkustomizeresource

patchesStrategicMerge:
Expand Down
2 changes: 1 addition & 1 deletion deploy/charts/arcadia/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ apiVersion: v2
name: arcadia
description: A Helm chart(KubeBB Component) for KubeAGI Arcadia
type: application
version: 0.2.16
version: 0.2.17
appVersion: "0.1.0"

keywords:
Expand Down
2 changes: 1 addition & 1 deletion deploy/charts/arcadia/templates/dataprocess.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ spec:
command:
- "/bin/sh"
- "-c"
- "python /arcadia_app/data_manipulation/server.py"
- "python /arcadia_app/src/server.py"
---
apiVersion: v1
kind: Service
Expand Down
59 changes: 25 additions & 34 deletions pypi/data-processing/src/common/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,92 +15,83 @@

import logging
import os
import traceback
from pathlib import Path

import yaml

from kube import minio_cr, model_cr, postgresql_cr
from utils.class_utils import Singleton

from . import log_tag_const
import log_tag_const

logger = logging.getLogger(__name__)


class Config(metaclass=Singleton):
"""Configuration class to store the env values."""

def __init__(self):
logger.debug(f"{log_tag_const.CONFIG} start to load config file.")
self.__set_property_value()


def __set_property_value(self):
"""设置属性的值"""
# kubernetes
# namespace
k8s_pod_namespace = os.getenv('POD_NAMESPACE', 'arcadia')
k8s_pod_namespace = os.getenv("POD_NAMESPACE", "arcadia")
self.k8s_pod_namespace = k8s_pod_namespace
# config
k8s_default_config = os.getenv('DEFAULT_CONFIG', 'arcadia-config')
k8s_default_config = os.getenv("DEFAULT_CONFIG", "arcadia-config")
self.k8s_default_config = k8s_default_config


minio_config = minio_cr.get_minio_config_in_k8s_configmap(
namespace=k8s_pod_namespace,
config_map_name=k8s_default_config
namespace=k8s_pod_namespace, config_map_name=k8s_default_config
)
if minio_config is None:
minio_config = {}

# minio access key
self.minio_access_key = minio_config.get('minio_access_key')
self.minio_access_key = minio_config.get("minio_access_key")
# minio secret key
self.minio_secret_key = minio_config.get('minio_secret_key')
self.minio_secret_key = minio_config.get("minio_secret_key")
# minio api url
self.minio_api_url = minio_config.get('minio_api_url')
self.minio_api_url = minio_config.get("minio_api_url")
# minio secure
# if use HTTP, secure = False;
# if use HTTP, secure = False;
# if use HTTPS, secure = True;
self.minio_secure = minio_config.get('minio_secure')
self.minio_secure = minio_config.get("minio_secure")
# minio data set prefix
self.minio_dataset_prefix = 'dataset'
self.minio_dataset_prefix = "dataset"

llm_qa_retry_count = model_cr.get_llm_qa_retry_count_in_k8s_configmap(
namespace=k8s_pod_namespace,
config_map_name=k8s_default_config
)

namespace=k8s_pod_namespace, config_map_name=k8s_default_config
)

if llm_qa_retry_count is None:
llm_qa_retry_count = 5

self.llm_qa_retry_count = int(llm_qa_retry_count)
self.llm_qa_retry_count = int(llm_qa_retry_count)

# knowledge
# chunk size
self.knowledge_chunk_size = 500
# chunk overlap
self.knowledge_chunk_overlap = 50

# backend PostgreSQL
postgresql_config = postgresql_cr.get_postgresql_config_in_k8s_configmap(
namespace=k8s_pod_namespace,
config_map_name=k8s_default_config
)
namespace=k8s_pod_namespace, config_map_name=k8s_default_config
)
if postgresql_config is None:
postgresql_config = {}

# host
self.pg_host = postgresql_config.get('host')
self.pg_host = postgresql_config.get("host")
# port
self.pg_port = postgresql_config.get('port')
self.pg_port = postgresql_config.get("port")
# user
self.pg_user = postgresql_config.get('user')
self.pg_user = postgresql_config.get("user")
# password
self.pg_password = postgresql_config.get('password')
self.pg_password = postgresql_config.get("password")
# database name
self.pg_database = postgresql_config.get('database')
self.pg_database = postgresql_config.get("database")


config = Config()
22 changes: 9 additions & 13 deletions pypi/data-processing/src/common/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.

llm_wait_seconds = 120
LLM_WAIT_SECONDS = 120

clean_support_type = [
'remove_invisible_characters',
'space_standardization',
'remove_garbled_text',
'traditional_to_simplified',
'remove_html_tag',
'remove_emojis'
]
privacy_support_type = [
'remove_email',
'remove_ip_address',
'remove_number'
CLEAN_SUPPORT_TYPE = [
"remove_invisible_characters",
"space_standardization",
"remove_garbled_text",
"traditional_to_simplified",
"remove_html_tag",
"remove_emojis",
]
PRIVACY_SUPPORT_TYPE = ["remove_email", "remove_ip_address", "remove_number"]
32 changes: 26 additions & 6 deletions pypi/data-processing/src/common/special_characters.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
# limitations under the License.

import string

import emoji

# referenced from https://github.com/alibaba/data-juicer/blob/main/data_juicer/ops/common/special_characters.py#L26

# special characters
MAIN_SPECIAL_CHARACTERS = string.punctuation + string.digits \
+ string.whitespace
MAIN_SPECIAL_CHARACTERS = string.punctuation + string.digits + string.whitespace
OTHER_SPECIAL_CHARACTERS = (
"’ “— ™ – •‘œ    ˜ ‚ƒ„’“”–ー一▬…✦�­£​•€«»°·═"
"×士^˘⇓↓↑←→()§″′´¿−±∈¢ø‚„½¼¾¹²³―⁃,ˌ¸‹›ʺˈʻ¦‐⠀‰……‑≤≥‖"
Expand All @@ -34,6 +34,26 @@
# whitespaces in unicode can be found here:
# https://en.wikipedia.org/wiki/Whitespace_character
VARIOUS_WHITESPACES = {
' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
' ', ' ', ' ', ' ', '​', '‌', '‍', '⁠', '', '„'
}
" ",
" ",
" ",
" ",
" ",
" ",
" ",
" ",
" ",
" ",
" ",
" ",
" ",
" ",
" ",
" ",
"​",
"‌",
"‍",
"⁠",
"",
"„",
}
Loading

0 comments on commit 8bae9aa

Please sign in to comment.