Merge pull request #611 from wangxinbiao/main
chore: run pylint locally and fix lint issues
bjwswang committed Jan 23, 2024
2 parents 9c9e8e1 + c38505d commit 93be6a6
Showing 28 changed files with 125 additions and 381 deletions.
6 changes: 2 additions & 4 deletions pypi/data-processing/.gitignore
@@ -2,8 +2,6 @@
 __pycache__
 .ipynb_checkpoints
 
-data-processing/src/mock_data
+src/log
 
-data-processing/src/log
-
-data-processing/src/file_handle/temp_file
+src/file_handle/temp_file
2 changes: 1 addition & 1 deletion pypi/data-processing/src/common/config.py
@@ -16,7 +16,7 @@
 import logging
 import os
 
-import log_tag_const
+from common import log_tag_const
 from kube import minio_cr, model_cr, postgresql_cr
 from utils.class_utils import Singleton
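Most hunks in this commit are import-order fixes of this kind: pylint (typically run alongside isort) flags implicit first-party imports such as `import log_tag_const` and expects imports in three blank-line-separated groups, standard library first, then third-party, then first-party. A minimal illustration of that grouping convention, mirroring the common/config.py fix above (not code from the repository):

# Illustration only: the import grouping pylint/isort enforce.

# 1. Standard library
import logging
import os

# 2. Third-party packages
import urllib3

# 3. First-party project modules, imported absolutely
#    (rather than the implicit-relative "import log_tag_const")
from common import log_tag_const
from common.config import config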
@@ -15,6 +15,7 @@
 
 from sanic import Blueprint
 from sanic.response import json
+
 from service import data_process_service
 from transform.text import support_type
@@ -17,11 +17,12 @@
 import traceback
 
 import urllib3
-from common import log_tag_const
-from common.config import config
 from minio import Minio
 from minio.commonconfig import Tags
 from minio.error import S3Error
+
+from common import log_tag_const
+from common.config import config
 from utils import file_utils
 
 logger = logging.getLogger(__name__)
@@ -16,6 +16,7 @@
 import traceback
 
 import ulid
+
 from common import const, log_tag_const
 from common.config import config
 from data_store_clients import minio_store_client
@@ -16,9 +16,10 @@
 import traceback
 
 import psycopg2.extras
-from common import log_tag_const
 from dbutils.pooled_db import PooledDB
 
+from common import log_tag_const
+
 logger = logging.getLogger(__name__)
@@ -54,7 +55,7 @@ def get_connection_from_pool(pool):
     return pool.connection()
 
 
-def execute_query(pool, sql, params={}):
+def execute_query(pool, sql, params):
     """Execute a query with the parameters."""
     error = ""
     data = []
@@ -89,7 +90,7 @@ def execute_query(pool, sql, params={}):
     return {"status": 200, "message": "", "data": data}
 
 
-def execute_count_query(pool, sql, params={}):
+def execute_count_query(pool, sql, params):
     """Execute a count query with the parameters."""
     error = ""
     data = None
@@ -117,7 +118,7 @@ def execute_count_query(pool, sql, params={}):
     return {"status": 200, "message": "", "data": data}
 
 
-def execute_update(pool, sql, params={}):
+def execute_update(pool, sql, params):
     """Execute a update with the parameters."""
     error = ""
     data = None
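The `params={}` to `params` signature changes in all three helpers address pylint's dangerous-default-value warning (W0102): a mutable default is created once, when the function is defined, and is then shared by every call. A small standalone sketch of the pitfall, using a hypothetical function rather than repository code:

def append_item(item, bucket=[]):  # W0102: one shared list for all calls
    bucket.append(item)
    return bucket

print(append_item(1))  # [1]
print(append_item(2))  # [1, 2] -- the "default" list keeps its state

# Safe alternatives: make the argument required, as this commit does,
# or default to None and build the mutable value inside the function.
def append_item_safe(item, bucket=None):
    if bucket is None:
        bucket = []
    bucket.append(item)
    return bucket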
@@ -13,9 +13,8 @@
 # limitations under the License.
 
 
-import ujson
 from database_clients import postgresql_pool_client
-from utils import date_time_utils
+from utils import date_time_utils, json_utils
 
 
 def list_by_page(req_json, pool):
@@ -106,10 +105,10 @@ def add(req_json, pool, id):
         "pre_data_set_name": req_json["pre_data_set_name"],
         "pre_data_set_version": req_json["pre_data_set_version"],
         "pre_version_data_set_name": req_json["version_data_set_name"],
-        "file_names": ujson.dumps(req_json["file_names"]),
+        "file_names": json_utils.dumps(req_json["file_names"]),
         "post_data_set_name": req_json["post_data_set_name"],
         "post_data_set_version": req_json["post_data_set_version"],
-        "data_process_config_info": ujson.dumps(req_json["data_process_config_info"]),
+        "data_process_config_info": json_utils.dumps(req_json["data_process_config_info"]),
         "start_datetime": now,
         "create_datetime": now,
         "create_user": user,
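The `ujson.dumps` calls are replaced with a project-level `json_utils.dumps` helper, centralizing serialization in one place. Since the pdf_handle hunk below drops an explicit `ensure_ascii=False` at the call site, the wrapper presumably bakes that flag in; a plausible sketch, as `utils/json_utils.py` itself is not part of this diff:

# Hypothetical reconstruction of utils/json_utils.py; the real module
# may differ.
import ujson


def dumps(obj):
    # ensure_ascii=False keeps the project's Chinese text readable
    # instead of escaping it to \uXXXX sequences.
    return ujson.dumps(obj, ensure_ascii=False)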
@@ -14,6 +14,7 @@
 
 
 import ulid
+
 from database_clients import postgresql_pool_client
 from utils import date_time_utils
8 changes: 4 additions & 4 deletions pypi/data-processing/src/file_handle/common_handle.py
@@ -18,6 +18,7 @@
 import traceback
 
 import ulid
+
 from common import log_tag_const
 from common.config import config
 from database_operate import (data_process_detail_db_operate,
@@ -859,15 +860,15 @@ def _qa_split(
     else:
         # Store the QA pairs in the table
         qa_data = qa_response.get("data")
-        for i in range(len(qa_data)):
+        for _, item in enumerate(qa_data):
             qa_insert_item = {
                 "id": ulid.ulid(),
                 "task_id": task_id,
                 "document_id": document_id,
                 "document_chunk_id": document_chunk_id,
                 "file_name": file_name,
-                "question": qa_data[i][0],
-                "answer": qa_data[i][1],
+                "question": item[0],
+                "answer": item[1],
                 "create_user": create_user,
             }
@@ -1073,7 +1074,6 @@ def _updata_document_status_and_end_time(id, status, conn_pool):
 
 
 def _updata_document_progress(id, progress, update_user, conn_pool):
     try:
-        now = date_time_utils.now_str()
         document_update_item = {
             "id": id,
             "update_user": update_user,
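Two further pylint findings are fixed in this file: the index loop in `_qa_split` is rewritten per consider-using-enumerate (C0200), and the unused `now = date_time_utils.now_str()` assignment in `_updata_document_progress` is dropped (W0612, unused-variable). A short before/after sketch of the loop rewrite, using hypothetical QA pairs:

qa_data = [("What does this PR do?", "Fixes pylint findings."),
           ("Who merged it?", "bjwswang.")]

# Before: index-based access, flagged by pylint C0200.
for i in range(len(qa_data)):
    question, answer = qa_data[i][0], qa_data[i][1]

# After, as committed: enumerate with the index discarded.
for _, item in enumerate(qa_data):
    question, answer = item[0], item[1]

# Since the index is never used, plain iteration with unpacking
# would be simpler still.
for question, answer in qa_data:
    print(question, answer)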
119 changes: 0 additions & 119 deletions pypi/data-processing/src/file_handle/csv_handle.py

This file was deleted.

10 changes: 5 additions & 5 deletions pypi/data-processing/src/file_handle/pdf_handle.py
@@ -16,15 +16,15 @@
 import logging
 import traceback
 
-import ujson
 import ulid
+from langchain.document_loaders import PyPDFLoader
+from langchain.text_splitter import SpacyTextSplitter
+
 from common import log_tag_const
 from common.config import config
 from database_operate import data_process_document_chunk_db_operate
 from file_handle import common_handle
-from langchain.document_loaders import PyPDFLoader
-from langchain.text_splitter import SpacyTextSplitter
-from utils import file_utils
+from utils import file_utils, json_utils
 
 logger = logging.getLogger(__name__)

@@ -75,7 +75,7 @@ def text_manipulate(
             "task_id": task_id,
             "status": "not_start",
             "content": content,
-            "meta_info": ujson.dumps(meta_info, ensure_ascii=False),
+            "meta_info": json_utils.dumps(meta_info),
             "page_number": page,
             "creator": create_user,
         }
3 changes: 2 additions & 1 deletion pypi/data-processing/src/file_handle/word_handle.py
@@ -17,11 +17,12 @@
 import traceback
 
 import ulid
+from langchain.text_splitter import SpacyTextSplitter
+
 from common import log_tag_const
 from common.config import config
 from database_operate import data_process_document_chunk_db_operate
 from file_handle import common_handle
-from langchain.text_splitter import SpacyTextSplitter
 from utils import docx_utils, file_utils
 
 logger = logging.getLogger(__name__)
15 changes: 3 additions & 12 deletions pypi/data-processing/src/kube/client.py
@@ -17,10 +17,11 @@
 import os
 import traceback
 
-from common import log_tag_const
-from kubernetes import client, config
+from kubernetes import config
 from kubernetes.client import CoreV1Api, CustomObjectsApi
 
+from common import log_tag_const
+
 from .custom_resources import (arcadia_resource_datasets,
                                arcadia_resource_datasources,
                                arcadia_resource_models,
@@ -116,16 +117,6 @@ def get_versioneddatasets_status(self, namespace: str, name: str):
             name,
         )
 
-    def patch_versioneddatasets_status(self, namespace: str, name: str, status: any):
-        CustomObjectsApi().patch_namespaced_custom_object_status(
-            arcadia_resource_versioneddatasets.get_group(),
-            arcadia_resource_versioneddatasets.get_version(),
-            namespace,
-            arcadia_resource_versioneddatasets.get_name(),
-            name,
-            status,
-        )
-
     def get_versionedmodels_status(self, namespace: str, name: str):
         return CustomObjectsApi().get_namespaced_custom_object_status(
             arcadia_resource_models.get_group(),
33 changes: 0 additions & 33 deletions pypi/data-processing/src/kube/dataset_cr.py
@@ -45,7 +45,6 @@ def update_dataset_k8s_cr(namespace, version_data_set_name, reason, message):
             found_index = i
             break
 
-    result = None
     if found_index is None:
         conditions.append(
             {
@@ -74,35 +73,3 @@
         logger.error(str(ex))
         return {"status": 400, "message": "更新数据集状态失败", "data": ""}
 
-
-def get_dataset_status_k8s_cr(namespace, version_data_set_name):
-    """get the condition info for the dataset.
-    namespace: namespace;
-    version_data_set_name: version dataset name;
-    """
-    try:
-        dataset_status = None
-        kube = client.KubeEnv()
-
-        one_cr_datasets = kube.get_versioneddatasets_status(
-            namespace, version_data_set_name
-        )
-
-        conditions = one_cr_datasets["status"]["conditions"]
-
-        found_index = None
-        for i in range(len(conditions)):
-            item = conditions[i]
-            if item["type"] == "DataProcessing":
-                found_index = i
-                break
-
-        result = None
-        if found_index:
-            dataset_status = conditions[found_index].get("reason")
-
-        return {"status": 200, "message": "获取数据集状态成功", "data": dataset_status}
-    except Exception as ex:
-        logger.error(str(ex))
-        return {"status": 400, "message": "获取数据集状态失败", "data": ""}