diff --git a/docs/images/data-process.drawio b/docs/images/data-process.drawio
new file mode 100644
index 000000000..df5ea5cb7
--- /dev/null
+++ b/docs/images/data-process.drawio
@@ -0,0 +1,109 @@
diff --git a/docs/images/data-process.drawio.png b/docs/images/data-process.drawio.png
new file mode 100644
index 000000000..7f2bb48cf
Binary files /dev/null and b/docs/images/data-process.drawio.png differ
diff --git a/pypi/data-processing/README.md b/pypi/data-processing/README.md
index 8ff1e59ca..258d8f442 100644
--- a/pypi/data-processing/README.md
+++ b/pypi/data-processing/README.md
@@ -19,7 +19,7 @@ The data processing process includes: cleaning abnormal data, filtering, de-dupl
 
 ## Design
 
-![Design](../assets/data_process.drawio.png)
+![Design](../../docs/images/data-process.drawio.png)
 
 ## Local Development
 ### Software Requirements
diff --git a/pypi/data-processing/requirements.txt b/pypi/data-processing/requirements.txt
index 15a97a6f6..20a366d01 100644
--- a/pypi/data-processing/requirements.txt
+++ b/pypi/data-processing/requirements.txt
@@ -6,7 +6,7 @@ aiohttp==3.8.6
 ulid==1.1
 minio==7.1.17
 zhipuai==1.0.7
-langchain==0.0.336
+langchain==0.0.354
 spacy==3.5.4
 pypdf==3.17.1
 emoji==2.2.0
diff --git a/pypi/data-processing/src/database_operate/dp_document_image_db_operate.py b/pypi/data-processing/src/database_operate/dp_document_image_db_operate.py
index a8bc57c30..36e77b1d5 100644
--- a/pypi/data-processing/src/database_operate/dp_document_image_db_operate.py
+++ b/pypi/data-processing/src/database_operate/dp_document_image_db_operate.py
@@ -15,6 +15,7 @@
 from database_clients import postgresql_pool_client
 from utils import date_time_utils
 
+
 def add(
     req_json,
     pool
diff --git a/pypi/data-processing/src/database_operate/dp_document_web_url_db_operate.py b/pypi/data-processing/src/database_operate/dp_document_web_url_db_operate.py
index 003009d8e..f665ab19b 100644
--- a/pypi/data-processing/src/database_operate/dp_document_web_url_db_operate.py
+++ b/pypi/data-processing/src/database_operate/dp_document_web_url_db_operate.py
@@ -16,6 +16,7 @@
 from database_clients import postgresql_pool_client
 from utils import date_time_utils
 
+
 def add(
     req_json,
     pool
diff --git a/pypi/data-processing/src/document_loaders/async_playwright.py b/pypi/data-processing/src/document_loaders/async_playwright.py
index 0fd6272da..f7e6ae6ea 100644
--- a/pypi/data-processing/src/document_loaders/async_playwright.py
+++ b/pypi/data-processing/src/document_loaders/async_playwright.py
@@ -13,13 +13,17 @@
 # limitations under the License.
 
 import logging
+import time
+import traceback
 from typing import List
 
-from langchain_community.document_loaders.base import BaseLoader
+import playwright
 from langchain_community.document_transformers import Html2TextTransformer
 from langchain_core.documents import Document
+from playwright.async_api import async_playwright
 
 from common import log_tag_const
+from document_loaders.base import BaseLoader
 
 logger = logging.getLogger(__name__)
 
@@ -32,7 +36,7 @@ def __init__(
         url: str,
         max_count: int = 100,
         max_depth: int = 1,
-        interval_time: int = 1,
+        interval_time: int = 1000,
     ):
         """
         Initialize the loader with a list of URL paths.
@@ -46,18 +50,17 @@
         Raises:
             ImportError: If the required 'playwright' package is not installed.
""" - self.url = url - self.max_count = max_count - self.max_depth = max_depth - self.interval_time = interval_time - - try: - import playwright - except ImportError: - raise ImportError( - "playwright is required for AsyncPlaywrightLoader. " - "Please install it with `pip install playwright`." - ) + if max_count is None: + max_count = 100 + if max_depth is None: + max_depth = 1 + if interval_time is None: + interval_time = 1000 + + self._url = url + self._max_count = max_count + self._max_depth = max_depth + self._interval_time = interval_time / 1000 async def ascrape_playwright(self, url: str) -> str: """ @@ -70,7 +73,6 @@ async def ascrape_playwright(self, url: str) -> str: str: The scraped HTML content or an error message if an exception occurs. """ - from playwright.async_api import async_playwright logger.info("Starting scraping...") results = "" @@ -121,20 +123,18 @@ async def get_all_url(self): "".join( [ f"{log_tag_const.WEB_CRAWLING} Get all url in a web page\n", - f" url: {self.url}" + f" url: {self._url}" ] ) ) - all_url = [self.url] - sub_urls = [self.url] - + all_url = [self._url] + sub_urls = [self._url] try: - for i in range(1, self.max_depth): + for i in range(1, self._max_depth): for sub_url in sub_urls: children_urls = await self._get_children_url( url=sub_url, - max_count=self.max_count, url_count=len(all_url) ) @@ -147,12 +147,12 @@ async def get_all_url(self): all_url = list(unique_urls) # 如果达到最大数量限制,直接返回 - if res.get("url_count") >= self.max_count: + if res.get("url_count") >= self._max_count: logger.info( "".join( [ f"{log_tag_const.WEB_CRAWLING} The number of URLs has reached the upper limit.\n", - f" max_count: {self.max_count}\n" + f" max_count: {self._max_count}\n" ] ) ) @@ -160,8 +160,8 @@ async def get_all_url(self): sub_urls = res.get("children_url") # 时间间隔 - logger.info(f"{log_tag_const.WEB_CRAWLING} Wait for {self.interval_time} seconds before continuing the visit.") - time.sleep(self.interval_time) + logger.info(f"{log_tag_const.WEB_CRAWLING} Wait for {self._interval_time} seconds before continuing the visit.") + time.sleep(self._interval_time) return all_url except Exception: logger.error( @@ -188,7 +188,7 @@ async def _get_children_url(self, url, url_count): [ f"{log_tag_const.WEB_CRAWLING} Get sub url in a web page\n", f" url: {url}\n", - f" max_count: {self.max_count}\n", + f" max_count: {self._max_count}\n", f" url_count: {url_count}" ] ) @@ -209,12 +209,12 @@ async def _get_children_url(self, url, url_count): for link in links: href = await link.get_attribute('href') # 需要抓取的url数量不得超过最大数量 - if url_count >= self.max_count: + if url_count >= self._max_count: logger.info( "".join( [ f"{log_tag_const.WEB_CRAWLING} The number of URLs has reached the upper limit.\n", - f" max_count: {self.max_count}\n", + f" max_count: {self._max_count}\n", f" url_count: {url_count}" ] ) diff --git a/pypi/data-processing/src/document_loaders/base.py b/pypi/data-processing/src/document_loaders/base.py index a31e22eb5..7830efd4a 100644 --- a/pypi/data-processing/src/document_loaders/base.py +++ b/pypi/data-processing/src/document_loaders/base.py @@ -17,6 +17,7 @@ from langchain_core.documents import Document + class BaseLoader(ABC): """Interface for Document Loader.