Skip to content

Commit

Permalink
parse
Browse files (browse the repository at this point in the history)
  • Loading branch information
esteininger committed Mar 18, 2024
1 parent 84c0f47 commit 317adfd
Show file tree
Hide file tree
Showing 3 changed files with 3,411 additions and 90 deletions.
8 changes: 5 additions & 3 deletions src/parsers/files/text/service.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import aiohttp
from unstructured.partition.api import partition_via_api
from unstructured.partition.auto import partition
from config import unstructured

from _exceptions import InternalServerError
@@ -14,12 +14,14 @@ def __init__(self, file_stream, metadata):

async def run(self):
try:
elements = partition_via_api(
elements = partition(
file=self.file_stream,
api_key=self.api_key,
strategy="auto",
chunking_strategy="basic",
pdf_infer_table_structure="true",
metadata_filename=self.metadata["filename"],
# strategy="hi_res",
# hi_res_model_name="yolox_quantized",
)
for e in elements:
self.chunks.append(e.to_dict())
Expand Down
Loading

0 comments on commit 317adfd

Please sign in to comment.