Skip to content

Commit

Permalink
return token size in embedding controller
Browse files Browse the repository at this point in the history
  • Loading branch information
esteininger committed Mar 18, 2024
1 parent b1b40f3 commit 53f0e38
Show file tree
Hide file tree
Showing 8 changed files with 3,013 additions and 134 deletions.
3,060 changes: 2,956 additions & 104 deletions src/api/poetry.lock

Large diffs are not rendered by default.

9 changes: 6 additions & 3 deletions src/api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ authors = ["Ethan Steininger <esteininger21@gmail.com>"]
readme = "README.md"

[tool.poetry.dependencies]
python = ">=3.10,<3.13"
python = ">=3.10,<3.12"
fastapi = "^0.110.0"
uvicorn = "^0.28.0"
haikunator = "^2.1.0"
magika = "0.5.1"
cryptography = "^42.0.5"
magika = "<0.5.1"
cryptography = "42.0.2"
kombu = "^5.3.5"
motor = "^3.3.2"
redis = "^5.0.3"
Expand All @@ -22,6 +22,9 @@ httpx = "^0.27.0"
pymongo = "4.6.0"
boto3 = "1.33.1"
aioboto3 = "12.1.0"
openai = "^1.14.1"
psycopg2 = "^2.9.9"
unstructured = {extras = ["all-docs"], version = "^0.12.6"}

[build-system]
requires = ["poetry-core"]
Expand Down
10 changes: 5 additions & 5 deletions src/inference/embed/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,19 @@
from .model import (
EmbeddingRequest,
EmbeddingResponse,
DimensionRequest,
DimensionsResponse,
ConfigsRequest,
ConfigsResponse,
)

from embed.service import EmbeddingHandler

router = APIRouter()


@router.get("/dimensions", response_model=DimensionsResponse)
async def get_dimensions(data: DimensionRequest):
@router.get("/configs", response_model=ConfigsResponse)
async def get_dimensions(data: ConfigsRequest):
embedding_handler = EmbeddingHandler(data.modality, data.model)
return embedding_handler.get_dimensions()
return embedding_handler.get_configs()


@router.get("/", response_model=EmbeddingResponse)
Expand Down
13 changes: 7 additions & 6 deletions src/inference/embed/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,17 @@ class Modality(Enum):
TEXT = "text"


class DimensionRequest(BaseModel):
class ConfigsRequest(BaseModel):
    """Request body for the embedding-config lookup endpoint."""
    # NOTE(review): default is the raw string "text"; Pydantic coerces it to
    # Modality.TEXT because the enum's value is "text" — using Modality.TEXT
    # directly would be clearer. TODO confirm coercion is intended.
    modality: Optional[Modality] = "text"
    # Hugging Face model identifier used to build the EmbeddingHandler.
    model: Optional[str] = "sentence-transformers/all-MiniLM-L6-v2"


class ConfigsResponse(BaseModel):
    """Model configuration returned by EmbeddingHandler.get_configs()."""
    # Embedding vector width (model hidden size).
    dimensions: int
    # Server-side time spent gathering the values, in milliseconds.
    elapsed_time: float
    # Tokenizer maximum input length (tokenizer.model_max_length).
    token_size: int


class EmbeddingRequest(BaseModel):
input: str
modality: Optional[Modality] = "text"
Expand All @@ -24,8 +30,3 @@ class EmbeddingRequest(BaseModel):
class EmbeddingResponse(BaseModel):
    """Embedding result returned by the encode endpoint."""
    # The embedding vector produced for the input.
    embedding: List[float]
    # Server-side encoding time, in milliseconds.
    elapsed_time: float


class DimensionsResponse(BaseModel):
    """Legacy response model for the old /dimensions endpoint
    (superseded by ConfigsResponse, which adds token_size)."""
    # Embedding vector width (model hidden size).
    dimensions: int
    # Server-side processing time, in milliseconds.
    elapsed_time: float
7 changes: 6 additions & 1 deletion src/inference/embed/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@ def encode(self, data):
"elapsed_time": (time.time() * 1000) - start_time,
}

def get_dimensions(self):
def get_configs(self):
    """Collect the embedding model's configuration plus timing info.

    Returns:
        dict with keys:
            dimensions (int): embedding vector width from the model config.
            token_size (int): tokenizer's maximum input length.
            elapsed_time (float): time spent gathering the values, in ms.
    """
    # perf_counter() is monotonic, so elapsed_time can never be negative if
    # the wall clock is adjusted mid-call (time.time() offers no such
    # guarantee). Units stay milliseconds, as before.
    start = time.perf_counter()
    dimensions = self.service.get_dimensions()
    token_size = self.service.get_token_size()
    return {
        "dimensions": dimensions,
        "token_size": token_size,
        "elapsed_time": (time.perf_counter() - start) * 1000.0,
    }

Expand Down Expand Up @@ -59,3 +61,6 @@ def encode(self, sentences):

def get_dimensions(self):
    # Embedding vector width: hidden_size from the underlying
    # transformers model config.
    return self.model.config.hidden_size

def get_token_size(self):
    # Maximum input length accepted by the tokenizer.
    # NOTE(review): some tokenizers report a huge sentinel (~1e30) when no
    # real limit is configured — TODO confirm for the models used here,
    # since ConfigsResponse declares token_size as int.
    return self.tokenizer.model_max_length
16 changes: 16 additions & 0 deletions src/parsers/_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from fastapi.responses import JSONResponse
from typing import Optional


def create_json_response(
    success: bool, status: int, error: Optional[str], response: Optional[Any]
):
    """Build the API's uniform JSON envelope as a JSONResponse.

    Args:
        success: whether the operation succeeded.
        status: HTTP status code; echoed in the body and used as the
            response's status_code.
        error: error message, or ""/None when there is no error.
        response: payload. Annotated Any (was str) because callers pass
            lists of chunk dicts as well as strings; it must still be
            JSON-serializable for JSONResponse to encode it.

    Returns:
        fastapi.responses.JSONResponse wrapping the envelope.
    """
    return JSONResponse(
        content={
            "success": success,
            "status": status,
            "error": error,
            "response": response,
        },
        status_code=status,
    )
16 changes: 12 additions & 4 deletions src/parsers/files/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

from .utils import generate_filename_from_url, get_filename_from_cd
from .text.service import TextService

from _exceptions import InternalServerError, NotFoundError, BadRequestError
from _utils import create_json_response

files = {
"text": ["pdf", "docx", "txt", "md", "html", "xml"],
Expand Down Expand Up @@ -47,6 +49,12 @@ def detect_filetype(self, contents):
try:
m = Magika()
res = m.identify_bytes(contents)
# {
# "label": "pdf",
# "description": "PDF document",
# "mime_type": "application/pdf",
# "group": "document",
# }
data = {
"label": res.output.ct_label,
"description": res.output.description,
Expand All @@ -63,15 +71,15 @@ async def parse_file(self):
# Download file into memory
contents, filename = await self.download_into_memory()
stream = BytesIO(contents)

# Detect file type
metadata = self.detect_filetype(stream.getvalue())
metadata["filename"] = filename
metadata["start_time"] = time.time() * 1000

if metadata["label"] == "pdf":
start_time = time.time() * 1000
text_service = TextService(stream, metadata)

text_service = TextService(stream, metadata)
if metadata["label"] == "pdf":
return await text_service.run_pdf()

else:
raise BadRequestError(error={"message": "File type not supported"})
16 changes: 5 additions & 11 deletions src/parsers/files/text/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from unstructured.cleaners.core import clean
from unstructured.chunking.basic import chunk_elements


from _utils import create_json_response
from _exceptions import InternalServerError


Expand All @@ -29,24 +29,18 @@ async def run_pdf(self):
try:
elements = partition_pdf(
file=self.file_stream,
infer_table_structure="true",
infer_table_structure=False,
metadata_filename=self.metadata["filename"],
# skip_infer_table_types=[],
strategy="hi_res",
hi_res_model_name="detectron2_onnx",
# strategy="hi_res",
# hi_res_model_name="detectron2_onnx",
)
chunks = self._chunk(elements)
for c in chunks:
response_obj = c.to_dict()
response_obj["text"] = self._clean(response_obj["text"])
self.chunks.append(response_obj)

return {
"success": True,
"status": 200,
"error": None,
"response": self.chunks,
}, 200
return create_json_response(True, 200, "", self.chunks)
except Exception as e:
error = {"message": str(e)}
raise InternalServerError(error)

0 comments on commit 53f0e38

Please sign in to comment.