Skip to content

Commit

Permalink
fix: always overwrite object file in Azure
Browse files Browse the repository at this point in the history
  • Loading branch information
cakeinsauce committed Sep 27, 2024
1 parent c62db73 commit 666ca48
Show file tree
Hide file tree
Showing 5 changed files with 6 additions and 27 deletions.
1 change: 0 additions & 1 deletion annotation/annotation/annotations/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,6 @@ def upload_json_to_minio(
bd_storage.get_storage(bucket_name).upload_obj(
target_path=path_to_object,
file=io.BytesIO(json_obj.encode("UTF-8")),
overwrite=True,
)


Expand Down
2 changes: 1 addition & 1 deletion annotation/tests/test_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,7 @@ def test_upload_json_to_minio():
upload_json_to_minio(test_json, path_to_object, bucket_name, Mock())
mock_get_storage.assert_called_once_with(bucket_name)
mock_get_storage().upload_obj.assert_called_once_with(
target_path=path_to_object, file=ANY, overwrite=True
target_path=path_to_object, file=ANY
)


Expand Down
11 changes: 4 additions & 7 deletions assets/assets/utils/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,13 +317,10 @@ def convert_html(self) -> bytes:
return tmp_file.read()

def convert_pdf(self) -> bytes:
try:
self.storage.upload_obj(
target_path=self._output_pdf_path,
file=BytesIO(self.file_bytes),
)
except badgerdoc_storage.storage.BadgerDocStorageResourceExistsError:
logger_.warning("File %s exists", self._output_pdf_path)
self.storage.upload_obj(
target_path=self._output_pdf_path,
file=BytesIO(self.file_bytes),
)
logger_.debug("File has been uploaded", self.file_name)
post_pdf_to_convert(
self.bucket_storage,
Expand Down
6 changes: 0 additions & 6 deletions assets/assets/utils/minio_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,6 @@ def put_file_to_minio(
except urllib3.exceptions.MaxRetryError as e:
logger_.error(f"Connection error - detail: {e}")
return False
except bd_storage.BadgerDocStorageResourceExistsError:
logger_.info("File %s already exists", file_obj.original_name)
return True
logger_.info(f"File {file_obj.original_name} successfully uploaded")
return True

Expand Down Expand Up @@ -229,9 +226,6 @@ def upload_thumbnail(
except urllib3.exceptions.MaxRetryError as e:
logger_.error(f"Connection error - detail: {e}")
return False
except bd_storage.BadgerDocStorageResourceExistsError:
logger_.info("Thumbnail %s exists", path)
return True
logger_.info("Thumbnail %s uploaded", path)
return True

Expand Down
13 changes: 1 addition & 12 deletions lib/badgerdoc_storage/src/badgerdoc_storage/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from typing import Any, Dict, List, Optional, Protocol
from urllib.parse import urlsplit

import azure.core.exceptions
import boto3
from azure.core.exceptions import ResourceExistsError
from azure.storage.blob import (
Expand Down Expand Up @@ -61,10 +60,6 @@ class BadgerDocStorageError(Exception):
pass


class BadgerDocStorageResourceExistsError(BadgerDocStorageError):
pass


class BadgerDocStorage(Protocol):
def upload(
self, target_path: str, file: str, content_type: Optional[str] = None
Expand All @@ -76,7 +71,6 @@ def upload_obj(
target_path: str,
file: bytes,
content_type: Optional[str] = None,
**kwargs: Any,
) -> None:
pass

Expand Down Expand Up @@ -124,7 +118,6 @@ def upload_obj(
target_path: str,
file: bytes,
content_type: Optional[str] = None,
**kwargs: Any,
) -> None:
params: Dict[str, Any] = {"Fileobj": file, "Key": target_path}
if content_type:
Expand Down Expand Up @@ -218,19 +211,15 @@ def upload_obj(
target_path: str,
file: bytes,
content_type: Optional[str] = None,
**kwargs: Any,
) -> None:
overwrite = kwargs.get("overwrite", False)
try:
blob_client = self.blob_service_client.get_blob_client(
self._container_name, target_path
)
blob_client.upload_blob(file, overwrite=overwrite)
blob_client.upload_blob(file, overwrite=True)
if content_type:
blob_headers = ContentSettings(content_type=content_type)
blob_client.set_http_headers(blob_headers)
except azure.core.exceptions.ResourceExistsError as err:
raise BadgerDocStorageResourceExistsError() from err
except Exception as err:
raise BadgerDocStorageError(
f"Unable to upload file into {target_path}"
Expand Down

0 comments on commit 666ca48

Please sign in to comment.