Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Commit

Permalink
Merge pull request #89 from ds-wizard/release/3.6.0
Browse files Browse the repository at this point in the history
Release 3.6.0
  • Loading branch information
MarekSuchanek committed Dec 7, 2021
2 parents ba67435 + 0f6c56d commit 01f7d39
Show file tree
Hide file tree
Showing 16 changed files with 1,123 additions and 967 deletions.
10 changes: 10 additions & 0 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,16 @@ logging:
level: INFO
globalLevel: WARNING

templates:
- ids:
- "dsw:"
requests:
enabled:
limit:
timeout:
secrets:
API_TOKEN:

#documents:
# naming:
# strategy: sanitize # uuid|slugify|sanitize
Expand Down
3 changes: 2 additions & 1 deletion document_worker/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ def validate_config(ctx, param, value: IO):
@click.command(name='docworker')
@click.version_option(version=VERSION)
@click.argument('config', envvar='DOCWORKER_CONFIG',
type=click.File('r'), callback=validate_config)
type=click.File('r', encoding='utf-8'),
callback=validate_config)
@click.argument('workdir', envvar='DOCWORKER_WORKDIR',
type=click.Path(dir_okay=True, exists=True))
def main(config: DocumentWorkerConfig, workdir: str):
Expand Down
89 changes: 87 additions & 2 deletions document_worker/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import shlex
import yaml
from typing import List
from typing import List, Optional

from document_worker.consts import DocumentNamingStrategy

Expand Down Expand Up @@ -64,6 +64,16 @@ def __str__(self):
f'- naming_strategy = {self.naming_strategy}\n'


class ExperimentalConfig:
    """Container for the experimental ``more_apps_enabled`` switch."""

    def __init__(self, more_apps_enabled: bool):
        # Stored as given; no validation or coercion is performed here.
        self.more_apps_enabled = more_apps_enabled

    def __str__(self):
        """Return a newline-terminated, two-line summary of the config."""
        parts = (
            'ExperimentalConfig\n',
            f'- more_apps_enabled = {self.more_apps_enabled}\n',
        )
        return ''.join(parts)


class CommandConfig:

def __init__(self, executable: str, args: str, timeout: float):
Expand All @@ -82,16 +92,68 @@ def __str__(self):
f'- timeout = {self.timeout} ({type(self.timeout)})\n'


class TemplateRequestsConfig:
    """Request policy for a template: on/off switch, request limit, timeout."""

    def __init__(self, enabled: bool, limit: int, timeout: int):
        self.enabled, self.limit, self.timeout = enabled, limit, timeout

    @staticmethod
    def load(data: dict):
        """Build the config from a dict, falling back to defaults.

        Keys missing from ``data`` default to ``enabled=False``,
        ``limit=100`` and ``timeout=1``.
        """
        enabled = data.get('enabled', False)
        limit = data.get('limit', 100)
        timeout = data.get('timeout', 1)
        return TemplateRequestsConfig(
            enabled=enabled,
            limit=limit,
            timeout=timeout,
        )


class TemplateConfig:
    """Settings for a group of templates selected by ID prefix.

    Attributes are stored exactly as passed:
    ``ids`` is a list of template-ID prefixes, ``requests`` is the request
    policy, and ``secrets`` is a name-to-value mapping.
    """

    def __init__(self, ids: List[str], requests: TemplateRequestsConfig,
                 secrets: dict[str, str]):
        self.ids = ids
        self.requests = requests
        self.secrets = secrets

    @staticmethod
    def load(data: dict):
        """Build a ``TemplateConfig`` from one entry of the templates list.

        Keys that are missing *or* present with an empty/null value fall
        back to an empty list (``ids``), the request defaults (``requests``)
        and an empty dict (``secrets``).
        """
        # NOTE: never print or log `data` here - it carries the secrets
        # mapping, which must not leak into stdout or log files.
        return TemplateConfig(
            ids=data.get('ids') or [],
            requests=TemplateRequestsConfig.load(
                # `requests:` with nothing under it is loaded as None by YAML.
                data.get('requests') or {},
            ),
            secrets=data.get('secrets') or {},
        )


class TemplatesConfig:
    """Ordered collection of per-template configurations."""

    def __init__(self, templates: List[TemplateConfig]):
        self.templates = templates

    def get_config(self, template_id: str) -> Optional[TemplateConfig]:
        """Return the first config with an ID prefix matching ``template_id``.

        Configs are checked in list order; ``None`` is returned when no
        config has a prefix that ``template_id`` starts with.
        """
        matching = (
            candidate
            for candidate in self.templates
            if any(map(template_id.startswith, candidate.ids))
        )
        return next(matching, None)


class DocumentWorkerConfig:

def __init__(self, db: DatabaseConfig, s3: S3Config, log: LoggingConfig,
doc: DocumentsConfig, pandoc: CommandConfig, wkhtmltopdf: CommandConfig):
doc: DocumentsConfig, pandoc: CommandConfig, wkhtmltopdf: CommandConfig,
templates: TemplatesConfig, experimental: ExperimentalConfig):
self.db = db
self.s3 = s3
self.log = log
self.doc = doc
self.pandoc = pandoc
self.wkhtmltopdf = wkhtmltopdf
self.templates = templates
self.experimental = experimental

def __str__(self):
return f'DocumentWorkerConfig\n' \
Expand All @@ -100,6 +162,7 @@ def __str__(self):
f'{self.s3}' \
f'{self.log}' \
f'{self.doc}' \
f'{self.experimental}' \
f'Pandoc: {self.pandoc}' \
f'WkHtmlToPdf: {self.wkhtmltopdf}' \
f'====================\n'
Expand All @@ -115,6 +178,8 @@ class DocumentWorkerConfigParser:
EXTERNAL_SECTION = 'externals'
PANDOC_SUBSECTION = 'pandoc'
WKHTMLTOPDF_SUBSECTION = 'wkhtmltopdf'
TEMPLATES_SECTION = 'templates'
EXPERIMENTAL_SECTION = 'experimental'

DEFAULTS = {
DB_SECTION: {
Expand Down Expand Up @@ -150,6 +215,10 @@ class DocumentWorkerConfigParser:
'timeout': None,
},
},
TEMPLATES_SECTION: [],
EXPERIMENTAL_SECTION: {
'moreAppsEnabled': False,
},
}

REQUIRED = [] # type: list[str]
Expand Down Expand Up @@ -247,6 +316,20 @@ def pandoc(self) -> CommandConfig:
def wkhtmltopdf(self) -> CommandConfig:
return self._command_config(self.EXTERNAL_SECTION, self.WKHTMLTOPDF_SUBSECTION)

@property
def templates(self) -> TemplatesConfig:
    """Load every entry of the templates section into a ``TemplatesConfig``."""
    raw_entries = self.get_or_default(self.TEMPLATES_SECTION)
    # One TemplateConfig per configured entry, in the configured order.
    loaded = list(map(TemplateConfig.load, raw_entries))
    return TemplatesConfig(templates=loaded)

@property
def experimental(self) -> ExperimentalConfig:
    """Build the ``ExperimentalConfig`` from the experimental section."""
    more_apps = self.get_or_default(self.EXPERIMENTAL_SECTION, 'moreAppsEnabled')
    return ExperimentalConfig(more_apps_enabled=more_apps)

@property
def config(self) -> DocumentWorkerConfig:
return DocumentWorkerConfig(
Expand All @@ -256,4 +339,6 @@ def config(self) -> DocumentWorkerConfig:
doc=self.documents,
pandoc=self.pandoc,
wkhtmltopdf=self.wkhtmltopdf,
templates=self.templates,
experimental=self.experimental,
)
8 changes: 6 additions & 2 deletions document_worker/connection/s3storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,15 @@ def ensure_bucket(self):
before=tenacity.before_log(Context.logger, logging.DEBUG),
after=tenacity.after_log(Context.logger, logging.DEBUG),
)
def store_document(self, file_name: str, content_type: str, data: bytes):
def store_document(self, app_uuid: str, file_name: str,
content_type: str, data: bytes):
object_name = f'{DOCUMENTS_DIR}/{file_name}'
if Context.get().app.cfg.experimental.more_apps_enabled:
object_name = f'{app_uuid}/{object_name}'
with temp_binary_file(data=data) as file:
self.client.put_object(
bucket_name=self.cfg.bucket,
object_name=f'{DOCUMENTS_DIR}/{file_name}',
object_name=object_name,
data=file,
length=len(data),
content_type=content_type,
Expand Down
2 changes: 1 addition & 1 deletion document_worker/consts.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
DEFAULT_ENCODING = 'utf-8'
EXIT_SUCCESS = 0
VERSION = '3.5.0'
VERSION = '3.6.0'
PROG_NAME = 'docworker'
LOGGER_NAME = 'docworker'
CURRENT_METAMODEL = 4
Expand Down
3 changes: 2 additions & 1 deletion document_worker/conversions.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ def __call__(self, source_format: FileFormat, target_format: FileFormat,
format=self.FORMATS.get(source_format)
)
result = g.serialize(
format=self.FORMATS.get(target_format)
format=self.FORMATS.get(target_format),
encoding=DEFAULT_ENCODING,
)
return result
12 changes: 6 additions & 6 deletions document_worker/model/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def load(data: dict, **options):
class Integration:

def __init__(self, uuid, name, logo, id, item_url, props, rq_body, rq_headers,
rq_method, rq_url, rs_id_field, rs_list_field, rs_name_field,
rq_method, rq_url, rs_list_field, rs_item_id, rs_item_template,
annotations):
self.uuid = uuid # type: str
self.name = name # type: str
Expand All @@ -52,9 +52,9 @@ def __init__(self, uuid, name, logo, id, item_url, props, rq_body, rq_headers,
self.rq_method = rq_method # type: str
self.rq_url = rq_url # type: str
self.rq_headers = rq_headers # type: dict[str, str]
self.rs_id_field = rs_id_field # type: str
self.rs_list_field = rs_list_field # type: str
self.rs_name_field = rs_name_field # type: str
self.rs_item_id = rs_item_id # type: str
self.rs_item_template = rs_item_template # type: str
self.annotations = annotations # type: dict[str, str]

@property
Expand All @@ -75,16 +75,16 @@ def load(data: dict, **options):
uuid=data['uuid'],
name=data['name'],
id=data['id'],
item_url=data['itemUrl'],
item_url=data['responseItemUrl'],
logo=data['logo'],
props=data['props'],
rq_body=data['requestBody'],
rq_headers=data['requestHeaders'],
rq_method=data['requestMethod'],
rq_url=data['requestUrl'],
rs_id_field=data['responseIdField'],
rs_list_field=data['responseListField'],
rs_name_field=data['responseNameField'],
rs_item_id=data['responseItemId'],
rs_item_template=data['responseItemTemplate'],
annotations=data['annotations'],
)

Expand Down
32 changes: 32 additions & 0 deletions document_worker/model/http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import requests

from document_worker.config import TemplateConfig


class RequestsWrapper:
    """Thin wrapper around ``requests`` that counts calls and forces a timeout.

    Every call increments ``request_counter``; once the counter exceeds
    ``limit`` a ``RuntimeError`` is raised instead of sending the request.
    The configured ``timeout`` replaces any ``timeout`` passed by the caller.
    """

    def __init__(self, template_cfg: TemplateConfig):
        requests_cfg = template_cfg.requests
        self.limit = requests_cfg.limit
        self.timeout = requests_cfg.timeout
        self.request_counter = 0

    def _prepare_for_request(self):
        """Count one more request and fail if the limit has been exceeded."""
        self.request_counter += 1
        if self.request_counter > self.limit:
            raise RuntimeError(f'Number of requests is over the limit {self.limit}')

    def _with_timeout(self, options: dict) -> dict:
        """Return ``options`` with ``timeout`` set to the configured value."""
        return {**options, 'timeout': self.timeout}

    def get(self, url, params=None, **kwargs) -> requests.Response:
        """Send a GET request, subject to the limit and timeout."""
        self._prepare_for_request()
        options = self._with_timeout(kwargs)
        return requests.get(url=url, params=params, **options)

    def post(self, url, data=None, json=None, **kwargs) -> requests.Response:
        """Send a POST request, subject to the limit and timeout."""
        self._prepare_for_request()
        options = self._with_timeout(kwargs)
        return requests.post(url=url, data=data, json=json, **options)

    def request(self, method: str, url: str, **kwargs) -> requests.Response:
        """Send a request with an arbitrary method, subject to limit and timeout."""
        self._prepare_for_request()
        options = self._with_timeout(kwargs)
        return requests.request(method=method, url=url, **options)
11 changes: 11 additions & 0 deletions document_worker/templates/steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,19 @@ def __init__(self, template, options: dict):
def _add_j2_enhancements(self):
from document_worker.templates.filters import filters
from document_worker.templates.tests import tests
from document_worker.model.http import RequestsWrapper
self.j2_env.filters.update(filters)
self.j2_env.tests.update(tests)
template_cfg = Context.get().app.cfg.templates.get_config(
self.template.template_id,
)
if template_cfg is not None:
global_vars = {'secrets': template_cfg.secrets}
if template_cfg.requests.enabled:
global_vars['requests'] = RequestsWrapper(
template_cfg=template_cfg,
)
self.j2_env.globals.update(global_vars)

def execute_first(self, context: dict) -> DocumentFile:
def asset_fetcher(file_name):
Expand Down
14 changes: 10 additions & 4 deletions document_worker/templates/templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,12 @@ def __init__(self, db_template, db_files, db_assets):

class Template:

def __init__(self, template_dir: pathlib.Path, db_template: TemplateComposite):
def __init__(self, app_uuid: str, template_dir: pathlib.Path,
db_template: TemplateComposite):
self.app_uuid = app_uuid
self.template_dir = template_dir
self.db_template = db_template
self.template_id = self.db_template.template.template_id
self.template_id = self.db_template.template.id
self.formats = dict() # type: Dict[str, Format]
self.prepare_template_files()
self.prepare_template_assets()
Expand Down Expand Up @@ -94,8 +96,11 @@ def prepare_template_files(self):

def prepare_template_assets(self):
Context.logger.info(f'Storing assets of template {self.template_id} locally')
path_prefix = f'templates/{self.db_template.template.id}'
if Context.get().app.cfg.experimental.more_apps_enabled:
path_prefix = f'{self.app_uuid}/{path_prefix}'
for asset in self.db_template.assets:
remote_path = f'templates/{self.db_template.template.id}/{asset.uuid}'
remote_path = f'{path_prefix}/{asset.uuid}'
local_path = self.template_dir / asset.file_name
result = Context.get().app.s3.download_file(remote_path, local_path)
if not result:
Expand All @@ -122,7 +127,7 @@ def render(self, format_uuid: str, context: dict) -> DocumentFile:


def prepare_template(template: DBTemplate, files: List[DBTemplateFile],
assets: List[DBTemplateAsset]) -> Template:
assets: List[DBTemplateAsset], app_uuid: str) -> Template:
workdir = Context.get().app.workdir
template_id = template.id
template_dir = workdir / template_id.replace(':', '_')
Expand All @@ -131,6 +136,7 @@ def prepare_template(template: DBTemplate, files: List[DBTemplateFile],
template_dir.mkdir()

return Template(
app_uuid=app_uuid,
template_dir=template_dir,
db_template=TemplateComposite(
db_template=template,
Expand Down
2 changes: 2 additions & 0 deletions document_worker/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ def prepare_template(self):
db_assets = self.ctx.app.db.fetch_template_assets(**query_args)
# prepare template
self.template = prepare_template(
app_uuid=self.app_uuid,
template=db_template,
files=db_files,
assets=db_assets,
Expand All @@ -139,6 +140,7 @@ def store_document(self):
self.ctx.app.s3.ensure_bucket()
self.log.info(f'Storing document to S3 bucket {s3_id}')
self.ctx.app.s3.store_document(
app_uuid=self.app_uuid,
file_name=self.doc_uuid,
content_type=self.final_file.content_type,
data=self.final_file.content,
Expand Down
11 changes: 7 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,20 +1,23 @@
certifi==2021.10.8
charset-normalizer==2.0.8
click==8.0.3
colorama==0.4.4
idna==3.3
isodate==0.6.0
Jinja2==3.0.2
Markdown==3.3.4
Jinja2==3.0.3
Markdown==3.3.6
MarkupSafe==2.0.1
mdx-breakless-lists==1.0.1
minio==7.1.1
pathvalidate==2.5.0
psycopg2==2.9.1
pyparsing==3.0.4
psycopg2==2.9.2
pyparsing==3.0.6
python-dateutil==2.8.2
python-slugify==5.0.2
PyYAML==6.0
rdflib==6.0.2
rdflib-jsonld==0.6.2
requests==2.26.0
six==1.16.0
tenacity==8.0.1
text-unidecode==1.3
Expand Down
Loading

0 comments on commit 01f7d39

Please sign in to comment.