diff --git a/airflow/dags/operations/sync_kernel_to_website_operations.py b/airflow/dags/operations/sync_kernel_to_website_operations.py
index 3c1bb002..63bd6b94 100644
--- a/airflow/dags/operations/sync_kernel_to_website_operations.py
+++ b/airflow/dags/operations/sync_kernel_to_website_operations.py
@@ -84,6 +84,7 @@ def ArticleFactory(
document_xml_url: str,
repeated_doc_pids=None,
fetch_document_xml: callable = None,
+ fetch_documents_manifest: callable = None,
) -> models.Article:
"""Cria uma instância de artigo a partir dos dados de entrada.
@@ -97,6 +98,7 @@ def ArticleFactory(
document_order (int): Posição do artigo.
document_xml_url (str): URL do XML do artigo
fetch_document_xml (callable): Função para obter o XML do Kernel caso
+ fetch_documents_manifest (callable): Função para obter o JSON Manifest do Kernel caso
necessário.
Returns:
@@ -514,6 +516,63 @@ def _get_related_articles(xml):
for related_dict in sps_package.related_articles:
_update_related_articles(article, related_dict)
+ def _update_suppl_material(document_id, filename, url):
+     """Build the supplementary-material list that includes one asset.
+
+     A ``models.MatSuppl`` is created from ``filename`` and ``url``, e.g.::
+
+         {
+             "url": "https://minio.scielo.br/documentstore/2237-9622/d6DyD7CHXbpTJbLq7NQQNdq/5d88e2211c5357e2a9d8caeac2170f4f3d1305d1.pdf",
+             "filename": "suppl01.pdf"
+         }
+
+     Always returns a list, because the result ends up assigned to
+     ``article.mat_suppl``: the stored list (plus the new entry when it is
+     missing) if the article already exists, else a one-item list.
+     """
+     mat_suppl_entity = models.MatSuppl(url=url, filename=filename)
+     try:
+         # An existing article means this is an update.
+         _article = models.Article.objects.get(_id=document_id)
+     except models.Article.DoesNotExist:
+         # New article: the new entry is the whole list.
+         return [mat_suppl_entity]
+     # Update: append only when absent so entries stay unique.
+     if mat_suppl_entity not in _article.mat_suppl:
+         _article.mat_suppl += [mat_suppl_entity]
+     return _article.mat_suppl
+
+ def _get_suppl_material(article, json):
+     """Collect the supplementary material listed in a Kernel manifest.
+
+     Args:
+         article: document id forwarded to ``_update_suppl_material`` (the
+             caller passes ``document_id``, not an Article instance).
+         json: Kernel manifest; assets are read from its first version.
+
+     Asset keys containing ``suppl`` are treated as supplementary material
+     (XML tags: "inline-supplementary-material", "supplementary-material").
+
+     Returns:
+         list: the supplementary-material entries, without duplicates; empty
+         when the manifest has none.
+     """
+     logging.info("Checking if exists supplementary material....")
+     # `or {}`: a falsy `_nestget` result (no versions/assets) means no assets.
+     assets = _nestget(json, "versions", 0, "assets") or {}
+     suppls = [k for k in assets if 'suppl' in k]
+     mat_suppl = []
+     if suppls:
+         logging.info("Exists supplementary material: %s" %
+                      (' '.join(suppls)))
+     for key in suppls:
+         # Register every asset instead of returning after the first one.
+         url = _nestget(assets[key], 0, 1)
+         for entity in _update_suppl_material(article, filename=key, url=url):
+             if entity not in mat_suppl:
+                 mat_suppl.append(entity)
+     return mat_suppl
+
article.authors = list(_get_article_authors(data))
article.authors_meta = _get_article_authors_meta(data)
article.languages = list(_get_languages(data))
@@ -564,6 +623,11 @@ def _get_related_articles(xml):
article.order = _get_order(document_order, article.pid)
article.xml = document_xml_url
+ # Cadastra o material suplementar
+ if fetch_documents_manifest:
+ json = fetch_documents_manifest(document_id)
+ article.mat_suppl = _get_suppl_material(document_id, json)
+
# Se for uma errata ou retratação ou adendo ou comentário de artigo.
if article.type in ["correction", "retraction", "addendum", "article-commentary"]:
# Obtém o XML da errada no kernel
@@ -584,7 +648,8 @@ def try_register_documents(
get_relation_data: callable,
fetch_document_front: callable,
article_factory: callable,
- fetch_document_xml: callable,
+ fetch_document_xml: callable = None,
+ fetch_documents_manifest: callable = None,
) -> List[str]:
"""Registra documentos do Kernel na base de dados do `OPAC`.
@@ -637,7 +702,8 @@ def try_register_documents(
item.get("order"),
document_xml_url,
repeated_doc_pids,
- fetch_document_xml
+ fetch_document_xml,
+ fetch_documents_manifest
)
document.save()
logging.info("ARTICLE saved %s %s" % (document_id, issue_id))
diff --git a/airflow/dags/sync_kernel_to_website.py b/airflow/dags/sync_kernel_to_website.py
index 2eba4fcd..a49fafd7 100644
--- a/airflow/dags/sync_kernel_to_website.py
+++ b/airflow/dags/sync_kernel_to_website.py
@@ -162,6 +162,13 @@ def fetch_documents_xml(document_id):
return fetch_data("/documents/%s" % (document_id), json=False)
+def fetch_documents_manifest(document_id):
+    """Fetch the Kernel manifest of the document `document_id` (json=True)."""
+    # Manifest endpoint: the document path plus a `/manifest` suffix.
+    manifest_path = "/documents/%s/manifest" % document_id
+    return fetch_data(manifest_path, json=True)
+
+
def _get_relation_data_from_kernel_bundle(document_id, front_data=None):
"""
Obtém os dados do documento no bundle
@@ -747,7 +754,7 @@ def _get_known_documents(**kwargs) -> Dict[str, List[str]]:
)
orphans = try_register_documents(
- documents_to_get, _get_relation_data, fetch_documents_front, ArticleFactory, fetch_documents_xml,
+ documents_to_get, _get_relation_data, fetch_documents_front, ArticleFactory, fetch_documents_xml, fetch_documents_manifest,
)
Variable.set("orphan_documents", orphans, serialize_json=True)
@@ -916,6 +923,7 @@ def register_last_issues(ds, **kwargs):
except AttributeError:
logging.info("No issues are registered to models.Journal: %s " % journal)
+
def must_send_email(ds, **kwargs):
"""If IS_SPORADIC == True return False to avoid send e-mail,
but if IS_SPORADIC == False, return True to send e-mail.
diff --git a/requirements.txt b/requirements.txt
index e00a57f3..8139f08e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,6 +5,6 @@ deepdiff[murmur]==4.0.7
feedparser==5.2.1
beautifulsoup4==4.9.0
git+https://github.com/scieloorg/xylose.git@1.35.8#egg=xylose
-git+https://github.com/scieloorg/opac_schema.git@v2.60#egg=opac_schema
+git+https://github.com/scieloorg/opac_schema.git@v2.65#egg=opac_schema
git+https://github.com/scieloorg/packtools.git@2.6.4#egg=packtools
aiohttp==3.6.2
\ No newline at end of file