-
Notifications
You must be signed in to change notification settings - Fork 11
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adicionar a capacidade de popular o campo related_articles #302
Changes from 1 commit
e825918
6e44ca2
7c00f6d
3eaceac
2a9692d
443a04a
644d401
7f63961
e63ca9c
1fc2523
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,20 @@ | ||
import logging | ||
from datetime import datetime | ||
from re import match | ||
from typing import Iterable, Generator, Dict, List, Tuple | ||
from typing import Callable, Iterable, Generator, Dict, List, Tuple | ||
|
||
import requests | ||
from lxml import etree as et | ||
from opac_schema.v1 import models | ||
|
||
import common.hooks as hooks | ||
from operations.exceptions import InvalidOrderValueError | ||
from operations.docs_utils import ( | ||
get_bundle_id, | ||
) | ||
|
||
from common.sps_package import ( | ||
SPS_Package, | ||
extract_number_and_supplment_from_issue_element, | ||
) | ||
|
||
|
@@ -79,6 +82,7 @@ def ArticleFactory( | |
document_order: int, | ||
document_xml_url: str, | ||
repeated_doc_pids=None, | ||
fetch_document_xml:callable=None, | ||
) -> models.Article: | ||
"""Cria uma instância de artigo a partir dos dados de entrada. | ||
|
||
|
@@ -91,6 +95,8 @@ def ArticleFactory( | |
issue_id (str): Identificador de issue. | ||
document_order (int): Posição do artigo. | ||
document_xml_url (str): URL do XML do artigo | ||
fetch_document_xml (callable): Função para obter o XML do Kernel caso | ||
necessário. | ||
|
||
Returns: | ||
models.Article: Instância de um artigo próprio do modelo de dados do | ||
|
@@ -352,6 +358,43 @@ def _get_order(document_order, pid_v2): | |
except (ValueError, TypeError): | ||
raise InvalidOrderValueError(order_err_msg) | ||
|
||
def _update_related_articles(related_dict): | ||
""" | ||
Atualiza o campo related_articles com o pid do artigo vinculado à errata, | ||
adendo ou retratação. | ||
""" | ||
related_doi = related_dict.get('doi') | ||
|
||
# Update the articles by doi | ||
if related_doi: | ||
try: | ||
article = models.Article.objects.get(doi=related_doi) | ||
except models.Article.DoesNotExist as ex: | ||
logging.error("Documento não existe na base de dados do site com o DOI: %s, ao tentar popular o campo related_articles, erro: %s" % (related_doi, ex)) | ||
else: | ||
article.related_articles = [ | ||
models.RelatedArticle(**related_dict)] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Talvez prever que um documento, no decorrer do tempo, possa ter mais de uma errata. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sim, para esse caso seria:
Ou seja, eu incremento a lista de related_articles caso ela já exista! There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @gitnnolabs mas acho que há um engano aqui... pois me parece que está sendo atualizado o related_article com os dados que ele já tem, e não com os dados do documento relacionado a ele. |
||
return article.save() | ||
|
||
|
||
def _get_related_articles(document_id, xml): | ||
""" | ||
Obtém a lista de documentos relacionados do XML tag: | ||
|
||
<related-article ext-link-type="doi" id="ra1" | ||
related-article-type="corrected-article" | ||
xlink:href="10.1590/S0103-50532006000200015"/> | ||
""" | ||
# # sps_package = SPS_Package(et.XML(xml)) | ||
# resp = requests.get('http://www.scielo.br/j/jbchs/a/Z6mnK3PjKhDZtHJQJYPzxpw/?lang=en&format=xml', verify=False) | ||
|
||
sps_package = SPS_Package(et.XML(resp.content)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. isso pode levantar exceção, certo? |
||
|
||
for related_dict in sps_package.related_articles: | ||
if _update_related_articles(related_dict): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. me parece que aqui deveria incluir o parâmetro pid v3 do documento que está sendo processado |
||
logging.info("Relacionamento entre o artigo DOI: %s(%s) e relacionado DOI: %s realizado com sucesso" % ( | ||
related_dict.get('doi'), related_dict.get('related_type'), document_id)) | ||
|
||
article.authors = list(_get_article_authors(data)) | ||
article.authors_meta = _get_article_authors_meta(data) | ||
article.languages = list(_get_languages(data)) | ||
|
@@ -403,6 +446,13 @@ def _get_order(document_order, pid_v2): | |
article.order = _get_order(document_order, article.pid) | ||
article.xml = document_xml_url | ||
|
||
# Se for uma errata ou retratação ou adendo. | ||
if (article.type == "correction" | ||
or article.type == "retraction" | ||
or article.type == "addendum"): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. sugestão, mas não precisa mudar por causa disso: article.type in ['correction', 'retraction', 'addendum'] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Boa! There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. me corrijo: se isso tiver melhor desempenho, sim, mudar, pois a produção tem processado uma quantidade imensa de pacotes There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK |
||
xml = fetch_document_xml(document_id) | ||
_get_related_articles(article.doi, xml) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @gitnnolabs Me parece que o PR está incompleto e incorreto documento A No documento A, há no xml: {
"doi" : "10.1590/S0103-50532006000200015",
"related-type" : "corrected-article",
},
{
"doi" : "10.1590/S0103-50532006000200007",
"related-type" : "corrected-article",
} que são respectivamente dados de B1 e B2. Este PR deve fazer o seguinte
{
"ref_id": pid v3 do documento B1 a ser descoberto procurando pelo seu doi,
"doi" : "10.1590/S0103-50532006000200015",
"related-type" : "corrected-article",
},
{
"ref_id": pid v3 do documento B2 a ser descoberto procurando pelo seu doi,
"doi" : "10.1590/S0103-50532006000200007",
"related-type" : "corrected-article",
}
{
"ref_id": pid v3 do documento A (já sabido)
"doi" : doi do documento A (já sabido),
"related-type" : article_type do documento A,
}, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @robertatakenaka não realizei a inclusão dos dados na errata que no exemplo seria o documento A, pois na interface do site, já contém a mensagem dessa relação. Mas pensando melhor você tem razão, dessa forma mantemos na base de dados a relação de ida e volta: ['errata', 'retração', 'adendo'] <-> documento Esse PR está contemplando somente: documento -> ['errata', 'retração', 'adendo'] Agora com essa alteração, suspeito que removemos do packtools geração da caixa de errata quando existir a tag There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @gitnnolabs são dois ou 3 pontos diferentes
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
# Campo de compatibilidade do OPAC | ||
article.htmls = [{"lang": lang} for lang in _get_languages(data)] | ||
|
||
|
@@ -417,6 +467,7 @@ def try_register_documents( | |
get_relation_data: callable, | ||
fetch_document_front: callable, | ||
article_factory: callable, | ||
fetch_document_xml: callable, | ||
) -> List[str]: | ||
"""Registra documentos do Kernel na base de dados do `OPAC`. | ||
|
||
|
@@ -433,6 +484,8 @@ def try_register_documents( | |
`front` do documento a partir da API do Kernel. | ||
article_factory (callable): função que cria uma instância do modelo | ||
de dados do Artigo na base do OPAC. | ||
fetch_document_xml (callable): função que recupera XML | ||
do documento a partir da API do Kernel. | ||
|
||
Returns: | ||
List[str] orphans: Lista contendo todos os identificadores dos | ||
|
@@ -467,6 +520,7 @@ def try_register_documents( | |
item.get("order"), | ||
document_xml_url, | ||
repeated_doc_pids, | ||
fetch_document_xml | ||
) | ||
document.save() | ||
logging.info("ARTICLE saved %s %s" % (document_id, issue_id)) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
incluir o parâmetro pid v3 do artigo que contém
related_articles