Skip to content

Commit

Permalink
make CWL format check against IANA media-type URI more resilient agai…
Browse files Browse the repository at this point in the history
…nst temporary/sporadic SSL handshake error
  • Loading branch information
fmigneault committed Sep 4, 2024
1 parent f1d0463 commit 4fa4d5a
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 5 deletions.
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ Fixes:
------
- Fix invalid resolution of ``weaver.formats.ContentEncoding.open_parameters``.
- Fix minor resolution combinations or redundant checks for multiple ``weaver.formats`` utilities.
- Fix `CWL` ``format`` resolution check against `IANA` media-types if the reference ontology happens to be
temporarily/sporadically unresponsive to SSL handshake check, allowing temporary HTTP resolution of media-type.

.. _changes_5.7.0:

Expand Down
28 changes: 27 additions & 1 deletion tests/test_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import itertools
import os
import uuid
from urllib.error import URLError
from urllib.request import urlopen

import mock
import pytest
Expand All @@ -14,7 +16,7 @@

from tests.utils import MockedRequest
from weaver import formats as f
from weaver.utils import null
from weaver.utils import null, request_extra

_ALLOWED_MEDIA_TYPE_CATEGORIES = [
"application",
Expand Down Expand Up @@ -384,6 +386,30 @@ def mock_urlopen(*_, **__):
assert mocked_urlopen.call_count == 1, "Expected internal fallback request calls"


def test_get_cwl_file_format_retry_fallback_ssl_error():
    """
    Test that the CWL format resolution of an IANA media-type falls back to HTTP when HTTPS requests fail.

    Both request utilities are patched such that any ``https://`` URL raises an error mimicking an SSL failure,
    while any other URL is forwarded to the real implementation. The resolution is expected to succeed anyway,
    after one ``urlopen`` attempt and two ``request_extra`` attempts (HTTPS first, then the HTTP equivalent).
    """
    def fail_https_request_extra(method, url, *args, **kwargs):
        # only non-HTTPS locations are allowed to reach the real request utility
        if not url.startswith("https://"):
            return request_extra(method, url, *args, **kwargs)
        raise ConnectionError("fake SSL error")

    def fail_https_urlopen(url, *args, **kwargs):
        # error message mimics an SSL handshake timeout reported through urllib
        if not url.startswith("https://"):
            return urlopen(url, *args, **kwargs)
        raise URLError("urlopen fake SSL error: The handshake operation timed out")

    iana_type = f"{f.IANA_NAMESPACE}:text/javascript"
    iana_url = f"{f.IANA_NAMESPACE_URL}text/javascript"

    # patchers are entered in the same order as the original nested context managers
    patch_request_extra = mock.patch("weaver.utils.request_extra", side_effect=fail_https_request_extra)
    patch_urlopen = mock.patch("weaver.formats.urlopen", side_effect=fail_https_urlopen)
    with patch_request_extra as mocked_request_extra, patch_urlopen as mocked_urlopen:
        ns, ctype = f.get_cwl_file_format(iana_type)

    assert ns == f.IANA_NAMESPACE_DEFINITION
    assert ctype == iana_type
    assert mocked_urlopen.call_count == 1, "1 call for urllib approach as first attempt failing HTTPS SSL check"
    assert mocked_request_extra.call_count == 2, "2 calls should occur, 1 for HTTPS, 1 for HTTP fallback"
    https_call, http_call = mocked_request_extra.call_args_list
    assert https_call.args == ("head", iana_url)
    assert http_call.args == ("head", iana_url.replace("https://", "http://"))


def test_get_cwl_file_format_synonym():
"""
Test handling of special non-official MIME-type that have a synonym redirection to an official one.
Expand Down
27 changes: 23 additions & 4 deletions weaver/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import re
import socket
from typing import TYPE_CHECKING, cast, overload
from urllib.error import HTTPError
from urllib.error import HTTPError, URLError
from urllib.request import urlopen

import yaml
Expand Down Expand Up @@ -829,11 +829,30 @@ def _request_extra_various(_media_type):
with urlopen(_media_type_url, timeout=2) as resp: # nosec: B310 # IANA scheme guaranteed HTTP
if resp.code == HTTPOk.code:
return _make_if_ref(IANA_NAMESPACE_DEFINITION, IANA_NAMESPACE, _media_type)
except socket.timeout:
except socket.timeout: # pragma: no cover
continue
break
except HTTPError:
break # pragma: no cover # don't keep retrying if the cause is not timeout/ssl, but not resolved
except HTTPError: # pragma: no cover # same as above, but for cases where the HTTP code raised directly
pass
except URLError as exc:
# if error is caused by a sporadic SSL error
# allow temporary HTTP resolution given IANA is a well-known URI
# however, ensure the cause is in fact related to SSL, and still a resolvable reference
http_err = str(exc.args[0]).lower()
http_url = "http://" + _media_type_url.split("://", 1)[-1]
if (
_media_type_url.startswith(IANA_NAMESPACE_URL) and
any(err in http_err for err in ["ssl", "handshake"]) and
any(err in http_err for err in ["timeout", "timed out"])
):
try:
resp = request_extra("head", http_url, retries=0, timeout=2,
allow_redirects=True, allowed_codes=[HTTPOk.code, HTTPNotFound.code])
if resp.status_code == HTTPOk.code:
return _make_if_ref(IANA_NAMESPACE_DEFINITION, IANA_NAMESPACE, _media_type)
except ConnectionError: # pragma: no cover
LOGGER.debug("Format request [%s] connection error: [%s] (last resort no-SSL check)", http_url, exc)
return None
return None

if not media_type:
Expand Down

0 comments on commit 4fa4d5a

Please sign in to comment.