Style fixes for main #544

Open
wants to merge 3 commits into main

Changes from all commits
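The PR carries no description on the page; summarizing from the diff itself, the changes are mechanical style cleanups across the measurements, metrics, evaluator, and test modules: the stray blank line between a function signature and its body is removed, redundant parentheses around tuple targets in for loops are dropped, and one overlong noqa comment in loading.py is split across two lines. A minimal before/after sketch of the two recurring patterns (illustrative snippet with made-up names, not taken from the diff):

    # Before: blank line after the signature, parenthesized loop target
    def _compute(self, predictions, references):

        for (pattern, repl) in rules:
            ...

    # After: body starts immediately, bare tuple unpacking
    def _compute(self, predictions, references):
        for pattern, repl in rules:
            ...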
1 change: 0 additions & 1 deletion measurements/perplexity/perplexity.py
@@ -104,7 +104,6 @@ def _info(self):
def _compute(
self, data, model_id, batch_size: int = 16, add_start_token: bool = True, device=None, max_length=None
):

if device is not None:
assert device in ["gpu", "cpu", "cuda"], "device should be either gpu or cpu."
if device == "gpu":
1 change: 0 additions & 1 deletion metrics/bertscore/bertscore.py
@@ -143,7 +143,6 @@ def _compute(
baseline_path=None,
use_fast_tokenizer=False,
):

if isinstance(references[0], str):
references = [[ref] for ref in references]

2 changes: 1 addition & 1 deletion metrics/bleu/tokenizer_13a.py
@@ -61,7 +61,7 @@ def __call__(self, line):
:param line: a segment to tokenize
:return: the tokenized line
"""
for (_re, repl) in self._re:
for _re, repl in self._re:
line = _re.sub(repl, line)

# no leading or trailing spaces, single space within words
2 changes: 0 additions & 2 deletions metrics/bleurt/bleurt.py
@@ -79,7 +79,6 @@
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class BLEURT(evaluate.Metric):
def _info(self):

return evaluate.MetricInfo(
description=_DESCRIPTION,
citation=_CITATION,
@@ -96,7 +95,6 @@ def _info(self):
)

def _download_and_prepare(self, dl_manager):

# check that config name specifies a valid BLEURT model
if self.config_name == "default":
logger.warning(
1 change: 0 additions & 1 deletion metrics/brier_score/brier_score.py
@@ -128,7 +128,6 @@ def _get_feature_types(self):
]

def _compute(self, references, predictions, sample_weight=None, pos_label=1):

brier_score = brier_score_loss(references, predictions, sample_weight=sample_weight, pos_label=pos_label)

return {"brier_score": brier_score}
2 changes: 0 additions & 2 deletions metrics/code_eval/execute.py
@@ -54,9 +54,7 @@ def check_correctness(check_program, timeout, task_id, completion_id):


def unsafe_execute(check_program, result, timeout):

with create_tempdir():

# These system calls are needed when cleaning up tempdir.
import os
import shutil
1 change: 0 additions & 1 deletion metrics/comet/comet.py
@@ -127,7 +127,6 @@
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class COMET(evaluate.Metric):
def _info(self):

return evaluate.MetricInfo(
description=_DESCRIPTION,
citation=_CITATION,
1 change: 0 additions & 1 deletion metrics/coval/coval.py
@@ -168,7 +168,6 @@
def get_coref_infos(
key_lines, sys_lines, NP_only=False, remove_nested=False, keep_singletons=True, min_span=False, doc="dummy_doc"
):

key_doc_lines = {doc: key_lines}
sys_doc_lines = {doc: sys_lines}

1 change: 0 additions & 1 deletion metrics/exact_match/exact_match.py
@@ -108,7 +108,6 @@ def _compute(
ignore_punctuation=False,
ignore_numbers=False,
):

if regexes_to_ignore is not None:
for s in regexes_to_ignore:
predictions = np.array([re.sub(s, "", x) for x in predictions])
2 changes: 1 addition & 1 deletion metrics/google_bleu/tokenizer_13a.py
@@ -61,7 +61,7 @@ def __call__(self, line):
:param line: a segment to tokenize
:return: the tokenized line
"""
for (_re, repl) in self._re:
for _re, repl in self._re:
line = _re.sub(repl, line)

# no leading or trailing spaces, single space within words
1 change: 0 additions & 1 deletion metrics/mae/mae.py
@@ -107,7 +107,6 @@ def _get_feature_types(self):
}

def _compute(self, predictions, references, sample_weight=None, multioutput="uniform_average"):

mae_score = mean_absolute_error(references, predictions, sample_weight=sample_weight, multioutput=multioutput)

return {"mae": mae_score}
1 change: 0 additions & 1 deletion metrics/mahalanobis/mahalanobis.py
@@ -72,7 +72,6 @@ def _info(self):
)

def _compute(self, X, reference_distribution):

# convert to numpy arrays
X = np.array(X)
reference_distribution = np.array(reference_distribution)
1 change: 0 additions & 1 deletion metrics/mape/mape.py
@@ -107,7 +107,6 @@ def _get_feature_types(self):
}

def _compute(self, predictions, references, sample_weight=None, multioutput="uniform_average"):

mape_score = mean_absolute_percentage_error(
references,
predictions,
1 change: 0 additions & 1 deletion metrics/mase/mase.py
@@ -123,7 +123,6 @@ def _compute(
sample_weight=None,
multioutput="uniform_average",
):

y_pred_naive = training[:-periodicity]
mae_naive = mean_absolute_error(training[periodicity:], y_pred_naive, multioutput=multioutput)

1 change: 0 additions & 1 deletion metrics/mse/mse.py
@@ -111,7 +111,6 @@ def _get_feature_types(self):
}

def _compute(self, predictions, references, sample_weight=None, multioutput="uniform_average", squared=True):

mse = mean_squared_error(
references, predictions, sample_weight=sample_weight, multioutput=multioutput, squared=squared
)
Expand Down
1 change: 0 additions & 1 deletion metrics/perplexity/perplexity.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ def _info(self):
def _compute(
self, predictions, model_id, batch_size: int = 16, add_start_token: bool = True, device=None, max_length=None
):

if device is not None:
assert device in ["gpu", "cpu", "cuda"], "device should be either gpu or cpu."
if device == "gpu":
2 changes: 0 additions & 2 deletions metrics/sari/sari.py
@@ -228,7 +228,6 @@ def SARIsent(ssent, csent, rsents):


def normalize(sentence, lowercase: bool = True, tokenizer: str = "13a", return_str: bool = True):

# Normalization is requried for the ASSET dataset (one of the primary
# datasets in sentence simplification) to allow using space
# to split the sentence. Even though Wiki-Auto and TURK datasets,
@@ -279,7 +278,6 @@ def _info(self):
)

def _compute(self, sources, predictions, references):

if not (len(sources) == len(predictions) == len(references)):
raise ValueError("Sources length must match predictions and references lengths.")
sari_score = 0
1 change: 0 additions & 1 deletion metrics/smape/smape.py
@@ -147,7 +147,6 @@ def _get_feature_types(self):
}

def _compute(self, predictions, references, sample_weight=None, multioutput="uniform_average"):

smape_score = symmetric_mean_absolute_percentage_error(
references,
predictions,
2 changes: 0 additions & 2 deletions metrics/wiki_split/wiki_split.py
@@ -255,7 +255,6 @@ def SARIsent(ssent, csent, rsents):


def normalize(sentence, lowercase: bool = True, tokenizer: str = "13a", return_str: bool = True):

# Normalization is requried for the ASSET dataset (one of the primary
# datasets in sentence simplification) to allow using space
# to split the sentence. Even though Wiki-Auto and TURK datasets,
@@ -285,7 +284,6 @@ def normalize(sentence, lowercase: bool = True, tokenizer: str = "13a", return_s


def compute_sari(sources, predictions, references):

if not (len(sources) == len(predictions) == len(references)):
raise ValueError("Sources length must match predictions and references lengths.")
sari_score = 0
1 change: 0 additions & 1 deletion metrics/xtreme_s/xtreme_s.py
@@ -239,7 +239,6 @@ def _info(self):
)

def _compute(self, predictions, references, bleu_kwargs=None, wer_kwargs=None):

bleu_kwargs = bleu_kwargs if bleu_kwargs is not None else {}
wer_kwargs = wer_kwargs if wer_kwargs is not None else {}

2 changes: 0 additions & 2 deletions src/evaluate/evaluation_suite/__init__.py
@@ -102,12 +102,10 @@ def assert_suite_nonempty(self):
def run(
self, model_or_pipeline: Union[str, "Pipeline", Callable, "PreTrainedModel", "TFPreTrainedModel"] # noqa: F821
) -> Dict[str, float]:

self.assert_suite_nonempty()

results_all = []
for task in self.suite:

task_name = task.data

if task.data_preprocessor: # task requires extra preprocessing
1 change: 0 additions & 1 deletion src/evaluate/evaluator/audio_classification.py
@@ -119,7 +119,6 @@ def compute(
label_column: str = "label",
label_mapping: Optional[Dict[str, Number]] = None,
) -> Tuple[Dict[str, float], Any]:

"""
input_column (`str`, defaults to `"file"`):
The name of the column containing either the audio files or a raw waveform, represented as a numpy array, in the dataset specified by `data`.
1 change: 0 additions & 1 deletion src/evaluate/evaluator/base.py
@@ -235,7 +235,6 @@ def compute(
label_column: str = "label",
label_mapping: Optional[Dict[str, Number]] = None,
) -> Dict[str, float]:

result = {}

self.check_for_mismatch_in_device_setup(device, model_or_pipeline)
1 change: 0 additions & 1 deletion src/evaluate/evaluator/image_classification.py
@@ -87,7 +87,6 @@ def compute(
label_column: str = "label",
label_mapping: Optional[Dict[str, Number]] = None,
) -> Tuple[Dict[str, float], Any]:

"""
input_column (`str`, defaults to `"image"`):
The name of the column containing the images as PIL ImageFile in the dataset specified by `data`.
1 change: 0 additions & 1 deletion src/evaluate/inspect.py
@@ -72,7 +72,6 @@ def list_evaluation_modules(module_type=None, include_community=True, with_detai


def _list_evaluation_modules_type(module_type, include_community=True, with_details=False):

r = requests.get(HF_LIST_ENDPOINT.format(type=module_type))
r.raise_for_status()
d = r.json()
3 changes: 2 additions & 1 deletion src/evaluate/loading.py
@@ -658,7 +658,8 @@ def evaluation_module_factory(
download_mode=download_mode,
dynamic_modules_path=dynamic_modules_path,
).get_module()
except Exception as e1: # noqa: all the attempts failed, before raising the error we should check if the module is already cached.
except Exception as e1: # all the attempts have failed
# before raising the error we should check if the module is already cached.
# if it's a canonical module we need to check if it's any of the types
if path.count("/") == 0:
for current_type in ["metric", "comparison", "measurement"]:
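For context on this hunk: the comment being split describes a fallback path, i.e. once every attempt to load the module script has failed, check whether the module is already present in the local cache of dynamic modules before surfacing the error. A rough sketch of that control flow, using hypothetical helper names rather than the actual evaluate internals:

    def load_with_cache_fallback(path, load_remote, load_cached):
        """Try to fetch a module script; fall back to a locally cached copy on failure."""
        try:
            return load_remote(path)
        except Exception as err:  # all the attempts have failed
            # Before raising, check whether the module is already cached locally.
            cached = load_cached(path)
            if cached is not None:
                return cached
            raise FileNotFoundError(f"Couldn't find a module script at {path}.") from err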
1 change: 0 additions & 1 deletion src/evaluate/utils/file_utils.py
@@ -535,7 +535,6 @@ def get_from_cache(
# Prevent parallel downloads of the same file with a lock.
lock_path = cache_path + ".lock"
with FileLock(lock_path):

if resume_download:
incomplete_path = cache_path + ".incomplete"

2 changes: 0 additions & 2 deletions src/evaluate/visualization.py
@@ -22,7 +22,6 @@ class ComplexRadar:
"""

def __init__(self, fig, variables, ranges, n_ring_levels=5, show_scales=True, format_cfg=None):

self.format_cfg = format_cfg

# Calculate angles and create for each variable an axes
@@ -41,7 +40,6 @@ def __init__(self, fig, variables, ranges, n_ring_levels=5, show_scales=True, fo

# Writing the ranges on each axes
for i, ax in enumerate(axes):

# Here we do the trick by repeating the first iteration
j = 0 if (i == 0 or i == 1) else i - 1
ax.set_ylim(*ranges[j])
2 changes: 0 additions & 2 deletions tests/test_evaluation_suite.py
@@ -13,7 +13,6 @@ def setUp(self):
self.dummy_model = DummyTextClassificationPipeline()

def test_running_evaluation_suite(self):

# Check that the evaluation suite successfully runs
results = self.evaluation_suite.run(self.dummy_model)

@@ -25,7 +24,6 @@ def test_running_evaluation_suite(self):
self.assertEqual(len(results), 2)

def test_empty_suite(self):

self.empty_suite = self.evaluation_suite
self.empty_suite.suite = []
self.assertRaises(ValueError, self.empty_suite.run, self.dummy_model)
1 change: 0 additions & 1 deletion tests/test_evaluator.py
@@ -306,7 +306,6 @@ def test_default_pipe_init(self):
self.assertEqual(results["accuracy"], 1.0)

def test_data_loading(self):

# Test passing in dataset by name with split
data = self.evaluator.load_data("evaluate/imdb-ci", split="test[:1]")
self.evaluator.prepare_data(data=data, input_column="text", label_column="label", second_input_column=None)
1 change: 0 additions & 1 deletion tests/test_metric.py
@@ -516,7 +516,6 @@ def test_string_casting(self):
metric.compute(predictions=["a"], references=["a"])

def test_string_casting_tested_once(self):

self.counter = 0

def checked_fct(fct): # wrapper function that increases a counter on each call