Skip to content

Commit

Permalink
fix: don't duplicate the CSV header line, and warn when it's not there
Browse files (browse the repository at this point in the history)
  • Loading branch information
joanise authored and roedoejet committed Oct 10, 2023
1 parent 717fed0 commit 4c429c9
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 0 deletions.
4 changes: 4 additions & 0 deletions everyvoice/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,10 @@ def generic_dict_loader(
quoting=quoting,
escapechar=escapechar,
)
# When fieldnames is given, csv.DictReader treats the header line as a
# data line, but we don't want that, so skip it.
if fieldnames:
next(reader)
files = list(reader)
return files

Expand Down
9 changes: 9 additions & 0 deletions everyvoice/wizard/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,15 @@ def effect(self):
filelist_path, delimiter=self.state.get("filelist_delimiter")
)
self.state["filelist_headers"] = list(self.state["filelist_data"][0])
if (
"text" not in self.state["filelist_headers"]
and "basename" not in self.state["filelist_headers"]
):
print(
"Warning: we assume the filelist's first line has header names, but "
'your filelist does not have the standard "basename" and "text" headers. '
"The first line will be replaced by headers based on your next answers."
)
if "text" not in self.state["filelist_headers"]:
self.tour.add_step(
HeaderStep(
Expand Down

0 comments on commit 4c429c9

Please sign in to comment.