From 38cee6e2aa9489339332146bd4ea331ff2ac164c Mon Sep 17 00:00:00 2001
From: TGS
Date: Sat, 1 Jul 2023 16:59:42 +0100
Subject: [PATCH] Admin scripts to facilitate running the hunt

---
 .gitignore                           |   3 +
 README.md                            |   9 +-
 admin_scripts/calculate_winners.py   |  62 ++++++++++++
 admin_scripts/level_validation.py    | 132 ++++++++++++++++++++++++
 admin_scripts/site_scraper.py        | 124 +++++++++++++++++++++
 upload.py => admin_scripts/upload.py |   0
 6 files changed, 328 insertions(+), 2 deletions(-)
 create mode 100644 admin_scripts/calculate_winners.py
 create mode 100644 admin_scripts/level_validation.py
 create mode 100644 admin_scripts/site_scraper.py
 rename upload.py => admin_scripts/upload.py (100%)
 mode change 100755 => 100644

diff --git a/.gitignore b/.gitignore
index 95323ba..5d6c7b6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,6 @@
 /media/
 /static/
 /treasure.sqlite
+
+# e.g. Pycharm config
+.idea/
diff --git a/README.md b/README.md
index 5233971..24d0aa8 100644
--- a/README.md
+++ b/README.md
@@ -75,7 +75,7 @@ docker run \
   e-treasure-hunt
 ```

-To use Google maps, you will also need to pass `GM_API_KEY` to this container as
+To use Google Maps, you will also need to pass `GM_API_KEY` to this container as
 an environment variable.

 # Initiating the app
@@ -105,9 +105,14 @@ and N+1.
 You can use the files in `dummy_files.zip`, updating `blurb.txt` at level 0 with
 text for the start of the hunt.

+It is recommended that, prior to attempting upload, [level_validation.py](admin_scripts/level_validation.py)
+be run over the levels. This will catch numerous formatting problems with the levels before wasting your
+time/bandwidth on server upload, and will also catch several conditions that are not technically errors
+but are undesirable, such as empty README.md files and too-tight tolerances.
+
 ### Level upload through the API

-[upload.py](upload.py) contains utilities for uploading levels and hints.
+[upload.py](admin_scripts/upload.py) contains utilities for uploading levels and hints.
 You'll need to update the `SERVER` and credentials at the top of the file, and
 then re-arrange `main()` as appropriate to upload your levels.

diff --git a/admin_scripts/calculate_winners.py b/admin_scripts/calculate_winners.py
new file mode 100644
index 0000000..aead04f
--- /dev/null
+++ b/admin_scripts/calculate_winners.py
@@ -0,0 +1,62 @@
+"""Parse the hunt events CSV downloaded from the hunt website to see who won by various metrics

ADV = team advanced to that level
REQ = team requested a hint

Edit the values of the constants at the top of this file for your purposes, e.g.
START_TIME, TEAM_NAMES, etc.
"""
import csv
import datetime
from collections import defaultdict

# Start time
START_TIME = datetime.datetime.strptime("2000-01-01 00:00:00", "%Y-%m-%d %H:%M:%S")
# 2.0 hours per hint
# N.B. assumes all hints _requested_ take a penalty,
# script will need editing if you want to only account for hints _used_
PENALTY_PER_HINT_IN_HOURS = 2.0
# "Final" level, the advance to which encodes that the team finished
FINAL_LEVEL = "51"
# List of team names as strings
TEAM_NAMES = []
# Path to hunt event csv taken from the website
CSV_FILE_PATH = r"C:\Users\username\Downloads\hunt.huntevent.csv"


def main(csv_file):
    teams = TEAM_NAMES
    team_raw_times = defaultdict(float)
    team_running_totals = defaultdict(float)
    team_hints_requested = defaultdict(int)
    team_levels = defaultdict(int)

    with open(csv_file, encoding="utf-8") as f:
        csv_reader = csv.DictReader(f)

        for line in csv_reader:
            team = line["user"]
            assert team in teams
            # penalty of x hours per hint
            if line["type"] == "REQ":
                team_running_totals[team] += PENALTY_PER_HINT_IN_HOURS
                team_hints_requested[team] += 1
            elif line["type"] == "ADV":
                team_levels[team] += 1
                # Final level
                if line["level"] == FINAL_LEVEL:
                    timestamp = line["time"].split(".")[0]
                    finish_time = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S")
                    time_taken = (finish_time - START_TIME).total_seconds() / 60 / 60
                    print(time_taken)
                    team_running_totals[team] += time_taken
                    team_raw_times[team] = time_taken

    print("Raw times", team_raw_times)
    print("Running totals", team_running_totals)
    print("Hints requested", team_hints_requested)
    print("Team levels completed", team_levels)


if __name__ == '__main__':
    main(CSV_FILE_PATH)
diff --git a/admin_scripts/level_validation.py b/admin_scripts/level_validation.py
new file mode 100644
index 0000000..255c392
--- /dev/null
+++ b/admin_scripts/level_validation.py
@@ -0,0 +1,132 @@
+"""Client-side validator for levels

Some of these checks just make sure that the hunt website won't reject the upload
(without having to actually attempt such an upload).
Other checks are for admin-y things like:
- Tolerances that are suspiciously tight
- README.md files (which are supposed to contain a detailed explanation of the structure of the level for the GM's use)
  being smaller than blurb.txt files (which are supposed to be a hunter-consumable précis of the level + answer/concept
  once they've solved it)
"""
import argparse
import json
import os
import re
import zipfile
from pathlib import Path


CONTENT_TYPES = {
    ".jpeg": "image/jpeg",
    ".jpg": "image/jpeg",
    ".png": "image/png",
}


def unzip_all():
    for filename in os.listdir(ALL_LEVELS_DIR):
        if filename.endswith(".zip"):
            folder_path = os.path.join(ALL_LEVELS_DIR, filename[:-4])
            if not os.path.exists(folder_path):
                with zipfile.ZipFile(os.path.join(ALL_LEVELS_DIR, filename)) as zip_ref:
                    zip_ref.extractall(folder_path)


def validate_format():
    count = 0
    for filename in os.listdir(ALL_LEVELS_DIR):
        dir_path = ALL_LEVELS_DIR / filename
        if os.path.isdir(dir_path) and "DUMMY" not in filename:
            count += 1
            if not os.path.exists(dir_path / "about.json"):
                print("No json in", filename)
            else:
                # Check json for values
                with open(dir_path / "about.json") as f:
                    check_json(f, filename)

            if not os.path.exists(dir_path / "readme.md"):
                print("No readme in", filename)

            if not os.path.exists(dir_path / "blurb.txt"):
                print("No blurb in", filename)

            # Check readme is bigger than blurb
            if os.path.exists(dir_path / "blurb.txt") and os.path.exists(dir_path / "readme.md") \
                    and os.path.getsize(dir_path / "blurb.txt") > os.path.getsize(dir_path / "readme.md"):
                print("Blurb is bigger than readme for", filename)

            images = [
                dir_path / file
                for file in os.listdir(dir_path)
                if Path(file).suffix.lower() in CONTENT_TYPES
            ]

            # Should find exactly the right number - check the file extensions if not.
            if len(images) != 5:
                print(f"Found {len(images)} images in {dir_path}")
            else:
                images.sort(key=lambda x: x.name.lower())
                if not images[0].name.startswith("clue"):
                    print("No clue in", filename)

                # Check the images aren't too big or bad things will happen to the upload
                # We don't want a repeat of the Wawrinka incident
                for image in images:
                    image_size = os.path.getsize(image)
                    if image_size > 3 * 1000 * 1000:  # ~3 MB
                        print("Image", image, "is too big in", filename, "size = ", f"{image_size:,}")

                for i in range(1, 5):
                    if not images[i].name.startswith("hint"):
                        print("No hint", i, "in", filename)

    print("Analyzed", count, "levels")


def check_coord(coord: str, coord_name, filename):
    if not coord:
        print("No", coord_name, "for level", filename)
        return
    value = float(coord)
    if value == 0.0:
        print(" warning: 0", coord_name, "for level", filename)

    numbers_and_dp_only = re.sub("[^0-9.]", "", coord)
    a, _, b = numbers_and_dp_only.partition(".")
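    # e.g. "51.50135" splits into a = "51" and b = "50135"; the checks below bound
    # the number of decimal places and the total count of digits in the coordinate.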
    if len(b) > 5:
        print("More than 5 dp for", coord_name, "for level", filename, ":", coord)
    if len(a) + len(b) > 7:
        print("More than 7 digits for", coord_name, "for level", filename, ":", coord)


def check_json(f, filename):
    json_data = json.load(f)
    if not len(json_data["name"]) > 0:
        print("No name for level", filename)

    check_coord(json_data["latitude"], "lat", filename)
    check_coord(json_data["longitude"], "long", filename)

    tol = int(json_data["tolerance"])
    if not tol:
        print("No tolerance for level", filename)
    elif tol < 1:
        print("0 tolerance for level", filename)
    elif tol < 20:
        print("Too-low-resolution tolerance of", tol, "for level", filename)
    elif tol <= 50:
        print(" warning: Small tolerance of", tol, "for level", filename)


if __name__ == '__main__':
    argparser = argparse.ArgumentParser()
    argparser.add_argument("input_directory",
                           help="Path to a directory containing the (possibly zipped) levels to be examined")
    args = argparser.parse_args()
    ALL_LEVELS_DIR = Path(args.input_directory)
    assert ALL_LEVELS_DIR.exists()
    assert ALL_LEVELS_DIR.is_dir()

    unzip_all()
    validate_format()
diff --git a/admin_scripts/site_scraper.py b/admin_scripts/site_scraper.py
new file mode 100644
index 0000000..294a93e
--- /dev/null
+++ b/admin_scripts/site_scraper.py
@@ -0,0 +1,124 @@
+"""Web scraper for the hunt website to download unlocked levels

Useful for archiving levels if the previous admin forgot to."""
import argparse
import json
import os
import sys
from typing import Tuple

import bs4
import requests


class PageLevelData(object):
    def __init__(self, level_num: int, previous_level_name: str,
                 previous_level_coords: Tuple[str, str], image_urls: list):
        self.level_num = level_num
        self.previous_level_name = previous_level_name
        self.previous_level_coords = previous_level_coords
        self.image_urls = image_urls
        self.level_name = None
        self.level_coords = None


def print_err(message):
    print(message, file=sys.stderr)


def scrape_level(level_num) -> str:
    url = f"https://www.e-treasure-hunt.com/level/{level_num}"
    r = requests.get(url, headers={"cookie": COOKIE})
    if r.ok:
        return r.text
    else:
        print_err("%d: %s" % (r.status_code, r.text))
        return ""


def parse_level_data_from_html(html_text, level_num):
    soup = bs4.BeautifulSoup(html_text, features="html.parser")

    previous_level_name = soup.body.find("div", "heading").h1.contents[0]
    previous_level_coords_h3 = soup.body.find("h3")
    if previous_level_coords_h3:
        previous_level_coords_string = previous_level_coords_h3.contents[0]
        x, y = previous_level_coords_string.split(",")
        previous_level_coords = (x.strip(), y.strip())
    else:
        previous_level_coords = None

    hint_elements = soup.body.find_all("img", "hint")
    img_srcs = [hint_element["src"] for hint_element in hint_elements]
    # print(repr(hint_elements))
    return PageLevelData(previous_level_name=previous_level_name,
                         previous_level_coords=previous_level_coords,
                         image_urls=img_srcs,
                         level_num=level_num)


def main(save_off_directory="."):
    levels = []
    for level_num in range(MIN_LEVEL, MAX_LEVEL + 1):
        html_text = scrape_level(level_num)
        level_data = parse_level_data_from_html(html_text, level_num=level_num)
        levels.append(level_data)

    # Correct off-by-one on name, coords
    for i, level in enumerate(levels):
        if i + 1 < len(levels):
            level.level_name = levels[i + 1].previous_level_name
            level.level_coords = levels[i + 1].previous_level_coords
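
    # The last level scraped has no following page, so its level_name and
    # level_coords stay None; its about.json below is written with a null name
    # and blank coordinates.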

    # Save off data
    if not os.path.exists(save_off_directory):
        os.mkdir(save_off_directory)

    for level in levels:
        level_directory = os.path.join(save_off_directory, str(level.level_num))
        if not os.path.exists(level_directory):
            os.mkdir(level_directory)

        # N.B. missing tolerance
        x_coord = level.level_coords[0] if level.level_coords is not None else ""
        y_coord = level.level_coords[1] if level.level_coords is not None else ""
        json_data = json.dumps({"name": level.level_name, "latitude": x_coord, "longitude": y_coord}, indent=2)
        with open(os.path.join(level_directory, "about.json"), "w") as f:
            f.write(json_data)

        for i, img_url in enumerate(level.image_urls):
            img_response = requests.get(img_url)
            if img_response.ok:
                file_ext = "img"
                if "Content-Type" in img_response.headers:
                    content_type = img_response.headers["Content-Type"]
                    if content_type == "image/png":
                        file_ext = "png"
                    elif content_type == "image/jpeg":
                        file_ext = "jpeg"
                    else:
                        print_err("Unknown content type: %s" % content_type)
                else:
                    print_err("No content type for %s response!" % img_url)

                img_filename = f"img{i}.{file_ext}"
                with open(os.path.join(level_directory, img_filename), "wb") as f:
                    f.write(img_response.content)
            else:
                print_err("%d: %s" % (img_response.status_code, img_response.text))


if __name__ == '__main__':
    argparser = argparse.ArgumentParser()
    argparser.add_argument("cookie", help="Site cookie, in format 'csrftoken=; sessionid='")
    argparser.add_argument("save_dir", help="Path to a directory into which to save the levels. "
                                            "Script will create it if it doesn't exist.")
    argparser.add_argument("minlevel", type=int, help="Minimum level number, usually 1")
    argparser.add_argument("maxlevel", type=int,
                           help="Maximum level number. A level's title and coords are only revealed "
                                "on the subsequent level's page, so scrape one level beyond the last "
                                "level you want fully archived.")
    args = argparser.parse_args()
    COOKIE = args.cookie
    MIN_LEVEL = args.minlevel
    MAX_LEVEL = args.maxlevel
    main(save_off_directory=args.save_dir)
diff --git a/upload.py b/admin_scripts/upload.py
old mode 100755
new mode 100644
similarity index 100%
rename from upload.py
rename to admin_scripts/upload.py
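
For reference, a minimal sketch (not part of the patch) of an `about.json` that satisfies the checks in `level_validation.py`. The key names and limits are taken from `check_json` and `check_coord` above; the example values are purely illustrative.

```python
import json

# Illustrative level metadata: non-empty name, coordinates with at most
# 5 decimal places and at most 7 digits in total, and a tolerance above 50
# so the validator raises no warnings.
about = {
    "name": "Example Level",
    "latitude": "51.50135",
    "longitude": "-0.14189",
    "tolerance": 100,
}

with open("about.json", "w") as f:
    json.dump(about, f, indent=2)
```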