From 38cee6e2aa9489339332146bd4ea331ff2ac164c Mon Sep 17 00:00:00 2001
From: TGS
Date: Sat, 1 Jul 2023 16:59:42 +0100
Subject: [PATCH] Admin scripts to facilitate running the hunt

---
 .gitignore                           |   3 +
 README.md                            |   9 +-
 admin_scripts/calculate_winners.py   |  62 ++++++++++++
 admin_scripts/level_validation.py    | 132 ++++++++++++++++++++++++
 admin_scripts/site_scraper.py        | 124 +++++++++++++++++++++
 upload.py => admin_scripts/upload.py |   0
 6 files changed, 328 insertions(+), 2 deletions(-)
 create mode 100644 admin_scripts/calculate_winners.py
 create mode 100644 admin_scripts/level_validation.py
 create mode 100644 admin_scripts/site_scraper.py
 rename upload.py => admin_scripts/upload.py (100%)
 mode change 100755 => 100644

diff --git a/.gitignore b/.gitignore
index 95323ba..5d6c7b6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,6 @@
 /media/
 /static/
 /treasure.sqlite
+
+# e.g. Pycharm config
+.idea/
diff --git a/README.md b/README.md
index 5233971..24d0aa8 100644
--- a/README.md
+++ b/README.md
@@ -75,7 +75,7 @@ docker run \
   e-treasure-hunt
 ```

-To use Google maps, you will also need to pass `GM_API_KEY` to this container as
+To use Google Maps, you will also need to pass `GM_API_KEY` to this container as
 an environment variable.

 # Initiating the app
@@ -105,9 +105,14 @@ and N+1.
 You can use the files in `dummy_files.zip`, updating `blurb.txt` at level 0 with
 text for the start of the hunt.

+It is recommended that, prior to attempting upload, [level_validation.py](admin_scripts/level_validation.py)
+be run over the levels. This will catch numerous formatting problems with the levels before wasting your
+time/bandwidth on server upload, and will also catch several conditions that are not technically errors
+but are undesirable, such as empty README.md files and too-tight tolerances.
+
 ### Level upload through the API

-[upload.py](upload.py) contains utilities for uploading levels and hints.
+[upload.py](admin_scripts/upload.py) contains utilities for uploading levels and hints.
 You'll need to update the `SERVER` and credentials at the top of the file, and
 then re-arrange `main()` as appropriate to upload your levels.

diff --git a/admin_scripts/calculate_winners.py b/admin_scripts/calculate_winners.py
new file mode 100644
index 0000000..aead04f
--- /dev/null
+++ b/admin_scripts/calculate_winners.py
@@ -0,0 +1,62 @@
+"""Parse the hunt events CSV downloaded from the hunt website to see who won by various metrics

ADV = team advanced to that level
REQ = team requested a hint

Edit the values of the constants at the top of this file for your purposes, e.g.
START_TIME, TEAM_NAMES, etc.
"""
import csv
import datetime
from collections import defaultdict

# Start time
START_TIME = datetime.datetime.strptime("2000-01-01 00:00:00", "%Y-%m-%d %H:%M:%S")
# 2.0 hours per hint
# N.B. assumes all hints _requested_ take a penalty,
# script will need editing if you want to only account for hints _used_
PENALTY_PER_HINT_IN_HOURS = 2.0
# "Final" level, the advance to which encodes that the team finished
FINAL_LEVEL = "51"
# List of team names as strings
TEAM_NAMES = []
# Path to hunt event csv taken from the website
CSV_FILE_PATH = r"C:\Users\username\Downloads\hunt.huntevent.csv"


def main(csv_file):
    teams = TEAM_NAMES
    team_raw_times = defaultdict(float)
    team_running_totals = defaultdict(float)
    team_hints_requested = defaultdict(int)
    team_levels = defaultdict(int)

    with open(csv_file, encoding="utf-8") as f:
        csv_reader = csv.DictReader(f)

        for line in csv_reader:
            team = line["user"]
            assert team in teams
            # penalty of x hours per hint
            if line["type"] == "REQ":
                team_running_totals[team] += PENALTY_PER_HINT_IN_HOURS
                team_hints_requested[team] += 1
            elif line["type"] == "ADV":
                team_levels[team] += 1
                # Final level
                if line["level"] == FINAL_LEVEL:
                    timestamp = line["time"].split(".")[0]
                    finish_time = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S")
                    time_taken = (finish_time - START_TIME).total_seconds() / 60 / 60
                    print(time_taken)
                    team_running_totals[team] += time_taken
                    team_raw_times[team] = time_taken

    print("Raw times", team_raw_times)
    print("Running totals", team_running_totals)
    print("Hints requested", team_hints_requested)
    print("Team levels completed", team_levels)


if __name__ == '__main__':
    main(CSV_FILE_PATH)
diff --git a/admin_scripts/level_validation.py b/admin_scripts/level_validation.py
new file mode 100644
index 0000000..255c392
--- /dev/null
+++ b/admin_scripts/level_validation.py
@@ -0,0 +1,132 @@
+"""Client-side validator for levels

Some of these checks just make sure that the hunt website won't reject the upload
(without having to actually attempt such an upload).
Other checks are for admin-y things like:
- Tolerances that are suspiciously tight
- README.md files (which are supposed to contain a detailed explanation of the structure of the level for the GM's use)
  being smaller than blurb.txt files (which are supposed to be a hunter-consumable précis of the level + answer/concept
  once they've solved it)
"""
import argparse
import json
import os
import re
import zipfile
from pathlib import Path


CONTENT_TYPES = {
    ".jpeg": "image/jpeg",
    ".jpg": "image/jpeg",
    ".png": "image/png",
}


def unzip_all():
    for filename in os.listdir(ALL_LEVELS_DIR):
        if filename.endswith(".zip"):
            folder_path = os.path.join(ALL_LEVELS_DIR, filename[:-4])
            if not os.path.exists(folder_path):
                with zipfile.ZipFile(os.path.join(ALL_LEVELS_DIR, filename)) as zip_ref:
                    zip_ref.extractall(folder_path)


def validate_format():
    count = 0
    for filename in os.listdir(ALL_LEVELS_DIR):
        dir_path = ALL_LEVELS_DIR / filename
        if os.path.isdir(dir_path) and "DUMMY" not in filename:
            count += 1
            if not os.path.exists(dir_path / "about.json"):
                print("No json in", filename)
            else:
                # Check json for values
                with open(dir_path / "about.json") as f:
                    check_json(f, filename)

            if not os.path.exists(dir_path / "readme.md"):
                print("No readme in", filename)

            if not os.path.exists(dir_path / "blurb.txt"):
                print("No blurb in", filename)

            # Check readme is bigger than blurb
            if os.path.exists(dir_path / "blurb.txt") and os.path.exists(dir_path / "readme.md") \
                    and os.path.getsize(dir_path / "blurb.txt") > os.path.getsize(dir_path / "readme.md"):
                print("Blurb is bigger than readme for", filename)

            images = [
                dir_path / file
                for file in os.listdir(dir_path)
                if Path(file).suffix.lower() in CONTENT_TYPES
            ]

            # Should find exactly the right number - check the file extensions if not.
            if len(images) != 5:
                print(f"Found {len(images)} images in {dir_path}")
            else:
                images.sort(key=lambda x: x.name.lower())
                if not images[0].name.startswith("clue"):
                    print("No clue in", filename)

                # Check the images aren't too big or bad things will happen to the upload
                # We don't want a repeat of the Wawrinka incident
                for image in images:
                    image_size = os.path.getsize(image)
                    if image_size > 3 * 1000 * 1000:  # ~3 MB
                        print("Image", image, "is too big in", filename, "size = ", f"{image_size:,}")

                for i in range(1, 5):
                    if not images[i].name.startswith("hint"):
                        print("No hint", i, "in", filename)

    print("Analyzed", count, "levels")


def check_coord(coord: str, coord_name, filename):
    if not coord:
        print("No", coord_name, "for level", filename)
        return
    value = float(coord)
    if value == 0.0:
        print(" warning: 0", coord_name, "for level", filename)

    numbers_and_dp_only = re.sub("[^0-9.]", "", coord)
    a, _, b = numbers_and_dp_only.partition(".")
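    # e.g. "51.50135" splits into a = "51" and b = "50135"; the checks below bound
    # the number of decimal places and the total count of digits in the coordinate.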
    if len(b) > 5:
        print("More than 5 dp for", coord_name, "for level", filename, ":", coord)
    if len(a) + len(b) > 7:
        print("More than 7 digits for", coord_name, "for level", filename, ":", coord)


def check_json(f, filename):
    json_data = json.load(f)
    if not len(json_data["name"]) > 0:
        print("No name for level", filename)

    check_coord(json_data["latitude"], "lat", filename)
    check_coord(json_data["longitude"], "long", filename)

    tol = int(json_data["tolerance"])
    if not tol:
        print("No tolerance for level", filename)
    elif tol < 1:
        print("0 tolerance for level", filename)
    elif tol < 20:
        print("Too-low-resolution tolerance of", tol, "for level", filename)
    elif tol <= 50:
        print(" warning: Small tolerance of", tol, "for level", filename)


if __name__ == '__main__':
    argparser = argparse.ArgumentParser()
    argparser.add_argument("input_directory",
                           help="Path to a directory containing the (possibly zipped) levels to be examined")
    args = argparser.parse_args()
    ALL_LEVELS_DIR = Path(args.input_directory)
    assert ALL_LEVELS_DIR.exists()
    assert ALL_LEVELS_DIR.is_dir()

    unzip_all()
    validate_format()
diff --git a/admin_scripts/site_scraper.py b/admin_scripts/site_scraper.py
new file mode 100644
index 0000000..294a93e
--- /dev/null
+++ b/admin_scripts/site_scraper.py
@@ -0,0 +1,124 @@
+"""Web scraper for the hunt website to download unlocked levels

Useful for archiving levels if the previous admin forgot to."""
import argparse
import json
import os
import sys
from typing import Tuple

import bs4
import requests


class PageLevelData(object):
    def __init__(self, level_num: int, previous_level_name: str,
                 previous_level_coords: Tuple[str, str], image_urls: list):
        self.level_num = level_num
        self.previous_level_name = previous_level_name
        self.previous_level_coords = previous_level_coords
        self.image_urls = image_urls
        self.level_name = None
        self.level_coords = None


def print_err(message):
    print(message, file=sys.stderr)


def scrape_level(level_num) -> str:
    url = f"https://www.e-treasure-hunt.com/level/{level_num}"
    r = requests.get(url, headers={"cookie": COOKIE})
    if r.ok:
        return r.text
    else:
        print_err("%d: %s" % (r.status_code, r.text))
        return ""


def parse_level_data_from_html(html_text, level_num):
    soup = bs4.BeautifulSoup(html_text, features="html.parser")

    previous_level_name = soup.body.find("div", "heading").h1.contents[0]
    previous_level_coords_h3 = soup.body.find("h3")
    if previous_level_coords_h3:
        previous_level_coords_string = previous_level_coords_h3.contents[0]
        x, y = previous_level_coords_string.split(",")
        previous_level_coords = (x.strip(), y.strip())
    else:
        previous_level_coords = None

    hint_elements = soup.body.find_all("img", "hint")
    img_srcs = [hint_element["src"] for hint_element in hint_elements]
    # print(repr(hint_elements))
    return PageLevelData(previous_level_name=previous_level_name,
                         previous_level_coords=previous_level_coords,
                         image_urls=img_srcs,
                         level_num=level_num)


def main(save_off_directory="."):
    levels = []
    for level_num in range(MIN_LEVEL, MAX_LEVEL + 1):
        html_text = scrape_level(level_num)
        level_data = parse_level_data_from_html(html_text, level_num=level_num)
        levels.append(level_data)

    # Correct off-by-one on name, coords
    for i, level in enumerate(levels):
        if i + 1 < len(levels):
            level.level_name = levels[i + 1].previous_level_name
            level.level_coords = levels[i + 1].previous_level_coords
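
    # The last level scraped has no following page, so its level_name and
    # level_coords stay None; its about.json below is written with a null name
    # and blank coordinates.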

    # Save off data
    if not os.path.exists(save_off_directory):
        os.mkdir(save_off_directory)

    for level in levels:
        level_directory = os.path.join(save_off_directory, str(level.level_num))
        if not os.path.exists(level_directory):
            os.mkdir(level_directory)

        # N.B. missing tolerance
        x_coord = level.level_coords[0] if level.level_coords is not None else ""
        y_coord = level.level_coords[1] if level.level_coords is not None else ""
        json_data = json.dumps({"name": level.level_name, "latitude": x_coord, "longitude": y_coord}, indent=2)
        with open(os.path.join(level_directory, "about.json"), "w") as f:
            f.write(json_data)

        for i, img_url in enumerate(level.image_urls):
            img_response = requests.get(img_url)
            if img_response.ok:
                file_ext = "img"
                if "Content-Type" in img_response.headers:
                    content_type = img_response.headers["Content-Type"]
                    if content_type == "image/png":
                        file_ext = "png"
                    elif content_type == "image/jpeg":
                        file_ext = "jpeg"
                    else:
                        print_err("Unknown content type: %s" % content_type)
                else:
                    print_err("No content type for %s response!" % img_url)

                img_filename = f"img{i}.{file_ext}"
                with open(os.path.join(level_directory, img_filename), "wb") as f:
                    f.write(img_response.content)
            else:
                print_err("%d: %s" % (img_response.status_code, img_response.text))


if __name__ == '__main__':
    argparser = argparse.ArgumentParser()
    argparser.add_argument("cookie", help="Site cookie, in format 'csrftoken=; sessionid='")
    argparser.add_argument("save_dir", help="Path to a directory into which to save the levels. "
                                            "Script will create it if it doesn't exist.")
    argparser.add_argument("minlevel", type=int, help="Minimum level number, usually 1")
    argparser.add_argument("maxlevel", type=int,
                           help="Maximum level number. A level's title and coords are only revealed "
                                "on the subsequent level's page, so scrape one level beyond the last "
                                "level you want fully archived.")
    args = argparser.parse_args()
    COOKIE = args.cookie
    MIN_LEVEL = args.minlevel
    MAX_LEVEL = args.maxlevel
    main(save_off_directory=args.save_dir)
diff --git a/upload.py b/admin_scripts/upload.py
old mode 100755
new mode 100644
similarity index 100%
rename from upload.py
rename to admin_scripts/upload.py
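
For reference, a minimal sketch (not part of the patch) of an `about.json` that satisfies the checks in `level_validation.py`. The key names and limits are taken from `check_json` and `check_coord` above; the example values are purely illustrative.

```python
import json

# Illustrative level metadata: non-empty name, coordinates with at most
# 5 decimal places and at most 7 digits in total, and a tolerance above 50
# so the validator raises no warnings.
about = {
    "name": "Example Level",
    "latitude": "51.50135",
    "longitude": "-0.14189",
    "tolerance": 100,
}

with open("about.json", "w") as f:
    json.dump(about, f, indent=2)
```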