diff --git a/.github/workflows/script_module_list.yml b/.github/workflows/script_module_list.yml
new file mode 100644
index 00000000000..0c858195873
--- /dev/null
+++ b/.github/workflows/script_module_list.yml
@@ -0,0 +1,50 @@
+name: Module overview script (lint + test)
+# Only run when the scripts or the CI configuration change (path filters are relative to the repository root).
+on:
+  push:
+    paths:
+      - 'scripts/**'
+      - '.github/**'
+  pull_request:
+    paths:
+      - 'scripts/**'
+      - '.github/**'
+
+# Declare default permissions as read only.
+permissions: read-all
+jobs:
+
+  flake8-lint:
+    runs-on: ubuntu-20.04
+    name: Lint
+    steps:
+      - name: Check out source repository
+        uses: actions/checkout@v3
+      - name: Set up Python environment
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.6"
+      - name: flake8 Lint
+        uses: py-actions/flake8@v2
+        with:
+          max-line-length: "120"
+          path: "scripts/module_overview"
+
+  pytest-tests:
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.6'
+      - name: Install dependencies
+        run: |
+          cd scripts/module_overview
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install -r requirements_tests.txt
+      - name: Test with pytest
+        run: |
+          cd scripts/module_overview
+          ./test.sh
\ No newline at end of file
diff --git a/scripts/README.md b/scripts/README.md
new file mode 100644
index 00000000000..8d0b1237666
--- /dev/null
+++ b/scripts/README.md
@@ -0,0 +1,3 @@
+Scripts that can be used to automatically generate markdown files can be found here.
+ +* [`module_overview`](module_overview): script to generate overview of available environment modules; \ No newline at end of file diff --git a/scripts/module_overview/README.md b/scripts/module_overview/README.md new file mode 100644 index 00000000000..95d61ad12c9 --- /dev/null +++ b/scripts/module_overview/README.md @@ -0,0 +1,95 @@ +# Module overview + +A script to generate an table overview of all available module files in MarkDown format, +which indicates on which clusters each module is available. + +## Requirements +- Required Python packages are listed in `requirements.txt` and `requirements_tests.txt` +- [Lmod](https://github.com/TACC/Lmod) must be available, and `$LMOD_CMD` must specify the path to the `lmod` binary. + + +### Creating a virtual environment (optional) + +If the required Python packages are not available in your Python setup, +you can easily create a dedicated virtual environment as follows: + +```shell +python -m venv module_overview_venv +source module_overview_venv/bin/activate +pip install -r requirements.txt +pip install -r requirements_tests.txt +# to exit the virtual environment, run 'deactivate' +``` + +## Usage +You can run the script with following command: + +```shell +python module_overview.py +``` + +## Testing +You can run the tests by running the `test.sh` script. +```shell +./test.sh +``` + +The tests make use of a mocked `$LMOD_CMD` script, which you can find [here](tests/data/lmod_mock.sh). + +### Write tests +If you want to write additional tests and use the script effectively, follow these guidelines: + + +1. **Setting up mocked Lmod:** + + Before each test, ensure that you set the path to the script that mocks the `lmod` binary. + This can be done within the setup_class function. + ```python + path = os.path.dirname(os.path.realpath(__file__)) + + @classmethod + def setup_class(cls): + os.environ["LMOD_CMD"] = cls.path + "/data/lmod_mock.sh" + ``` + +2. 
**Mocking output of `module avail cluster` command:** + + The output of the command `module avail cluster/` can be put in a `.txt` file. + Set the path to this file in the `$MOCK_FILE_AVAIL_CLUSTER` environment variable. + ```python + os.environ["MOCK_FILE_AVAIL_CLUSTER"] = path + "/data/data_avail_cluster_simple.txt" + ``` + +3. **Mocking the `module swap` command:** + + For mocking the `module swap` command, assign the path to the swap files to the `$MOCK_FILE_SWAP` environment variable. + Ensure that the filename contains the placeholder '`CLUSTER`', + which will later be replaced with the actual cluster name when performing the swap. + + ```python + os.environ["MOCK_FILE_SWAP"] = path + "/data/data_swap_CLUSTER.txt" + ``` + When trying to swap to, for example, the `cluster/pikachu` cluster, + it will use the `data_swap_pikachu.txt` file as output for the swap command. + +### Example +An example of a possible `setup_class` function is given below. +```python +import os + +@classmethod +def setup_class(cls): + os.environ["TESTS_PATH"] = cls.path + os.environ["LMOD_CMD"] = cls.path + "/data/lmod_mock.sh" + os.environ["MOCK_FILE_AVAIL_CLUSTER"] = cls.path + "/data/data_avail_cluster_simple.txt" + os.environ["MOCK_FILE_SWAP"] = cls.path + "/data/data_swap_CLUSTER.txt" +``` + +This does multiple things: +1. Set the path of the tests folder in `$TESTS_PATH` +2. Set the path to the `lmod_mock.sh` script in the environment variable `$LMOD_CMD` +3. Set the output file for the `module avail cluster/` to the `MOCK_FILE_AVAIL_CLUSTER` variable. + The actual output can be found in the `data/data_avail_cluster_simple.txt` file. +4. Set the swap files output to the `MOCK_FILE_SWAP` variable. + Files with swap outut will have the `data/data_swap_CLUSTER.txt`. + For example, `data/data_swap_pikachu.txt` could be a possible file. 
\ No newline at end of file
diff --git a/scripts/module_overview/module_overview.py b/scripts/module_overview/module_overview.py
new file mode 100644
index 00000000000..45556d53280
--- /dev/null
+++ b/scripts/module_overview/module_overview.py
@@ -0,0 +1,241 @@
+#
+# Copyright 2023-2023 Ghent University
+#
+# This file is part of vsc_user_docs,
+# originally created by the HPC team of Ghent University (http://ugent.be/hpc/en),
+# with support of Ghent University (http://ugent.be/hpc),
+# the Flemish Supercomputer Centre (VSC) (https://www.vscentrum.be),
+# the Flemish Research Foundation (FWO) (http://www.fwo.be/en)
+# and the Department of Economy, Science and Innovation (EWI) (http://www.ewi-vlaanderen.be/en).
+#
+# https://github.com/hpcugent/vsc_user_docs
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+"""
+Python script to generate an overview of available modules across different clusters, in MarkDown format.
+
+@author: Michiel Lachaert (Ghent University)
+"""
+
+import numpy as np
+import os
+import subprocess
+from mdutils.mdutils import MdUtils
+from typing import Union, Tuple
+
+
+# --------------------------------------------------------------------------------------------------------
+# Functions to run "module" commands
+# --------------------------------------------------------------------------------------------------------
+
+def module(*args, filter_fn=lambda x: x) -> np.ndarray:
+    """
+    Function to run "module" commands.
+
+    Lmod is run in "python" mode: the Python statements it prints on stdout are executed in this process,
+    and the whitespace-separated words it prints on stderr are returned.
+
+    @param args: Extra arguments for the module command.
+    @param filter_fn: Filter function on the output.
+    @return: Array with the output of the module command.
+    @raise EnvironmentError: If $LMOD_CMD is not set.
+    """
+    lmod = os.getenv('LMOD_CMD')
+    if not lmod:
+        raise EnvironmentError("$LMOD_CMD is not set, cannot locate the lmod binary")
+
+    proc = subprocess.run(
+        [lmod, "python", "--terse"] + list(args),
+        encoding="utf-8",
+        stderr=subprocess.PIPE,
+        stdout=subprocess.PIPE
+    )
+    # NOTE: the stdout of $LMOD_CMD is executed as Python code, so $LMOD_CMD must point to a trusted binary.
+    exec(proc.stdout)
+    # dtype=str keeps an empty result usable by the np.char based filter functions
+    return filter_fn(np.array(proc.stderr.split(), dtype=str))
+
+
+def module_avail(name: str = "", filter_fn=lambda x: x) -> np.ndarray:
+    """
+    Function to run "module avail" commands.
+
+    @param name: Module name, or empty string to return all available modules.
+    @param filter_fn: Filter on the output.
+    @return: List of all available modules of name, or all if name is not given.
+    """
+    return module("avail", name, filter_fn=filter_fn)
+
+
+def module_swap(name: str) -> None:
+    """
+    Function to run "module swap" commands.
+
+    @param name: Name of module you want to swap to.
+    """
+    module("swap", name)
+
+
+# --------------------------------------------------------------------------------------------------------
+# Fetch data
+# --------------------------------------------------------------------------------------------------------
+
+def filter_fn_gent_cluster(data: np.ndarray) -> np.ndarray:
+    """
+    Filter function for output of "module avail" commands on HPC-UGent infrastructure.
+
+    Filters out lines ending with ':' (which are paths to module files),
+    and lines starting with 'env/' or 'cluster/default', which are not actually software modules
+    @param data: Output
+    @return: Filtered output
+    """
+    return data[~np.char.endswith(data, ":") &
+                ~np.char.startswith(data, "env/") &
+                ~np.char.startswith(data, "cluster/default")
+                ]
+
+
+def filter_fn_gent_modules(data: np.ndarray) -> np.ndarray:
+    """
+    Filter function for the output of all software modules (excl. `cluster` and `env` modules).
+    @param data: Output
+    @return: Filtered output
+    """
+    return data[~np.char.endswith(data, ":") &
+                ~np.char.startswith(data, "env/") &
+                ~np.char.startswith(data, "cluster/")
+                ]
+
+
+def clusters_ugent() -> np.ndarray:
+    """
+    Returns all the cluster names of the HPC at UGent.
+    @return: cluster names
+    """
+
+    return module_avail(name="cluster/", filter_fn=filter_fn_gent_cluster)
+
+
+def modules_ugent() -> dict:
+    """
+    Returns names of all software module that are installed on the HPC on UGent.
+    They are grouped by cluster.
+    @return: Dictionary with all the modules per cluster
+    """
+    print("Start collecting modules:")
+    data = {}
+    for cluster in clusters_ugent():
+        print(f"\t Collecting available modules for {cluster}... ", end="", flush=True)
+        module_swap(cluster)
+        data[cluster] = module_avail(filter_fn=filter_fn_gent_modules)
+        print(f"found {len(data[cluster])} modules!")
+
+    print("All data collected!\n")
+    return data
+
+
+# --------------------------------------------------------------------------------------------------------
+# Util functions
+# --------------------------------------------------------------------------------------------------------
+
+def mod_names_to_software_names(mod_list: np.ndarray) -> np.ndarray:
+    """
+    Convert a list of module names to a list of the software names.
+
+    @param mod_list: List of the module names
+    @return: List of the corresponding software names
+    """
+    return np.unique(np.array([entry.split("/")[0] for entry in mod_list], dtype=str))
+
+
+def get_unique_software_names(data: Union[dict, list, np.ndarray]) -> Union[dict, list, np.ndarray]:
+    """
+    Simplify list of modules by removing versions and duplicates.
+
+    @param data: List of modules
+    @return: List of software names.
+    """
+
+    if isinstance(data, dict):
+        simplified_data = {cluster: mod_names_to_software_names(data[cluster]) for cluster in data}
+    else:
+        simplified_data = mod_names_to_software_names(data)
+
+    return simplified_data
+
+
+# --------------------------------------------------------------------------------------------------------
+# Generate markdown
+# --------------------------------------------------------------------------------------------------------
+
+def generate_table_data(avail_mods: dict) -> Tuple[np.ndarray, int, int]:
+    """
+    Generate data that can be used to construct a MarkDown table.
+
+    The table is flattened row by row: a header row (blank corner cell followed by the cluster names),
+    then one row per software name with an "X" for every cluster that provides it.
+
+    @param avail_mods: Available modules
+    @return: Returns tuple (Table data, #col, #row)
+    """
+    avail_mods = get_unique_software_names(avail_mods)
+    cluster_names = [x.split('/')[1] for x in avail_mods.keys()]
+    if avail_mods:
+        all_modules = get_unique_software_names(np.concatenate(list(avail_mods.values())))
+    else:
+        # np.concatenate() raises on an empty sequence, so handle "no clusters" explicitly
+        all_modules = np.array([], dtype=str)
+
+    # one set per cluster gives constant-time membership tests in the loop below
+    software_per_cluster = [set(avail_mods[cluster]) for cluster in avail_mods]
+
+    # collect the cells in a list and convert once, instead of copying the array on every np.append()
+    cells = [" "] + cluster_names
+    for package in all_modules:
+        cells.append(package)
+        cells.extend("X" if package in software else " " for software in software_per_cluster)
+
+    return np.array(cells), len(cluster_names) + 1, len(all_modules) + 1
+
+
+def generate_module_table(data: dict, md_file: MdUtils) -> None:
+    """
+    Generate the general table of the overview.
+
+    @param data: Dict with all the data. Keys are the cluster names.
+    @param md_file: MdUtils object.
+    """
+    print("Generating markdown table... ", end="", flush=True)
+    structured, col, row = generate_table_data(data)
+    md_file.new_table(columns=col, rows=row, text=list(structured), text_align='center')
+    print("Done!")
+
+
+def generate_general_overview() -> None:
+    """
+    Generate the general overview in a markdown file.
+    It generates a list of all the available software and indicates on which cluster it is available.
+    """
+    md_fn = 'module_overview.md'
+    md_file = MdUtils(file_name=md_fn, title='Overview of available modules per cluster')
+    data = modules_ugent()
+    generate_module_table(data, md_file)
+    md_file.create_md_file()
+    print(f"Module overview created at {md_fn}")
+
+
+if __name__ == '__main__':
+    # Generate the overview
+    generate_general_overview()
diff --git a/scripts/module_overview/requirements.txt b/scripts/module_overview/requirements.txt
new file mode 100644
index 00000000000..d19bfbded6f
--- /dev/null
+++ b/scripts/module_overview/requirements.txt
@@ -0,0 +1,2 @@
+mdutils
+numpy
\ No newline at end of file
diff --git a/scripts/module_overview/requirements_tests.txt b/scripts/module_overview/requirements_tests.txt
new file mode 100644
index 00000000000..aad120b7c4e
--- /dev/null
+++ b/scripts/module_overview/requirements_tests.txt
@@ -0,0 +1,2 @@
+flake8
+pytest
\ No newline at end of file
diff --git a/scripts/module_overview/test.sh b/scripts/module_overview/test.sh
new file mode 100755
index 00000000000..85fd00b1a50
--- /dev/null
+++ b/scripts/module_overview/test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+PYTHONPATH=$PWD:$PYTHONPATH pytest -v -s
\ No newline at end of file
diff --git a/scripts/module_overview/tests/data/data_avail_cluster_simple.txt b/scripts/module_overview/tests/data/data_avail_cluster_simple.txt
new file mode 100644
index 00000000000..0993a70fa8e
--- /dev/null
+++ b/scripts/module_overview/tests/data/data_avail_cluster_simple.txt
@@ -0,0 +1,4 @@
+/etc/modulefiles/vsc:
+cluster/dialga
+cluster/pikachu
+cluster/default
diff --git 
a/scripts/module_overview/tests/data/data_avail_simple_dialga.txt b/scripts/module_overview/tests/data/data_avail_simple_dialga.txt
new file mode 100644
index 00000000000..54015e0f190
--- /dev/null
+++ b/scripts/module_overview/tests/data/data_avail_simple_dialga.txt
@@ -0,0 +1,27 @@
+/apps/modules/dialga/all:
+cfd/1.0
+cfd/2.0
+cfd/24
+cfd/5.0
+cfd/2.0afqsdf
+Markov/hidden-1.0.5
+Markov/hidden-1.0.10
+Markov/
+science/
+science/5.3.0
+science/5.3.0
+science/5.3.0
+science/7.2.0
+/etc/modulefiles/vsc:
+cluster/
+cluster/dialga
+cluster/pikachu
+env/slurm/
+env/slurm/dialga
+env/slurm/pikachu
+env/software/
+env/software/dialga
+env/software/pikachu
+env/vsc/
+env/vsc/dialga
+env/vsc/pikachu
\ No newline at end of file
diff --git a/scripts/module_overview/tests/data/data_avail_simple_pikachu.txt b/scripts/module_overview/tests/data/data_avail_simple_pikachu.txt
new file mode 100644
index 00000000000..eb4c614308b
--- /dev/null
+++ b/scripts/module_overview/tests/data/data_avail_simple_pikachu.txt
@@ -0,0 +1,29 @@
+/apps/modules/pikachu/all:
+cfd/1.0
+cfd/2.0
+cfd/3.0
+cfd/24
+cfd/
+cfd/5.0
+cfd/2.0afqsdf
+llm/20230627
+llm/20230627
+llm/20230627
+science/
+science/5.3.0
+science/5.3.0
+science/5.3.0
+science/7.2.0
+/etc/modulefiles/vsc:
+cluster/
+cluster/dialga
+cluster/pikachu
+env/slurm/
+env/slurm/dialga
+env/slurm/pikachu
+env/software/
+env/software/dialga
+env/software/pikachu
+env/vsc/
+env/vsc/dialga
+env/vsc/pikachu
\ No newline at end of file
diff --git a/scripts/module_overview/tests/data/data_swap_dialga.txt b/scripts/module_overview/tests/data/data_swap_dialga.txt
new file mode 100644
index 00000000000..4184ee6d7d8
--- /dev/null
+++ b/scripts/module_overview/tests/data/data_swap_dialga.txt
@@ -0,0 +1 @@
+os.environ["MOCK_FILE_AVAIL"] = os.getenv('TESTS_PATH') + "/data/data_avail_simple_dialga.txt"
\ No newline at end of file
diff --git a/scripts/module_overview/tests/data/data_swap_pikachu.txt b/scripts/module_overview/tests/data/data_swap_pikachu.txt
new file mode 100644
index 00000000000..544f44f6a18
--- /dev/null
+++ b/scripts/module_overview/tests/data/data_swap_pikachu.txt
@@ -0,0 +1 @@
+os.environ["MOCK_FILE_AVAIL"] = os.getenv('TESTS_PATH') + "/data/data_avail_simple_pikachu.txt"
diff --git a/scripts/module_overview/tests/data/lmod_mock.sh b/scripts/module_overview/tests/data/lmod_mock.sh
new file mode 100755
index 00000000000..710586ac370
--- /dev/null
+++ b/scripts/module_overview/tests/data/lmod_mock.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+# Minimal stand-in for the lmod binary, used by the tests through $LMOD_CMD.
+# "avail" prints the content of a mock file on stderr, "swap" prints the content of a mock file on stdout.
+
+# Return an error when a variable is not set.
+set -u
+
+
+# example: $LMOD_CMD python --terse avail cluster/
+python="$1"
+terse="$2"
+mod_cmd="$3"
+mod_args="${4:-}"
+
+# Emulated avail command.
+if [ "$mod_cmd" = "avail" ]; then
+    if [ "$mod_args" = "cluster/" ]; then
+        cat "${MOCK_FILE_AVAIL_CLUSTER}" >&2
+    else
+        cat "${MOCK_FILE_AVAIL}" >&2
+    fi
+
+
+# Emulated swap command.
+elif [ "$mod_cmd" = "swap" ]; then
+    # extract the cluster name from the 4th argument
+    cluster=$(echo "$mod_args" | cut -d "/" -f 1)
+    cluster_name=$(echo "$mod_args" | cut -d "/" -f 2)
+
+    if [ "$cluster" = "cluster" ]; then
+        # Substitute CLUSTER by the cluster_name (quoted, so that paths containing spaces are not split)
+        cat "${MOCK_FILE_SWAP/CLUSTER/${cluster_name}}" >&1
+    else
+        echo "${mod_args} is not a cluster." >&2
+        exit 1
+    fi
+
+
+else
+    echo "Module subcommand '${mod_cmd}' not supported yet in $0" >&2
+    exit 1
+fi
diff --git a/scripts/module_overview/tests/data/test_simple_solution.md b/scripts/module_overview/tests/data/test_simple_solution.md
new file mode 100644
index 00000000000..2d53cd02a11
--- /dev/null
+++ b/scripts/module_overview/tests/data/test_simple_solution.md
@@ -0,0 +1,10 @@
+
+Overview Modules
+================
+
+| |dialga|pikachu|
+| :---: | :---: | :---: |
+|Markov|X| |
+|cfd|X|X|
+|llm| |X|
+|science|X|X|
diff --git a/scripts/module_overview/tests/test_data.py b/scripts/module_overview/tests/test_data.py
new file mode 100644
index 00000000000..c59c2893813
--- /dev/null
+++ b/scripts/module_overview/tests/test_data.py
@@ -0,0 +1,30 @@
+import os
+from module_overview import modules_ugent, get_unique_software_names
+
+
+class TestData:
+    """Tests for collecting the available modules per cluster through the mocked lmod script."""
+    # ---------------------------
+    # Class level setup/teardown
+    # ---------------------------
+    path = os.path.dirname(os.path.realpath(__file__))
+
+    @classmethod
+    def setup_class(cls):
+        os.environ["TESTS_PATH"] = cls.path
+        os.environ["LMOD_CMD"] = cls.path + "/data/lmod_mock.sh"
+        os.environ["MOCK_FILE_SWAP"] = cls.path + "/data/data_swap_CLUSTER.txt"
+        os.environ["MOCK_FILE_AVAIL_CLUSTER"] = cls.path + "/data/data_avail_cluster_simple.txt"
+
+    # ---------------------------
+    # Module tests
+    # ---------------------------
+
+    def test_data_ugent(self):
+        """The mocked clusters dialga and pikachu yield the expected modules and software names."""
+        sol = modules_ugent()
+        assert len(sol) == 2
+        assert len(sol["cluster/dialga"]) == 13
+        assert len(sol["cluster/pikachu"]) == 15
+        assert list(get_unique_software_names(sol["cluster/dialga"])) == ["Markov", "cfd", "science"]
+        assert list(get_unique_software_names(sol["cluster/pikachu"])) == ["cfd", "llm", "science"]
diff --git a/scripts/module_overview/tests/test_md.py b/scripts/module_overview/tests/test_md.py
new file mode 100644
index 00000000000..4aaf4d4ad10
--- /dev/null
+++ b/scripts/module_overview/tests/test_md.py
@@ -0,0 +1,46 @@
+from mdutils.mdutils import MdUtils
+from module_overview import get_unique_software_names, modules_ugent, generate_table_data, generate_module_table
+import os
+import filecmp
+
+
+class TestMarkdown:
+    """Tests for turning the collected module data into a MarkDown table."""
+    # ---------------------------
+    # Class level setup/teardown
+    # ---------------------------
+
+    path = os.path.dirname(os.path.realpath(__file__))
+
+    @classmethod
+    def setup_class(cls):
+        os.environ["TESTS_PATH"] = cls.path
+        os.environ["LMOD_CMD"] = cls.path + "/data/lmod_mock.sh"
+        os.environ["MOCK_FILE_SWAP"] = cls.path + "/data/data_swap_CLUSTER.txt"
+        os.environ["MOCK_FILE_AVAIL_CLUSTER"] = cls.path + "/data/data_avail_cluster_simple.txt"
+
+    @classmethod
+    def teardown_class(cls):
+        if os.path.exists("test_simple.md"):
+            os.remove("test_simple.md")
+
+    # ---------------------------
+    # Markdown tests
+    # ---------------------------
+
+    def test_table_generate_simple(self):
+        """Two clusters and four software names give a flattened table of 3 columns by 5 rows."""
+        simple_data = get_unique_software_names(modules_ugent())
+        table_data, col, row = generate_table_data(simple_data)
+        assert col == 3
+        assert row == 5
+        assert len(table_data) == 15
+
+    def test_simple(self):
+        """The generated MarkDown file matches data/test_simple_solution.md."""
+        md_file = MdUtils(file_name='test_simple', title='Overview Modules')
+        simple_data = get_unique_software_names(modules_ugent())
+        generate_module_table(simple_data, md_file)
+        md_file.create_md_file()
+        assert os.path.exists("test_simple.md")
+        assert filecmp.cmp(self.path + "/data/test_simple_solution.md", "test_simple.md", shallow=False)
diff --git a/scripts/module_overview/tests/test_module.py b/scripts/module_overview/tests/test_module.py
new file mode 100644
index 00000000000..0b8f833ec29
--- /dev/null
+++ b/scripts/module_overview/tests/test_module.py
@@ -0,0 +1,55 @@
+import os
+from module_overview import module_avail, filter_fn_gent_modules, filter_fn_gent_cluster, module_swap
+
+
+class TestModule:
+    """Tests for the "module avail" and "module swap" wrappers, run against the mocked lmod script."""
+    # ---------------------------
+    # Class level setup/teardown
+    # ---------------------------
+    path = os.path.dirname(os.path.realpath(__file__))
+
+    @classmethod
+    def setup_class(cls):
+        os.environ["TESTS_PATH"] = cls.path
+        os.environ["LMOD_CMD"] = cls.path + "/data/lmod_mock.sh"
+
+    # ---------------------------
+    # Module tests
+    # ---------------------------
+
+    def test_avail(self):
+        os.environ["MOCK_FILE_AVAIL"] = self.path + "/data/data_avail_simple_pikachu.txt"
+        output = module_avail()
+        assert len(output) == 29
+
+    def test_avail_filtered(self):
+        os.environ["MOCK_FILE_AVAIL"] = self.path + "/data/data_avail_simple_pikachu.txt"
+        output = module_avail(filter_fn=filter_fn_gent_modules)
+        assert len(output) == 15
+        assert list(output) == [
+            "cfd/1.0", "cfd/2.0", "cfd/3.0", "cfd/24", "cfd/", "cfd/5.0",
+            "cfd/2.0afqsdf", "llm/20230627", "llm/20230627", "llm/20230627", "science/",
+            "science/5.3.0", "science/5.3.0", "science/5.3.0", "science/7.2.0"
+        ]
+
+    def test_avail_cluster(self):
+        os.environ["MOCK_FILE_AVAIL_CLUSTER"] = self.path + "/data/data_avail_cluster_simple.txt"
+        output = module_avail(name="cluster/")
+        assert len(output) == 4
+
+    def test_avail_cluster_filtered(self):
+        os.environ["MOCK_FILE_AVAIL_CLUSTER"] = self.path + "/data/data_avail_cluster_simple.txt"
+        output = module_avail(name="cluster/", filter_fn=filter_fn_gent_cluster)
+        assert len(output) == 2
+        assert list(output) == ["cluster/dialga", "cluster/pikachu"]
+
+    def test_swap(self):
+        """The mocked swap output is Python code that re-points $MOCK_FILE_AVAIL to the data of the new cluster."""
+        os.environ["MOCK_FILE_SWAP"] = self.path + "/data/data_swap_CLUSTER.txt"
+        module_swap("cluster/dialga")
+        output1 = module_avail()
+        assert len(output1) == 27
+        module_swap("cluster/pikachu")
+        output2 = module_avail()
+        assert len(output2) == 29