diff --git a/.github/workflows/deploy.yaml b/.github/workflows/deploy.yaml index 19342172..8028b1cf 100644 --- a/.github/workflows/deploy.yaml +++ b/.github/workflows/deploy.yaml @@ -29,15 +29,32 @@ jobs: echo prune=false >> $GITHUB_ENV fi - name: "Set max_num_workers based on PR label if present" - run: > - echo max_num_workers=` - echo '${{ toJSON(github.event.pull_request.labels.*.name) }}' | - python -c "import json, sys; - labels = json.loads(sys.stdin.read()); - max_num_workers = [l.split(':')[-1] for l in labels if l.startswith('max_num_workers:')]; - print((int(max_num_workers[0]) if max_num_workers else 1000)); - " - ` >> $GITHUB_ENV + # This is a little complicated, but the only way I know to retrieve labels on both + # `pull_request` *and* `push` events (and we want the ability to do so in both cases). + # Adapted from the following (note question in comment there re: external prs): + # https://github.com/pangeo-forge/deploy-recipe-action/blob/256da2916b5f17f358c5e5b0442458645cadb9f0/action/deploy_recipe.py#L34-L68 + shell: python3 {0} + run: | + import json + import os + import urllib.request + + repository = os.environ["GITHUB_REPOSITORY"] + api_url = os.environ["GITHUB_API_URL"] + head_ref = os.environ.get("GITHUB_HEAD_REF", "") + sha = os.environ["GITHUB_SHA"] + + commit_sha = head_ref if head_ref else sha + pulls_url = "/".join([api_url, "repos", repository, "commits", commit_sha, "pulls"]) + pulls_txt = urllib.request.urlopen(pulls_url).read() + pulls_json = json.loads(pulls_txt) + labels = [label["name"] for label in pulls_json[0]["labels"]] + + max_num_workers = [l.split(":")[-1] for l in labels if l.startswith("max_num_workers:")] + max_num_workers = (max_num_workers[0] if max_num_workers else "1000") + with open(os.environ["GITHUB_ENV"], mode="a") as f: + f.write(f"max_num_workers={max_num_workers}\n") + - name: "Deploy recipes" uses: "pangeo-forge/deploy-recipe-action@v0.1" with: diff --git a/feedstock/climsim.py b/feedstock/climsim.py index 
9c458365..846b2531 100644 --- a/feedstock/climsim.py +++ b/feedstock/climsim.py @@ -146,6 +146,9 @@ class OpenAndPreprocess(beam.PTransform): def expand(self, pcoll: beam.PCollection) -> beam.PCollection: return ( pcoll + # FIXME: rate limiting on caching step is probably required to get this to run + # end-to-end, without globally capping workers at a low value for all stages, + # see discussion in: https://github.com/leap-stc/data-management/issues/36. | OpenURLWithFSSpec() | OpenWithXarray( # FIXME: Get files to open without `copy_to_local=True`