Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: check cache hash before using #1555

Merged
merged 7 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 3 additions & 8 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [ '3.7', '3.8', '3.9', '3.10' ]
python-version: ['3.8', '3.9', '3.10' ]

name: Base (${{ matrix.python-version }})

Expand Down Expand Up @@ -58,7 +58,7 @@ jobs:

strategy:
matrix:
python-version: [ '3.7', '3.10' ]
python-version: ['3.10' ]

name: Production (${{ matrix.python-version }})

Expand Down Expand Up @@ -96,7 +96,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [ '3.7', '3.10' ]
python-version: ['3.10' ]

name: Tests (${{ matrix.python-version }})

Expand All @@ -120,11 +120,6 @@ jobs:
with:
node-version: 18

- uses: actions/setup-node@v3
if: ${{ matrix.python-version == '3.7' }}
with:
node-version: 14

- run: |
wget https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.focal_amd64.deb;
sudo apt install ./wkhtmltox_0.12.6-1.focal_amd64.deb;
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Bench is a command-line utility that helps you to install, update, and manage mu

<div align="center">
<a target="_blank" href="https://www.python.org/downloads/" title="Python version">
<img src="https://img.shields.io/badge/python-%3E=_3.7-green.svg">
<img src="https://img.shields.io/badge/python-%3E=_3.8-green.svg">
</a>
<a target="_blank" href="https://app.travis-ci.com/github/frappe/bench" title="CI Status">
<img src="https://app.travis-ci.com/frappe/bench.svg?branch=develop">
Expand Down
155 changes: 109 additions & 46 deletions bench/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import shutil
import subprocess
import sys
import uuid
import tarfile
import typing
from collections import OrderedDict
Expand Down Expand Up @@ -34,6 +35,7 @@
is_valid_frappe_branch,
log,
run_frappe_cmd,
get_file_md5,
)
from bench.utils.bench import build_assets, install_python_dev_dependencies
from bench.utils.render import step
Expand Down Expand Up @@ -338,45 +340,51 @@ def validate_app_dependencies(self, throw=False) -> None:
def get_app_path(self) -> Path:
return Path(self.bench.name) / "apps" / self.app_name

def get_app_cache_path(self, is_compressed=False) -> Path:
assert self.cache_key is not None

def get_app_cache_temp_path(self, is_compressed=False) -> Path:
cache_path = get_bench_cache_path("apps")
tarfile_name = get_cache_filename(
self.app_name,
self.cache_key,
is_compressed,
)
ext = "tgz" if is_compressed else "tar"
tarfile_name = f"{self.app_name}.{uuid.uuid4().hex}.{ext}"
return cache_path / tarfile_name

def get_app_cache_hashed_path(self, temp_path: Path) -> Path:
assert self.cache_key is not None

ext = temp_path.suffix[1:]
md5 = get_file_md5(temp_path)
tarfile_name = f"{self.app_name}.{self.cache_key}.md5-{md5}.{ext}"

return temp_path.with_name(tarfile_name)

def get_cached(self) -> bool:
if not self.cache_key:
return False

cache_path = self.get_app_cache_path(False)
mode = "r"

# Check if cache exists without gzip
if not cache_path.is_file():
cache_path = self.get_app_cache_path(True)
mode = "r:gz"

# Check if cache exists with gzip
if not cache_path.is_file():
if not (cache_path := validate_cache_and_get_path(self.app_name, self.cache_key)):
return False

app_path = self.get_app_path()
if app_path.is_dir():
shutil.rmtree(app_path)

click.secho(f"Getting {self.app_name} from cache", fg="yellow")
click.secho(
f"Bench app-cache: extracting {self.app_name} from {cache_path.as_posix()}",
)

mode = "r:gz" if cache_path.suffix.endswith(".tgz") else "r"
with tarfile.open(cache_path, mode) as tar:
extraction_filter = get_app_cache_extract_filter(count_threshold=150_000)
try:
tar.extractall(app_path.parent, filter=extraction_filter)
click.secho(
f"Bench app-cache: extraction succeeded for {self.app_name}",
fg="green",
)
except Exception:
message = f"Cache extraction failed for {self.app_name}, skipping cache"
click.secho(message, fg="yellow")
message = f"Bench app-cache: extraction failed for {self.app_name}"
click.secho(
message,
fg="yellow",
)
logger.exception(message)
shutil.rmtree(app_path)
return False
Expand All @@ -392,10 +400,10 @@ def set_cache(self, compress_artifacts=False) -> bool:
return False

cwd = os.getcwd()
cache_path = self.get_app_cache_path(compress_artifacts)
cache_path = self.get_app_cache_temp_path(compress_artifacts)
mode = "w:gz" if compress_artifacts else "w"

message = f"Caching {self.app_name} app directory"
message = f"Bench app-cache: caching {self.app_name}"
if compress_artifacts:
message += " (compressed)"
click.secho(message)
Expand All @@ -407,9 +415,19 @@ def set_cache(self, compress_artifacts=False) -> bool:
try:
with tarfile.open(cache_path, mode) as tar:
tar.add(app_path.name)

hashed_path = self.get_app_cache_hashed_path(cache_path)
unlink_no_throw(hashed_path)

cache_path.rename(hashed_path)
click.secho(
f"Bench app-cache: caching succeeded for {self.app_name} as {hashed_path.as_posix()}",
fg="green",
)

success = True
except Exception:
log(f"Failed to cache {app_path}", level=3)
except Exception as exc:
log(f"Bench app-cache: caching failed for {self.app_name} {exc}", level=3)
success = False
finally:
os.chdir(cwd)
Expand Down Expand Up @@ -437,28 +455,11 @@ def can_get_cached(app_name: str, cache_key: str) -> bool:
checking local remote and fetching can be skipped while keeping
get-app command params the same.
"""
cache_path = get_bench_cache_path("apps")
tarfile_path = cache_path / get_cache_filename(
app_name,
cache_key,
True,
)

if tarfile_path.is_file():
return True

tarfile_path = cache_path / get_cache_filename(
app_name,
cache_key,
False,
)

return tarfile_path.is_file()

if cache_path := get_app_cache_path(app_name, cache_key):
return cache_path.exists()

def get_cache_filename(app_name: str, cache_key: str, is_compressed=False):
ext = "tgz" if is_compressed else "tar"
return f"{app_name}-{cache_key[:10]}.{ext}"
return False


def can_frappe_use_cached(app: App) -> bool:
Expand All @@ -482,7 +483,10 @@ def can_frappe_use_cached(app: App) -> bool:
"""
return sv.Version("15.12.0") not in sv.SimpleSpec(min_frappe)
except ValueError:
click.secho(f"Invalid value found for frappe version '{min_frappe}'", fg="yellow")
click.secho(
f"Bench app-cache: invalid value found for frappe version '{min_frappe}'",
fg="yellow",
)
# Invalid expression
return False

Expand Down Expand Up @@ -591,6 +595,10 @@ def remove_unused_node_modules(app_path: Path) -> None:
can_delete = "vite build" in build_script

if can_delete:
click.secho(
f"Bench app-cache: removing {node_modules.as_posix()}",
fg="yellow",
)
shutil.rmtree(node_modules)


Expand Down Expand Up @@ -1036,3 +1044,58 @@ def get_apps_json(path):

with open(path) as f:
return json.load(f)


def is_cache_hash_valid(cache_path: Path) -> bool:
parts = cache_path.name.split(".")
if len(parts) < 2 or not parts[-2].startswith("md5-"):
return False

md5 = parts[-2].split("-")[1]
return get_file_md5(cache_path) == md5


def unlink_no_throw(path: Path):
if not path.exists():
return

try:
path.unlink(True)
except Exception:
pass


def get_app_cache_path(app_name: str, cache_key: str) -> "Optional[Path]":
cache_path = get_bench_cache_path("apps")
glob_pattern = f"{app_name}.{cache_key}.md5-*"

for app_cache_path in cache_path.glob(glob_pattern):
return app_cache_path

return None


def validate_cache_and_get_path(app_name: str, cache_key: str) -> "Optional[Path]":
if not cache_key:
return

if not (cache_path := get_app_cache_path(app_name, cache_key)):
return

if not cache_path.is_file():
click.secho(
f"Bench app-cache: file check failed for {cache_path.as_posix()}, skipping cache",
fg="yellow",
)
unlink_no_throw(cache_path)
return

if not is_cache_hash_valid(cache_path):
click.secho(
f"Bench app-cache: hash validation failed for {cache_path.as_posix()}, skipping cache",
fg="yellow",
)
unlink_no_throw(cache_path)
return

return cache_path
14 changes: 14 additions & 0 deletions bench/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import re
import subprocess
import sys
import hashlib
from functools import lru_cache
from glob import glob
from pathlib import Path
Expand All @@ -23,6 +24,12 @@
InvalidRemoteException,
)

from typing import TYPE_CHECKING

if TYPE_CHECKING:
from typing import Optional


logger = logging.getLogger(PROJECT_NAME)
paths_in_app = ("hooks.py", "modules.txt", "patches.txt")
paths_in_bench = ("apps", "sites", "config", "logs", "config/pids")
Expand Down Expand Up @@ -605,3 +612,10 @@ def filter_function(member: TarInfo, dest_path: str) -> Optional[TarInfo]:
return None

return filter_function

def get_file_md5(p: Path) -> "str":
with open(p.as_posix(), "rb") as f:
file_md5 = hashlib.md5()
while chunk := f.read(2**16):
file_md5.update(chunk)
return file_md5.hexdigest()
6 changes: 3 additions & 3 deletions bench/utils/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,15 +688,15 @@ def cache_list() -> None:
created = datetime.fromtimestamp(stat.st_ctime)
accessed = datetime.fromtimestamp(stat.st_atime)

app = item.name.split("-")[0]
app = item.name.split(".")[0]
tot_items += 1
tot_size += stat.st_size
compressed = item.suffix == ".tgz"

if not printed_header:
click.echo(
f"{'APP':15} "
f"{'FILE':25} "
f"{'FILE':90} "
f"{'SIZE':>13} "
f"{'COMPRESSED'} "
f"{'CREATED':19} "
Expand All @@ -706,7 +706,7 @@ def cache_list() -> None:

click.echo(
f"{app:15} "
f"{item.name:25} "
f"{item.name:90} "
f"{size_mb:10.3f} MB "
f"{str(compressed):10} "
f"{created:%Y-%m-%d %H:%M:%S} "
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "frappe-bench"
description = "CLI to manage Multi-tenant deployments for Frappe apps"
readme = "README.md"
license = "GPL-3.0-only"
requires-python = ">=3.7"
requires-python = ">=3.8"
authors = [
{ name = "Frappe Technologies Pvt Ltd", email = "developers@frappe.io" },
]
Expand Down
Loading