From 36f2194865a2f6067639383612a648ce39feef84 Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Thu, 2 Nov 2023 11:40:07 +0530 Subject: [PATCH] fix: Prevent Gunicorn child workers hangup Problem: - Prerequisite: a long-running request is in progress. - `bench restart` is requested. - supervisord sends sigterm to gunicorn master process. - gunicorn passes it to all child workers and waits for graceful shutdown (30 seconds) - supervisord has no chill, and sends sigkill to master process in 10 seconds - gunicorn master process is dead, so now child workers will keep running until they complete their requests, which can take a really long time. - This entire time the sites will be down. Fix: - Explicitly encode default graceful_timeout in config - 30 seconds. - Make supervisor wait 10 more seconds for gunicorn to do its thing, and only then send sigkill. --- bench/config/templates/supervisor.conf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bench/config/templates/supervisor.conf b/bench/config/templates/supervisor.conf index de5f0298c..c6a0fb0d7 100644 --- a/bench/config/templates/supervisor.conf +++ b/bench/config/templates/supervisor.conf @@ -2,13 +2,15 @@ ; priority=1 --> Lower priorities indicate programs that start first and shut down last ; killasgroup=true --> send kill signal to child processes too +; graceful timeout should always be lower than stopwaitsecs to avoid orphan gunicorn workers. 
[program:{{ bench_name }}-frappe-web] -command={{ bench_dir }}/env/bin/gunicorn -b 127.0.0.1:{{ webserver_port }} -w {{ gunicorn_workers }} --max-requests {{ gunicorn_max_requests }} --max-requests-jitter {{ gunicorn_max_requests_jitter }} -t {{ http_timeout }} frappe.app:application --preload +command={{ bench_dir }}/env/bin/gunicorn -b 127.0.0.1:{{ webserver_port }} -w {{ gunicorn_workers }} --max-requests {{ gunicorn_max_requests }} --max-requests-jitter {{ gunicorn_max_requests_jitter }} -t {{ http_timeout }} --graceful-timeout 30 frappe.app:application --preload priority=4 autostart=true autorestart=true stdout_logfile={{ bench_dir }}/logs/web.log stderr_logfile={{ bench_dir }}/logs/web.error.log +stopwaitsecs=40 user={{ user }} directory={{ sites_dir }}