From fda05d6c69d9093001888365a82c39c06e5fc541 Mon Sep 17 00:00:00 2001
From: Maxim Fedorov
Date: Tue, 17 Jan 2023 17:13:12 -0800
Subject: [PATCH] erlperf: fixes and updates to 2.1.0

The major change is detecting lock contention and falling back to a busy
wait loop in the primary (benchmarking) process. This does not reduce
total throughput much: the CPU is not actually busy under lock contention,
so spinning in the primary process does not introduce much of a problem.

Changes:
 * bumped argparse to 1.2.4
 * fixed -w (--warmup) argument missing from command line
 * synchronised worker startup when adding concurrency
 * concurrent worker shutdown when reducing concurrency
 * elevated job & benchmark process priority to avoid result skew
 * implemented scheduling problem detection (e.g. lock contention),
   added a busy loop method workaround
 * backed out from erlef GitHub Actions CI to keep OTP 23 support
---
 .github/workflows/erlang.yml    |  13 ++--
 CHANGELOG.md                    |   8 +++
 README.md                       |  11 +++
 rebar.lock                      |   6 +-
 src/erlperf.app.src             |   2 +-
 src/erlperf.erl                 | 114 +++++++++++++++++++++++++-------
 src/erlperf_app.erl             |   2 +-
 src/erlperf_cli.erl             |   4 +-
 src/erlperf_cluster_monitor.erl |   2 +-
 src/erlperf_file_log.erl        |   2 +-
 src/erlperf_history.erl         |   2 +-
 src/erlperf_job.erl             |  68 ++++++++++++++-----
 src/erlperf_job_sup.erl         |   2 +-
 src/erlperf_monitor.erl         |   2 +-
 src/erlperf_sup.erl             |   2 +-
 test/erlperf_SUITE.erl          |  32 +++++++--
 test/erlperf_file_log_SUITE.erl |   2 +-
 test/erlperf_history_SUITE.erl  |   2 +-
 test/erlperf_job_SUITE.erl      |   4 +-
 test/erlperf_monitor_SUITE.erl  |   2 +-
 20 files changed, 213 insertions(+), 69 deletions(-)

diff --git a/.github/workflows/erlang.yml b/.github/workflows/erlang.yml
index 035a551..ab838e5 100644
--- a/.github/workflows/erlang.yml
+++ b/.github/workflows/erlang.yml
@@ -11,16 +11,17 @@ jobs:
   linux:
     name: Test on OTP ${{ matrix.otp_version }} and ${{ matrix.os }}
     runs-on: ${{ matrix.os }}
+
     strategy:
       matrix:
-        otp_version: ['23', '24', '25']
+        otp_version: [23, 24, 25]
         os: [ubuntu-latest]
+
+    container:
+      image: erlang:${{ matrix.otp_version }}
+
     steps:
       - uses: actions/checkout@v2
-      - uses: erlef/setup-beam@v1
-        with:
-          otp-version: ${{matrix.otp_version}}
-          rebar3-version: '3.17.0'
       - name: Run tests
         run: rebar3 ct
      - name: Generate documentation
@@ -33,4 +34,4 @@ jobs:
         run: rebar3 as prod escriptize
       - shell: bash
         name: Smoke test
-        run: ./erlperf 'timer:sleep(1).'
+        run: ./erlperf 'timer:sleep(1).'
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index cf77551..f4c71fd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,13 @@
 # Changelog

+## 2.1.0
+- fixed -w (--warmup) argument missing from command line
+- synchronised worker startup when adding concurrency
+- concurrent worker shutdown when reducing concurrency
+- elevated job & benchmark process priority to avoid result skew
+- implemented scheduling problem detection (e.g. lock contention),
+  added a busy loop method workaround
+
 ## 2.0.2
 - added convenience command line options: init_all, done_all, init_runner_all

diff --git a/README.md b/README.md
index 07c52fc..faa8f98 100644
--- a/README.md
+++ b/README.md
@@ -289,6 +289,17 @@ halved:
 rand:uniform().     1   2771 Ki      72 ns
 ```
+## Benchmarking under lock contention
+ERTS cannot guarantee precise timing when there is severe lock contention
+and scheduler utilisation is at 100%. This often happens with ETS:
+```bash
+    $ ./erlperf -c 50 'ets:insert(ac_tab, {1, 2}).'
+```
+Running 50 concurrent processes trying to overwrite the very same key of an ETS
+table leads to lock contention on a shared resource (the ETS table/bucket lock).
+ + + ## Concurrency test (squeeze) Sometimes it's necessary to measure code running multiple concurrent processes, and find out when it saturates the node. It can be used to diff --git a/rebar.lock b/rebar.lock index 5eec03f..3e81148 100644 --- a/rebar.lock +++ b/rebar.lock @@ -1,8 +1,8 @@ {"1.2.0", -[{<<"argparse">>,{pkg,<<"argparse">>,<<"1.2.3">>},0}]}. +[{<<"argparse">>,{pkg,<<"argparse">>,<<"1.2.4">>},0}]}. [ {pkg_hash,[ - {<<"argparse">>, <<"20578A5D86250D7FB1BF6D116054BF707C247395E740BC8FC1384CC49DFB06F7">>}]}, + {<<"argparse">>, <<"A31E7C5D9F8814AFCD9B42C1B98C21DA6F851F93F2C8C00C107F6201668A0A7D">>}]}, {pkg_hash_ext,[ - {<<"argparse">>, <<"BFE94654811112FB86880C47B3600313D8D2281A1CCE4DADA2B09E5C3C9988D3">>}]} + {<<"argparse">>, <<"AC6FDB7183EA20ADEB7EB66E34B21F2E8C4C6925913EE0C0765D339D97009FFE">>}]} ]. diff --git a/src/erlperf.app.src b/src/erlperf.app.src index 31e079a..9ffec3b 100644 --- a/src/erlperf.app.src +++ b/src/erlperf.app.src @@ -1,6 +1,6 @@ {application, erlperf, [{description, "Erlang Performance & Benchmarking Suite"}, - {vsn, "2.0.2"}, + {vsn, "2.1.0"}, {registered, [ erlperf_sup, erlperf_job_sup, erlperf_monitor, erlperf_history, erlperf_file_log, erlperf_cluster_monitor diff --git a/src/erlperf.erl b/src/erlperf.erl index 493beb4..b0ad011 100644 --- a/src/erlperf.erl +++ b/src/erlperf.erl @@ -1,4 +1,4 @@ -%%% @copyright (C) 2019-2022, Maxim Fedorov +%%% @copyright (C) 2019-2023, Maxim Fedorov %%% @doc %%% Application API. Benchmark/squeeze implementation. %%% @end @@ -43,6 +43,11 @@ %% experimental feature allowing to benchmark processes with %% wildly jumping throughput cv => float(), + %% sets the supervising process priority. Default is 'high', + %% allowing test runner to be scheduled even under high scheduler + %% utilisation (which is expected for a benchmark running many + %% concurrent processes) + priority => erlang:priority_level(), %% report form for single benchmark: when set to 'extended', %% all non-warmup samples are returned as a list. %% When missing, only the average QPS is returned. @@ -113,12 +118,16 @@ benchmark(Codes, #{isolation := _Isolation} = Options, ConOpts) -> %% no isolation requested, do normal in-BEAM test benchmark(Codes, Options, ConOpts) -> - {Jobs, Samples, Monitors} = start_jobs(Codes, []), + %% elevate priority to reduce timer skew + SetPrio = maps:get(priority, Options, high), + PrevPriority = process_flag(priority, SetPrio), + Jobs = start_jobs(Codes, []), + {JobPids, Samples, _} = lists:unzip3(Jobs), try - benchmark(Jobs, Options, ConOpts, Samples) + benchmark(JobPids, Options, ConOpts, Samples) after - [erlang:demonitor(Mon, [flush]) || Mon <- Monitors], - [(catch gen:stop(Pid)) || Pid <- Jobs] + stop_jobs(Jobs), + process_flag(priority, PrevPriority) end. %% @doc @@ -249,7 +258,7 @@ stop_nodes(Peers, _Nodes) -> [peer:stop(Peer) || Peer <- Peers]. start_jobs([], Jobs) -> - lists:unzip3(lists:reverse(Jobs)); + lists:reverse(Jobs); start_jobs([Code | Codes], Jobs) -> try {ok, Pid} = erlperf_job:start(Code), @@ -258,10 +267,18 @@ start_jobs([Code | Codes], Jobs) -> start_jobs(Codes, [{Pid, Sample, MonRef} | Jobs]) catch Class:Reason:Stack -> %% stop jobs that were started - [(catch gen:stop(Pid)) || {Pid, _, _} <- Jobs], + stop_jobs(Jobs), erlang:raise(Class, Reason, Stack) end. 
+stop_jobs(Jobs) -> + %% do not use gen:stop/1,2 or sys:terminate/2,3 here, as they spawn process running + %% with normal priority, and they don't get scheduled fast enough when there is severe + %% lock contention + WaitFor = [begin erlperf_job:request_stop(Pid), {Pid, Mon} end || {Pid, _, Mon} <- Jobs, is_process_alive(Pid)], + %% now wait for all monitors to fire + [receive {'DOWN', Mon, process, Pid, _R} -> ok end || {Pid, Mon} <- WaitFor]. + -define(DEFAULT_SAMPLE_DURATION, 1000). %% low-overhead benchmark @@ -291,19 +308,34 @@ benchmark(Jobs, Options, ConOpts, Handles) -> %% * (optional) for the last 'samples' cycles coefficient of variation did not exceed 'cv' perform_benchmark(Jobs, Handles, Options) -> Interval = maps:get(sample_duration, Options, ?DEFAULT_SAMPLE_DURATION), - % sleep for "warmup * sample_duration" - timer:sleep(Interval * maps:get(warmup, Options, 0)), - % do at least 'samples' cycles + % warmup: intended to figure out sleep method (whether to apply busy_wait immediately) + NowTime = os:system_time(millisecond), + SleepMethod = warmup(maps:get(warmup, Options, 0), NowTime, NowTime + Interval, Interval, sleep), + % find all options - or take their defaults, TODO: maybe do that at a higher level? + JobMap = maps:from_list([{J, []} || J <- Jobs]), + CV = maps:get(cv, Options, undefined), + SampleCount = maps:get(samples, Options, 3), + Report = maps:get(report, Options, false), + % remember initial counters in Before + StartedAt = os:system_time(millisecond), Before = [[erlperf_job:sample(Handle)] || Handle <- Handles], - JobMap = maps:from_list([{J, []} || J <- Jobs]), %% - Samples = measure_impl(JobMap, Before, Handles, Interval, - maps:get(samples, Options, 3), maps:get(cv, Options, undefined)), - report_benchmark(Samples, maps:get(report, Options, false)). - -measure_impl(_Jobs, Before, _Handles, _Interval, 0, undefined) -> + Samples = measure_impl(JobMap, Before, Handles, StartedAt, StartedAt + Interval, Interval, + SleepMethod, SampleCount, CV), + report_benchmark(Samples, Report). + +%% warmup procedure: figure out if sleep/4 can work without falling back to busy wait +warmup(0, _LastSampleTime, _NextSampleTime, _Interval, Method) -> + Method; +warmup(Count, LastSampleTime, NextSampleTime, Interval, Method) -> + SleepFor = NextSampleTime - LastSampleTime, + NextMethod = sleep(Method, SleepFor, NextSampleTime, #{}), + NowTime = os:system_time(millisecond), + warmup(Count - 1, NowTime, NextSampleTime + Interval, Interval, NextMethod). + +measure_impl(_Jobs, Before, _Handles, _LastSampleTime, _NextSampleTime, _Interval, _SleepMethod, 0, undefined) -> normalise(Before); -measure_impl(Jobs, Before, Handles, Interval, 0, CV) -> +measure_impl(Jobs, Before, Handles, LastSampleTime, NextSampleTime, Interval, SleepMethod, 0, CV) -> %% Complication: some jobs may need a long time to stabilise compared to others. %% Decision: wait for all jobs to stabilise (could wait for just one?) 
case @@ -323,18 +355,52 @@ measure_impl(Jobs, Before, Handles, Interval, 0, CV) -> true -> % imitate queue - drop last sample, push another in the head TailLess = [lists:droplast(L) || L <- Before], - measure_impl(Jobs, TailLess, Handles, Interval, 1, CV) + measure_impl(Jobs, TailLess, Handles, LastSampleTime, NextSampleTime + Interval, + Interval, SleepMethod, 1, CV) end; -measure_impl(Jobs, Before, Handles, Interval, Count, CV) -> +%% LastSampleTime: system time of the last sample +%% NextSampleTime: system time when to take the next sample +%% Interval: to calculate the next NextSampleTime +%% Count: how many more samples to take +%% CV: coefficient of variation +measure_impl(Jobs, Before, Handles, LastSampleTime, NextSampleTime, Interval, SleepMethod, Count, CV) -> + SleepFor = NextSampleTime - LastSampleTime, + NextSleepMethod = sleep(SleepMethod, SleepFor, NextSampleTime, Jobs), + NowTime = os:system_time(millisecond), + Counts = [erlperf_job:sample(Handle) || Handle <- Handles], + measure_impl(Jobs, merge(Counts, Before), Handles, NowTime, NextSampleTime + Interval, Interval, + NextSleepMethod, Count - 1, CV). + +%% ERTS real-time properties are easily broken by lock contention (e.g. ETS misuse) +%% When it happens, even the 'max' priority process may not run for an extended +%% period of time. + +sleep(sleep, SleepFor, _WaitUntil, Jobs) when SleepFor > 0 -> receive {'DOWN', _Ref, process, Pid, Reason} when is_map_key(Pid, Jobs) -> erlang:error({benchmark, {'EXIT', Pid, Reason}}) - after Interval-> - ok - end, - Counts = [erlperf_job:sample(Handle) || Handle <- Handles], - measure_impl(Jobs, merge(Counts, Before), Handles, Interval, Count - 1, CV). + after SleepFor -> + sleep + end; +sleep(_Mode, _SleepFor, WaitUntil, Jobs) -> + busy_wait(WaitUntil, Jobs). + +%% When sleep detects significant difference in the actual sleep time vs. expected, +%% loop is switched to the busy wait. +%% Once switched to busy wait, erlperf stays there until the end of the test. +busy_wait(WaitUntil, Jobs) -> + receive + {'DOWN', _Ref, process, Pid, Reason} when is_map_key(Pid, Jobs) -> + erlang:error({benchmark, {'EXIT', Pid, Reason}}) + after 0 -> + case os:system_time(millisecond) of + Now when Now > WaitUntil -> + busy_wait; + _ -> + busy_wait(WaitUntil, Jobs) + end + end. merge([], []) -> []; diff --git a/src/erlperf_app.erl b/src/erlperf_app.erl index 4a62d45..8872ba3 100644 --- a/src/erlperf_app.erl +++ b/src/erlperf_app.erl @@ -1,4 +1,4 @@ -%%% @copyright (C) 2019-2022, Maxim Fedorov +%%% @copyright (C) 2019-2023, Maxim Fedorov %%% @doc %%% Continuous benchmarking application behaviour. %%% @end diff --git a/src/erlperf_cli.erl b/src/erlperf_cli.erl index 04634be..bd7c2e5 100644 --- a/src/erlperf_cli.erl +++ b/src/erlperf_cli.erl @@ -1,4 +1,4 @@ -%%% @copyright (C) 2019-2022, Maxim Fedorov +%%% @copyright (C) 2019-2023, Maxim Fedorov %%% @doc %%% Command line interface adapter. 
 %%% @end
@@ -37,7 +37,7 @@ main(Args) ->
                 RunOpts0;
             {ok, Str} ->
                 [erlang:error({loop, Option}) || Option <-
-                    [concurrency, sample_duration, samples, waarmup, cv], is_map_key(Option, RunOpts0)],
+                    [concurrency, sample_duration, samples, warmup, cv], is_map_key(Option, RunOpts0)],
                 RunOpts0#{loop => parse_loop(Str)}
         end,

diff --git a/src/erlperf_cluster_monitor.erl b/src/erlperf_cluster_monitor.erl
index 74dada9..cbc392a 100644
--- a/src/erlperf_cluster_monitor.erl
+++ b/src/erlperf_cluster_monitor.erl
@@ -1,4 +1,4 @@
-%%% @copyright (C) 2019-2022, Maxim Fedorov
+%%% @copyright (C) 2019-2023, Maxim Fedorov
 %%% @doc
 %%% Logs monitoring events for the entire cluster, to file or device.
 %%% Requires erlperf_history service running, fails otherwise.
diff --git a/src/erlperf_file_log.erl b/src/erlperf_file_log.erl
index 996e229..8904917 100644
--- a/src/erlperf_file_log.erl
+++ b/src/erlperf_file_log.erl
@@ -1,4 +1,4 @@
-%%% @copyright (C) 2019-2022, Maxim Fedorov
+%%% @copyright (C) 2019-2023, Maxim Fedorov
 %%% @doc
 %%% Writes monitoring events to I/O device.
 %%% @end
diff --git a/src/erlperf_history.erl b/src/erlperf_history.erl
index a4dd707..2996c88 100644
--- a/src/erlperf_history.erl
+++ b/src/erlperf_history.erl
@@ -1,4 +1,4 @@
-%%% @copyright (C) 2019-2022, Maxim Fedorov
+%%% @copyright (C) 2019-2023, Maxim Fedorov
 %%% @doc
 %%% Collects, accumulates & filters cluster-wide monitoring events.
 %%% Essentially a simple in-memory database for quick cluster overview.
diff --git a/src/erlperf_job.erl b/src/erlperf_job.erl
index e38e948..28c21f2 100644
--- a/src/erlperf_job.erl
+++ b/src/erlperf_job.erl
@@ -1,4 +1,4 @@
-%%% @copyright (C) 2019-2022, Maxim Fedorov
+%%% @copyright (C) 2019-2023, Maxim Fedorov
 %%% @doc
 %%% Job runner, taking care of init/done, workers added and removed.
 %%% Works just like a simple_one_for_one supervisor (children are
@@ -31,12 +31,14 @@
 -export([
     start/1,
     start_link/1,
+    request_stop/1,
     concurrency/1,
     set_concurrency/2,
     measure/2,
     sample/1,
     handle/1,
-    source/1
+    source/1,
+    set_priority/2
 ]).

 %% gen_server callbacks
@@ -89,6 +91,13 @@ start(#{runner := _MustHave} = Code) ->
 start_link(#{runner := _MustHave} = Code) ->
     gen_server:start_link(?MODULE, generate(Code), []).

+%% @doc
+%% Requests this job to stop. Caller should monitor the job process
+%% to find out when the job has actually stopped.
+-spec request_stop(server_ref()) -> ok.
+request_stop(JobId) ->
+    gen_server:cast(JobId, stop).
+
 %% @doc
 -spec concurrency(server_ref()) -> Concurrency :: non_neg_integer().
 concurrency(JobId) ->
@@ -96,10 +105,11 @@ concurrency(JobId) ->

 %% @doc
 %% Change concurrency setting for this job.
-%% Does not reset counting.
+%% Does not reset counting. May never return if init_runner
+%% does not return.
 -spec set_concurrency(server_ref(), non_neg_integer()) -> ok.
 set_concurrency(JobId, Concurrency) ->
-    gen_server:call(JobId, {set_concurrency, Concurrency}).
+    gen_server:call(JobId, {set_concurrency, Concurrency}, infinity).

 %% @doc
 %% Executes the runner SampleCount times, returns time in microseconds it
@@ -129,6 +139,16 @@ sample({Module, Arity}) ->
 source(JobId) ->
     gen_server:call(JobId, source).

+%% @doc
+%% Sets the job process priority to use when workers are running.
+%% Worker processes may utilise all schedulers, causing the job
+%% process to lose control over starting and stopping workers.
+%% By default, the job process runs at 'high' priority when there
+%% are any workers running.
+%% Returns the previous setting.
+-spec set_priority(server_ref(), erlang:priority_level()) -> erlang:priority_level(). +set_priority(JobId, Priority) -> + gen_server:call(JobId, {priority, Priority}). %%-------------------------------------------------------------------- %% Internal definitions @@ -156,7 +176,11 @@ source(JobId) -> %% continuous workers workers = [] :: [pid()], %% temporary workers (for sample_count call) - sample_workers = #{} :: #{pid() => {pid(), reference()}} + sample_workers = #{} :: #{pid() => {pid(), reference()}}, + %% priority to return to when no workers left + initial_priority :: erlang:priority_level(), + %% priority to set when workers are running + priority = high :: erlang:priority_level() }). -type state() :: #erlperf_job_state{}. @@ -182,7 +206,8 @@ init(#exec{name = Mod, binary = Bin, init = Init, runner = {_Fun, Arity}} = Exec ok = erlperf_monitor:register(self(), {Mod, Arity}, 0), %% start tracing this module runner function 1 = erlang:trace_pattern({Mod, Mod, Arity}, true, [local, call_count]), - {ok, #erlperf_job_state{exec = Exec, init_result = InitRet}}. + {priority, Prio} = erlang:process_info(self(), priority), + {ok, #erlperf_job_state{exec = Exec, init_result = InitRet, initial_priority = Prio}}. -spec handle_call(term(), {pid(), reference()}, state()) -> {reply, term(), state()}. handle_call(handle, _From, #erlperf_job_state{exec = #exec{name = Name, runner = {_Fun, Arity}}} = State) -> @@ -203,11 +228,14 @@ handle_call({measure, _SampleCount}, _From, #erlperf_job_state{} = State) -> handle_call(source, _From, #erlperf_job_state{exec = #exec{source = Source}} = State) -> {reply, Source, State}; +handle_call({priority, Prio}, _From, #erlperf_job_state{priority = Old} = State) -> + {reply, Old, State#erlperf_job_state{priority = Prio}}; + handle_call({set_concurrency, Concurrency}, _From, #erlperf_job_state{workers = Workers} = State) -> {reply, ok, State#erlperf_job_state{workers = set_concurrency_impl(length(Workers), Concurrency, State)}}. -handle_cast(_Request, _State) -> - error(notsup). +handle_cast(stop, State) -> + {stop, normal, State}. -spec handle_info(term(), state()) -> {noreply, state()}. handle_info({'EXIT', SampleWorker, Reason}, @@ -254,19 +282,26 @@ start_sample_count(SampleCount, ReplyTo, InitRunner, InitRet, {SampleRunner, _}) ), #{Child => ReplyTo}. -set_concurrency_impl(OldConcurrency, Concurrency, #erlperf_job_state{workers = Workers, init_result = IR, exec = Exec}) -> +set_concurrency_impl(OldConcurrency, Concurrency, #erlperf_job_state{workers = Workers, init_result = IR, exec = Exec, + priority = Prio, initial_priority = InitialPrio}) -> case Concurrency - OldConcurrency of 0 -> Workers; NeedMore when NeedMore > 0 -> + %% this process must run with higher priority to avoid being de-scheduled by runners + OldConcurrency =:= 0 andalso erlang:process_flag(priority, Prio), Workers ++ add_workers(NeedMore, Exec, IR, []); NeedLess -> - remove_workers(NeedLess, Workers) + {Fire, Keep} = lists:split(-NeedLess, Workers), + stop_workers(Fire), + Keep =:= [] andalso erlang:process_flag(priority, InitialPrio), + Keep end. add_workers(0, _ExecMap, _InitRet, NewWorkers) -> %% ensure all new workers completed their InitRunner routine [receive {Worker, init_runner} -> ok end || Worker <- NewWorkers], + [Worker ! 
go || Worker <- NewWorkers], NewWorkers; add_workers(More, #exec{init_runner = InitRunner, runner = {Runner, _RunnerArity}} = Exec, InitRet, NewWorkers) -> Control = self(), @@ -275,18 +310,15 @@ add_workers(More, #exec{init_runner = InitRunner, runner = {Runner, _RunnerArity fun () -> %% need to send a message even if init_runner fails, hence 'after' IRR = try InitRunner(InitRet) after Control ! {self(), init_runner} end, + receive go -> ok end, Runner(IRR) end), add_workers(More - 1, Exec, InitRet, [Worker | NewWorkers]). -remove_workers(0, Workers) -> - Workers; -remove_workers(Less, [Worker | Workers]) -> - exit(Worker, kill), - receive - {'EXIT', Worker, _Reason} -> - remove_workers(Less + 1, Workers) - end. +stop_workers(Workers) -> + %% try to stop concurrently + [exit(Worker, kill) || Worker <- Workers], + [receive {'EXIT', Worker, _Reason} -> ok end || Worker <- Workers]. %%%=================================================================== %%% Internal: code generation diff --git a/src/erlperf_job_sup.erl b/src/erlperf_job_sup.erl index 2ff9ff1..87644db 100644 --- a/src/erlperf_job_sup.erl +++ b/src/erlperf_job_sup.erl @@ -1,4 +1,4 @@ -%%% @copyright (C) 2019-2022, Maxim Fedorov +%%% @copyright (C) 2019-2023, Maxim Fedorov %%% Supervises statically started jobs. %%% @end -module(erlperf_job_sup). diff --git a/src/erlperf_monitor.erl b/src/erlperf_monitor.erl index 2c5516d..b9d4dc2 100644 --- a/src/erlperf_monitor.erl +++ b/src/erlperf_monitor.erl @@ -1,5 +1,5 @@ %%%------------------------------------------------------------------- -%%% @copyright (C) 2019-2022, Maxim Fedorov +%%% @copyright (C) 2019-2023, Maxim Fedorov %%% @doc %%% System monitor: scheduler, RAM, and benchmarks throughput %%% samples. diff --git a/src/erlperf_sup.erl b/src/erlperf_sup.erl index 64be90f..246ae75 100644 --- a/src/erlperf_sup.erl +++ b/src/erlperf_sup.erl @@ -1,4 +1,4 @@ -%%% @copyright (C) 2019-2022, Maxim Fedorov +%%% @copyright (C) 2019-2023, Maxim Fedorov %%% @doc %%% Top-level supervisor. Always starts process group scope %%% for `erlperf'. Depending on the configuration starts diff --git a/test/erlperf_SUITE.erl b/test/erlperf_SUITE.erl index f79ac76..4c3d2d9 100644 --- a/test/erlperf_SUITE.erl +++ b/test/erlperf_SUITE.erl @@ -1,6 +1,6 @@ %%%------------------------------------------------------------------- %%% @author Maxim Fedorov -%%% @copyright (c) 2019-2022 Maxim Fedorov +%%% @copyright (c) 2019-2023 Maxim Fedorov %%% @doc %%% Tests benchmark module, machine-readable output for benchmarks. %%% @end @@ -28,6 +28,8 @@ crasher/0, crasher/1, undefer/0, undefer/1, compare/1, errors/0, errors/1]). +-export([overhead_benchmark/0, overhead_benchmark/1]). + -export([ cmd_line_simple/1, cmd_line_verbose/1, cmd_line_compare/1, cmd_line_usage/1, cmd_line_init/1, @@ -70,7 +72,10 @@ groups() -> compare, errors ]}, - {cmdline, [sequential], [ + {overhead, [], [ + overhead_benchmark + ]}, + {cmdline, [], [ cmd_line_simple, cmd_line_verbose, cmd_line_compare, @@ -87,13 +92,13 @@ groups() -> {squeeze, [], [ mfa_squeeze ]}, - {replay, [sequential], [ + {replay, [], [ replay ]} ]. all() -> - [{group, benchmark}, {group, cmdline}, {group, squeeze}, {group, replay}]. + [{group, benchmark}, {group, overhead}, {group, cmdline}, {group, squeeze}, {group, replay}]. %%-------------------------------------------------------------------- %% Helpers: gen_server implementation @@ -284,6 +289,25 @@ mfa_squeeze(Config) when is_list(Config) -> ?assert(CPU > 1) end. 
+overhead_benchmark() -> + [{doc, "Ensures that benchmarking overhead when running multiple concurrent processes is not too high"}, + {timetrap, {seconds, 20}}]. + +overhead_benchmark(Config) when is_list(Config) -> + Init = fun() -> ets:new(tab, [public, named_table]) end, + Done = fun(Tab) -> ets:delete(Tab) end, + Runner = fun() -> true = ets:insert(tab, {key, value}) end, %% this inevitably causes lock contention + %% take 10 samples of 100 ms, which should complete in about a second, or at least less than two + %% extra 3 samples of 100 ms are taken for 'warmup' cycles, to calibrate the sleep/4 function, that + %% is expected to switch into busy_wait mode. + Before = os:system_time(millisecond), + QPS = erlperf:run(#{runner => Runner, init => Init, done => Done}, + #{concurrency => 50, samples => 10, sample_duration => 100, warmup => 3}), + TimeSpent = os:system_time(millisecond) - Before, + ?assert(QPS > 0, {qps, QPS}), + ?assert(TimeSpent > 500, {too_quick, TimeSpent, expected, 1000}), + ?assert(TimeSpent < 2000, {too_slow, TimeSpent, expected, 1000}). + %%-------------------------------------------------------------------- %% command-line testing diff --git a/test/erlperf_file_log_SUITE.erl b/test/erlperf_file_log_SUITE.erl index 4e85132..2730898 100644 --- a/test/erlperf_file_log_SUITE.erl +++ b/test/erlperf_file_log_SUITE.erl @@ -1,4 +1,4 @@ -%%% @copyright (c) 2019-2022 Maxim Fedorov +%%% @copyright (c) 2019-2023 Maxim Fedorov %%% @doc %%% Tests erlperf_file_log %%% @end diff --git a/test/erlperf_history_SUITE.erl b/test/erlperf_history_SUITE.erl index 0c1ad28..f339785 100644 --- a/test/erlperf_history_SUITE.erl +++ b/test/erlperf_history_SUITE.erl @@ -1,4 +1,4 @@ -%%% @copyright (c) 2019-2022 Maxim Fedorov +%%% @copyright (c) 2019-2023 Maxim Fedorov %%% @doc %%% Tests combination of erlperf_monitor, erlperf_cluster_monitor, %%% erlperf_history and erlperf_job. This is an integration test diff --git a/test/erlperf_job_SUITE.erl b/test/erlperf_job_SUITE.erl index e708b50..7170e65 100644 --- a/test/erlperf_job_SUITE.erl +++ b/test/erlperf_job_SUITE.erl @@ -1,4 +1,4 @@ -%%% @copyright (c) 2019-2022 Maxim Fedorov +%%% @copyright (c) 2019-2023 Maxim Fedorov %%% @doc %%% Tests all combinations of code maps accepted by erlperf_job %%% @end @@ -47,10 +47,12 @@ overhead(Config) when is_list(Config) -> %% special case: code with no init/state/MFA/fun/... can be optimised %% to a simple "for-loop" and get executed with the lease overhead {ok, Job} = erlperf_job:start_link(#{runner => "rand:uniform(1000)."}), + high = erlperf_job:set_priority(Job, max), Sampler = erlperf_job:handle(Job), TimeUs = erlperf_job:measure(Job, SampleCount), %% measure the same thing now with continuous benchmark ok = erlperf_job:set_concurrency(Job, 1), + {priority, max} = erlang:process_info(Job, priority), %% fetch a sample Start = erlperf_job:sample(Sampler), timer:sleep(1000), diff --git a/test/erlperf_monitor_SUITE.erl b/test/erlperf_monitor_SUITE.erl index 029bed2..11e2edf 100644 --- a/test/erlperf_monitor_SUITE.erl +++ b/test/erlperf_monitor_SUITE.erl @@ -1,6 +1,6 @@ %%%------------------------------------------------------------------- %%% @author Maxim Fedorov -%%% @copyright (c) 2019-2022 Maxim Fedorov +%%% @copyright (c) 2019-2023 Maxim Fedorov %%% @doc %%% Tests monitor %%% @end
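
A minimal usage sketch (editorial addition, not part of the patch) of the job-control API this change introduces: `request_stop/1` and `set_priority/2`, used alongside the pre-existing `start/1`, `set_concurrency/2`, `handle/1` and `sample/1`. The runner expression, the concurrency level and the one-second pause are arbitrary illustrations; the return values mirror those asserted in `erlperf_job_SUITE` above.

```erlang
%% Illustrative only: drive a single job through the new 2.1.0 API.
{ok, Job} = erlperf_job:start(#{runner => "rand:uniform(1000)."}),
high = erlperf_job:set_priority(Job, max),   %% returns the previously configured priority
ok = erlperf_job:set_concurrency(Job, 2),    %% job process now runs at 'max' while workers exist
Handle = erlperf_job:handle(Job),
timer:sleep(1000),
Iterations = erlperf_job:sample(Handle),     %% total runner iterations counted so far
MRef = erlang:monitor(process, Job),
ok = erlperf_job:request_stop(Job),          %% asynchronous; the 'DOWN' message confirms shutdown
receive {'DOWN', MRef, process, Job, _Reason} -> ok end.
```

`request_stop/1` is a cast on purpose: as the `stop_jobs/1` comment above explains, `gen:stop/1` spawns a normal-priority helper process that may never get scheduled under severe lock contention, so callers monitor the job and wait for the `'DOWN'` message instead.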