Skip to content

Commit

Permalink
Merge pull request #23 from max-au/max-au/busy-wait-loop
Browse files Browse the repository at this point in the history
erlperf: fixes and updates to 2.1.0
  • Loading branch information
max-au authored Jan 18, 2023
2 parents 3758c6b + fda05d6 commit fd95952
Show file tree
Hide file tree
Showing 20 changed files with 213 additions and 69 deletions.
13 changes: 7 additions & 6 deletions .github/workflows/erlang.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,17 @@ jobs:
linux:
name: Test on OTP ${{ matrix.otp_version }} and ${{ matrix.os }}
runs-on: ${{ matrix.os }}

strategy:
matrix:
otp_version: ['23', '24', '25']
otp_version: [23, 24, 25]
os: [ubuntu-latest]

container:
image: erlang:${{ matrix.otp_version }}

steps:
- uses: actions/checkout@v2
- uses: erlef/setup-beam@v1
with:
otp-version: ${{matrix.otp_version}}
rebar3-version: '3.17.0'
- name: Run tests
run: rebar3 ct
- name: Generate documentation
Expand All @@ -33,4 +34,4 @@ jobs:
run: rebar3 as prod escriptize
- shell: bash
name: Smoke test
run: ./erlperf 'timer:sleep(1).'
run: ./erlperf 'timer:sleep(1).'
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Changelog

## 2.1.0
- fixed -w (--warmup) argument missing from command line
- synchronised worker startup when adding concurrency
- concurrent worker shutdown when reducing concurrency
- elevated job & benchmark process priority to avoid result skew
- implemented scheduling problem detection (e.g. lock contention),
added a busy loop method workaround

## 2.0.2
- added convenience command line options: init_all, done_all, init_runner_all

Expand Down
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,17 @@ halved:
rand:uniform(). 1 2771 Ki 72 ns
```
## Benchmarking under lock contention
ERTS cannot guarantee precise timing when there is severe lock contention happening,
and scheduler utilisation is 100%. This often happens with ETS:
```bash
$ ./erlperf -c 50 'ets:insert(ac_tab, {1, 2}).'
```
Running 50 concurrent processes trying to overwrite the very same key of an ETS
table leads to lock contention on a shared resource (ETS table/bucket lock).
## Concurrency test (squeeze)
Sometimes it's necessary to measure code running multiple concurrent
processes, and find out when it saturates the node. It can be used to
Expand Down
6 changes: 3 additions & 3 deletions rebar.lock
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{"1.2.0",
[{<<"argparse">>,{pkg,<<"argparse">>,<<"1.2.3">>},0}]}.
[{<<"argparse">>,{pkg,<<"argparse">>,<<"1.2.4">>},0}]}.
[
{pkg_hash,[
{<<"argparse">>, <<"20578A5D86250D7FB1BF6D116054BF707C247395E740BC8FC1384CC49DFB06F7">>}]},
{<<"argparse">>, <<"A31E7C5D9F8814AFCD9B42C1B98C21DA6F851F93F2C8C00C107F6201668A0A7D">>}]},
{pkg_hash_ext,[
{<<"argparse">>, <<"BFE94654811112FB86880C47B3600313D8D2281A1CCE4DADA2B09E5C3C9988D3">>}]}
{<<"argparse">>, <<"AC6FDB7183EA20ADEB7EB66E34B21F2E8C4C6925913EE0C0765D339D97009FFE">>}]}
].
2 changes: 1 addition & 1 deletion src/erlperf.app.src
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{application, erlperf,
[{description, "Erlang Performance & Benchmarking Suite"},
{vsn, "2.0.2"},
{vsn, "2.1.0"},
{registered, [
erlperf_sup, erlperf_job_sup, erlperf_monitor,
erlperf_history, erlperf_file_log, erlperf_cluster_monitor
Expand Down
114 changes: 90 additions & 24 deletions src/erlperf.erl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
%%% @copyright (C) 2019-2022, Maxim Fedorov
%%% @copyright (C) 2019-2023, Maxim Fedorov
%%% @doc
%%% Application API. Benchmark/squeeze implementation.
%%% @end
Expand Down Expand Up @@ -43,6 +43,11 @@
%% experimental feature allowing to benchmark processes with
%% wildly jumping throughput
cv => float(),
%% sets the supervising process priority. Default is 'high',
%% allowing test runner to be scheduled even under high scheduler
%% utilisation (which is expected for a benchmark running many
%% concurrent processes)
priority => erlang:priority_level(),
%% report form for single benchmark: when set to 'extended',
%% all non-warmup samples are returned as a list.
%% When missing, only the average QPS is returned.
Expand Down Expand Up @@ -113,12 +118,16 @@ benchmark(Codes, #{isolation := _Isolation} = Options, ConOpts) ->

%% no isolation requested, do normal in-BEAM test
benchmark(Codes, Options, ConOpts) ->
{Jobs, Samples, Monitors} = start_jobs(Codes, []),
%% elevate priority to reduce timer skew
SetPrio = maps:get(priority, Options, high),
PrevPriority = process_flag(priority, SetPrio),
Jobs = start_jobs(Codes, []),
{JobPids, Samples, _} = lists:unzip3(Jobs),
try
benchmark(Jobs, Options, ConOpts, Samples)
benchmark(JobPids, Options, ConOpts, Samples)
after
[erlang:demonitor(Mon, [flush]) || Mon <- Monitors],
[(catch gen:stop(Pid)) || Pid <- Jobs]
stop_jobs(Jobs),
process_flag(priority, PrevPriority)
end.

%% @doc
Expand Down Expand Up @@ -249,7 +258,7 @@ stop_nodes(Peers, _Nodes) ->
[peer:stop(Peer) || Peer <- Peers].

start_jobs([], Jobs) ->
lists:unzip3(lists:reverse(Jobs));
lists:reverse(Jobs);
start_jobs([Code | Codes], Jobs) ->
try
{ok, Pid} = erlperf_job:start(Code),
Expand All @@ -258,10 +267,18 @@ start_jobs([Code | Codes], Jobs) ->
start_jobs(Codes, [{Pid, Sample, MonRef} | Jobs])
catch Class:Reason:Stack ->
%% stop jobs that were started
[(catch gen:stop(Pid)) || {Pid, _, _} <- Jobs],
stop_jobs(Jobs),
erlang:raise(Class, Reason, Stack)
end.

stop_jobs(Jobs) ->
%% do not use gen:stop/1,2 or sys:terminate/2,3 here, as they spawn process running
%% with normal priority, and they don't get scheduled fast enough when there is severe
%% lock contention
WaitFor = [begin erlperf_job:request_stop(Pid), {Pid, Mon} end || {Pid, _, Mon} <- Jobs, is_process_alive(Pid)],
%% now wait for all monitors to fire
[receive {'DOWN', Mon, process, Pid, _R} -> ok end || {Pid, Mon} <- WaitFor].

-define(DEFAULT_SAMPLE_DURATION, 1000).

%% low-overhead benchmark
Expand Down Expand Up @@ -291,19 +308,34 @@ benchmark(Jobs, Options, ConOpts, Handles) ->
%% * (optional) for the last 'samples' cycles coefficient of variation did not exceed 'cv'
perform_benchmark(Jobs, Handles, Options) ->
Interval = maps:get(sample_duration, Options, ?DEFAULT_SAMPLE_DURATION),
% sleep for "warmup * sample_duration"
timer:sleep(Interval * maps:get(warmup, Options, 0)),
% do at least 'samples' cycles
% warmup: intended to figure out sleep method (whether to apply busy_wait immediately)
NowTime = os:system_time(millisecond),
SleepMethod = warmup(maps:get(warmup, Options, 0), NowTime, NowTime + Interval, Interval, sleep),
% find all options - or take their defaults, TODO: maybe do that at a higher level?
JobMap = maps:from_list([{J, []} || J <- Jobs]),
CV = maps:get(cv, Options, undefined),
SampleCount = maps:get(samples, Options, 3),
Report = maps:get(report, Options, false),
% remember initial counters in Before
StartedAt = os:system_time(millisecond),
Before = [[erlperf_job:sample(Handle)] || Handle <- Handles],
JobMap = maps:from_list([{J, []} || J <- Jobs]), %%
Samples = measure_impl(JobMap, Before, Handles, Interval,
maps:get(samples, Options, 3), maps:get(cv, Options, undefined)),
report_benchmark(Samples, maps:get(report, Options, false)).

measure_impl(_Jobs, Before, _Handles, _Interval, 0, undefined) ->
Samples = measure_impl(JobMap, Before, Handles, StartedAt, StartedAt + Interval, Interval,
SleepMethod, SampleCount, CV),
report_benchmark(Samples, Report).

%% warmup procedure: figure out if sleep/4 can work without falling back to busy wait
warmup(0, _LastSampleTime, _NextSampleTime, _Interval, Method) ->
Method;
warmup(Count, LastSampleTime, NextSampleTime, Interval, Method) ->
SleepFor = NextSampleTime - LastSampleTime,
NextMethod = sleep(Method, SleepFor, NextSampleTime, #{}),
NowTime = os:system_time(millisecond),
warmup(Count - 1, NowTime, NextSampleTime + Interval, Interval, NextMethod).

measure_impl(_Jobs, Before, _Handles, _LastSampleTime, _NextSampleTime, _Interval, _SleepMethod, 0, undefined) ->
normalise(Before);

measure_impl(Jobs, Before, Handles, Interval, 0, CV) ->
measure_impl(Jobs, Before, Handles, LastSampleTime, NextSampleTime, Interval, SleepMethod, 0, CV) ->
%% Complication: some jobs may need a long time to stabilise compared to others.
%% Decision: wait for all jobs to stabilise (could wait for just one?)
case
Expand All @@ -323,18 +355,52 @@ measure_impl(Jobs, Before, Handles, Interval, 0, CV) ->
true ->
% imitate queue - drop last sample, push another in the head
TailLess = [lists:droplast(L) || L <- Before],
measure_impl(Jobs, TailLess, Handles, Interval, 1, CV)
measure_impl(Jobs, TailLess, Handles, LastSampleTime, NextSampleTime + Interval,
Interval, SleepMethod, 1, CV)
end;

measure_impl(Jobs, Before, Handles, Interval, Count, CV) ->
%% LastSampleTime: system time of the last sample
%% NextSampleTime: system time when to take the next sample
%% Interval: to calculate the next NextSampleTime
%% Count: how many more samples to take
%% CV: coefficient of variation
measure_impl(Jobs, Before, Handles, LastSampleTime, NextSampleTime, Interval, SleepMethod, Count, CV) ->
SleepFor = NextSampleTime - LastSampleTime,
NextSleepMethod = sleep(SleepMethod, SleepFor, NextSampleTime, Jobs),
NowTime = os:system_time(millisecond),
Counts = [erlperf_job:sample(Handle) || Handle <- Handles],
measure_impl(Jobs, merge(Counts, Before), Handles, NowTime, NextSampleTime + Interval, Interval,
NextSleepMethod, Count - 1, CV).

%% ERTS real-time properties are easily broken by lock contention (e.g. ETS misuse)
%% When it happens, even the 'max' priority process may not run for an extended
%% period of time.

sleep(sleep, SleepFor, _WaitUntil, Jobs) when SleepFor > 0 ->
receive
{'DOWN', _Ref, process, Pid, Reason} when is_map_key(Pid, Jobs) ->
erlang:error({benchmark, {'EXIT', Pid, Reason}})
after Interval->
ok
end,
Counts = [erlperf_job:sample(Handle) || Handle <- Handles],
measure_impl(Jobs, merge(Counts, Before), Handles, Interval, Count - 1, CV).
after SleepFor ->
sleep
end;
sleep(_Mode, _SleepFor, WaitUntil, Jobs) ->
busy_wait(WaitUntil, Jobs).

%% When sleep detects significant difference in the actual sleep time vs. expected,
%% loop is switched to the busy wait.
%% Once switched to busy wait, erlperf stays there until the end of the test.
busy_wait(WaitUntil, Jobs) ->
receive
{'DOWN', _Ref, process, Pid, Reason} when is_map_key(Pid, Jobs) ->
erlang:error({benchmark, {'EXIT', Pid, Reason}})
after 0 ->
case os:system_time(millisecond) of
Now when Now > WaitUntil ->
busy_wait;
_ ->
busy_wait(WaitUntil, Jobs)
end
end.

merge([], []) ->
[];
Expand Down
2 changes: 1 addition & 1 deletion src/erlperf_app.erl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
%%% @copyright (C) 2019-2022, Maxim Fedorov
%%% @copyright (C) 2019-2023, Maxim Fedorov
%%% @doc
%%% Continuous benchmarking application behaviour.
%%% @end
Expand Down
4 changes: 2 additions & 2 deletions src/erlperf_cli.erl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
%%% @copyright (C) 2019-2022, Maxim Fedorov
%%% @copyright (C) 2019-2023, Maxim Fedorov
%%% @doc
%%% Command line interface adapter.
%%% @end
Expand Down Expand Up @@ -37,7 +37,7 @@ main(Args) ->
RunOpts0;
{ok, Str} ->
[erlang:error({loop, Option}) || Option <-
[concurrency, sample_duration, samples, waarmup, cv], is_map_key(Option, RunOpts0)],
[concurrency, sample_duration, samples, warmup, cv], is_map_key(Option, RunOpts0)],
RunOpts0#{loop => parse_loop(Str)}
end,

Expand Down
2 changes: 1 addition & 1 deletion src/erlperf_cluster_monitor.erl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
%%% @copyright (C) 2019-2022, Maxim Fedorov
%%% @copyright (C) 2019-2023, Maxim Fedorov
%%% @doc
%%% Logs monitoring events for the entire cluster, to file or device.
%%% Requires erlperf_history service running, fails otherwise.
Expand Down
2 changes: 1 addition & 1 deletion src/erlperf_file_log.erl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
%%% @copyright (C) 2019-2022, Maxim Fedorov
%%% @copyright (C) 2019-2023, Maxim Fedorov
%%% @doc
%%% Writes monitoring events to I/O device.
%%% @end
Expand Down
2 changes: 1 addition & 1 deletion src/erlperf_history.erl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
%%% @copyright (C) 2019-2022, Maxim Fedorov
%%% @copyright (C) 2019-2023, Maxim Fedorov
%%% @doc
%%% Collects, accumulates &amp; filters cluster-wide monitoring events.
%%% Essentially a simple in-memory database for quick cluster overview.
Expand Down
Loading

0 comments on commit fd95952

Please sign in to comment.