erlperf: fixes and updates to 2.1.0

Major change is to detect lock contention and apply busy wait in the primary process. It does not reduce total throughput too much, because CPU is not actually busy, so spinning on it does not introduce too much of a problem. Changes: * bumped argparse to 1.2.4 * fixed -w (--warmup) argument missing from command line * synchronised worker startup when adding concurrency * concurrent worker shutdown when reducing concurrency * elevated job & benchmark process priority to avoid result skew * implemented scheduling problem detection (e.g. lock contention), added a busy loop method workaround * backout from erlef GitHub actions CI to enable OTP 23 support
max-au · Jan 18, 2023 · fda05d6 · fda05d6
1 parent 3758c6b
commit fda05d6
Show file tree

Hide file tree

Showing 20 changed files with 213 additions and 69 deletions.
diff --git a/.github/workflows/erlang.yml b/.github/workflows/erlang.yml
@@ -11,16 +11,17 @@ jobs:
   linux:
     name: Test on OTP ${{ matrix.otp_version }} and ${{ matrix.os }}
     runs-on: ${{ matrix.os }}
+
     strategy:
       matrix:
-        otp_version: ['23', '24', '25']
+        otp_version: [23, 24, 25]
         os: [ubuntu-latest]
+
+    container:
+      image: erlang:${{ matrix.otp_version }}
+
     steps:
       - uses: actions/checkout@v2
-      - uses: erlef/setup-beam@v1
-        with:
-          otp-version: ${{matrix.otp_version}}
-          rebar3-version: '3.17.0'
       - name: Run tests
         run: rebar3 ct
       - name: Generate documentation
@@ -33,4 +34,4 @@ jobs:
         run: rebar3 as prod escriptize
       - shell: bash
         name: Smoke test
-        run: ./erlperf 'timer:sleep(1).'
+        run: ./erlperf 'timer:sleep(1).'
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,13 @@
 # Changelog
 
+## 2.1.0
+- fixed -w (--warmup) argument missing from command line
+- synchronised worker startup when adding concurrency
+- concurrent worker shutdown when reducing concurrency
+- elevated job & benchmark process priority to avoid result skew
+- implemented scheduling problem detection (e.g. lock contention),
+  added a busy loop method workaround
+
 ## 2.0.2
 - added convenience command line options: init_all, done_all, init_runner_all
 

diff --git a/README.md b/README.md
@@ -289,6 +289,17 @@ halved:
     rand:uniform().          1    2771 Ki      72 ns
 ```
 
+## Benchmarking under lock contention
+ERTS cannot guarantee precise timing when there is severe lock contention happening,
+and scheduler utilisation is 100%. This often happens with ETS:
+```bash
+    $ ./erlperf -c 50 'ets:insert(ac_tab, {1, 2}).'
+```
+Running 50 concurrent processes trying to overwrite the very same key of an ETS
+table leads to lock contention on a shared resource (ETS table/bucket lock).
+
+
+
 ## Concurrency test (squeeze)
 Sometimes it's necessary to measure code running multiple concurrent
 processes, and find out when it saturates the node. It can be used to

diff --git a/rebar.lock b/rebar.lock
@@ -1,8 +1,8 @@
 {"1.2.0",
-[{<<"argparse">>,{pkg,<<"argparse">>,<<"1.2.3">>},0}]}.
+[{<<"argparse">>,{pkg,<<"argparse">>,<<"1.2.4">>},0}]}.
 [
 {pkg_hash,[
- {<<"argparse">>, <<"20578A5D86250D7FB1BF6D116054BF707C247395E740BC8FC1384CC49DFB06F7">>}]},
+ {<<"argparse">>, <<"A31E7C5D9F8814AFCD9B42C1B98C21DA6F851F93F2C8C00C107F6201668A0A7D">>}]},
 {pkg_hash_ext,[
- {<<"argparse">>, <<"BFE94654811112FB86880C47B3600313D8D2281A1CCE4DADA2B09E5C3C9988D3">>}]}
+ {<<"argparse">>, <<"AC6FDB7183EA20ADEB7EB66E34B21F2E8C4C6925913EE0C0765D339D97009FFE">>}]}
 ].
diff --git a/src/erlperf.app.src b/src/erlperf.app.src
@@ -1,6 +1,6 @@
 {application, erlperf,
  [{description, "Erlang Performance & Benchmarking Suite"},
-  {vsn, "2.0.2"},
+  {vsn, "2.1.0"},
   {registered, [
     erlperf_sup, erlperf_job_sup, erlperf_monitor,
    erlperf_history, erlperf_file_log, erlperf_cluster_monitor

diff --git a/src/erlperf.erl b/src/erlperf.erl
@@ -1,4 +1,4 @@
-%%% @copyright (C) 2019-2022, Maxim Fedorov
+%%% @copyright (C) 2019-2023, Maxim Fedorov
 %%% @doc
 %%%   Application API. Benchmark/squeeze implementation.
 %%% @end
@@ -43,6 +43,11 @@
     %% experimental feature allowing to benchmark processes with
     %%  wildly jumping throughput
     cv => float(),
+    %% sets the supervising process priority. Default is 'high',
+    %% allowing test runner to be scheduled even under high scheduler
+    %% utilisation (which is expected for a benchmark running many
+    %% concurrent processes)
+    priority => erlang:priority_level(),
     %% report form for single benchmark: when set to 'extended',
     %%  all non-warmup samples are returned as a list.
     %% When missing, only the average QPS is returned.
@@ -113,12 +118,16 @@ benchmark(Codes, #{isolation := _Isolation} = Options, ConOpts) ->
 
 %% no isolation requested, do normal in-BEAM test
 benchmark(Codes, Options, ConOpts) ->
-    {Jobs, Samples, Monitors} = start_jobs(Codes, []),
+    %% elevate priority to reduce timer skew
+    SetPrio = maps:get(priority, Options, high),
+    PrevPriority = process_flag(priority, SetPrio),
+    Jobs = start_jobs(Codes, []),
+    {JobPids, Samples, _} = lists:unzip3(Jobs),
     try
-        benchmark(Jobs, Options, ConOpts, Samples)
+        benchmark(JobPids, Options, ConOpts, Samples)
     after
-        [erlang:demonitor(Mon, [flush]) || Mon <- Monitors],
-        [(catch gen:stop(Pid)) || Pid <- Jobs]
+        stop_jobs(Jobs),
+        process_flag(priority, PrevPriority)
     end.
 
 %% @doc
@@ -249,7 +258,7 @@ stop_nodes(Peers, _Nodes) ->
     [peer:stop(Peer) || Peer <- Peers].
 
 start_jobs([], Jobs) ->
-    lists:unzip3(lists:reverse(Jobs));
+    lists:reverse(Jobs);
 start_jobs([Code | Codes], Jobs) ->
     try
         {ok, Pid} = erlperf_job:start(Code),
@@ -258,10 +267,18 @@ start_jobs([Code | Codes], Jobs) ->
         start_jobs(Codes, [{Pid, Sample, MonRef} | Jobs])
     catch Class:Reason:Stack ->
         %% stop jobs that were started
-        [(catch gen:stop(Pid)) || {Pid, _, _} <- Jobs],
+        stop_jobs(Jobs),
         erlang:raise(Class, Reason, Stack)
     end.
 
+stop_jobs(Jobs) ->
+    %% do not use gen:stop/1,2 or sys:terminate/2,3 here, as they spawn process running
+    %%  with normal priority, and they don't get scheduled fast enough when there is severe
+    %%  lock contention
+    WaitFor = [begin erlperf_job:request_stop(Pid), {Pid, Mon} end || {Pid, _, Mon} <- Jobs, is_process_alive(Pid)],
+    %% now wait for all monitors to fire
+    [receive {'DOWN', Mon, process, Pid, _R} -> ok end || {Pid, Mon} <- WaitFor].
+
 -define(DEFAULT_SAMPLE_DURATION, 1000).
 
 %% low-overhead benchmark
@@ -291,19 +308,34 @@ benchmark(Jobs, Options, ConOpts, Handles) ->
 %%  * (optional) for the last 'samples' cycles coefficient of variation did not exceed 'cv'
 perform_benchmark(Jobs, Handles, Options) ->
     Interval = maps:get(sample_duration, Options, ?DEFAULT_SAMPLE_DURATION),
-    % sleep for "warmup * sample_duration"
-    timer:sleep(Interval * maps:get(warmup, Options, 0)),
-    % do at least 'samples' cycles
+    % warmup: intended to figure out sleep method (whether to apply busy_wait immediately)
+    NowTime = os:system_time(millisecond),
+    SleepMethod = warmup(maps:get(warmup, Options, 0), NowTime, NowTime + Interval, Interval, sleep),
+    % find all options - or take their defaults, TODO: maybe do that at a higher level?
+    JobMap = maps:from_list([{J, []} || J <- Jobs]),
+    CV = maps:get(cv, Options, undefined),
+    SampleCount = maps:get(samples, Options, 3),
+    Report = maps:get(report, Options, false),
+    % remember initial counters in Before
+    StartedAt = os:system_time(millisecond),
     Before = [[erlperf_job:sample(Handle)] || Handle <- Handles],
-    JobMap = maps:from_list([{J, []} || J <- Jobs]), %%
-    Samples = measure_impl(JobMap, Before, Handles, Interval,
-        maps:get(samples, Options, 3), maps:get(cv, Options, undefined)),
-    report_benchmark(Samples, maps:get(report, Options, false)).
-
-measure_impl(_Jobs, Before, _Handles, _Interval, 0, undefined) ->
+    Samples = measure_impl(JobMap, Before, Handles, StartedAt, StartedAt + Interval, Interval,
+        SleepMethod, SampleCount, CV),
+    report_benchmark(Samples, Report).
+
+%% warmup procedure: figure out if sleep/4 can work without falling back to busy wait
+warmup(0, _LastSampleTime, _NextSampleTime, _Interval, Method) ->
+    Method;
+warmup(Count, LastSampleTime, NextSampleTime, Interval, Method) ->
+    SleepFor = NextSampleTime - LastSampleTime,
+    NextMethod = sleep(Method, SleepFor, NextSampleTime, #{}),
+    NowTime = os:system_time(millisecond),
+    warmup(Count - 1, NowTime, NextSampleTime + Interval, Interval, NextMethod).
+
+measure_impl(_Jobs, Before, _Handles, _LastSampleTime, _NextSampleTime, _Interval, _SleepMethod, 0, undefined) ->
     normalise(Before);
 
-measure_impl(Jobs, Before, Handles, Interval, 0, CV) ->
+measure_impl(Jobs, Before, Handles, LastSampleTime, NextSampleTime, Interval, SleepMethod, 0, CV) ->
     %% Complication: some jobs may need a long time to stabilise compared to others.
     %% Decision: wait for all jobs to stabilise (could wait for just one?)
     case
@@ -323,18 +355,52 @@ measure_impl(Jobs, Before, Handles, Interval, 0, CV) ->
         true ->
             % imitate queue - drop last sample, push another in the head
             TailLess = [lists:droplast(L) || L <- Before],
-            measure_impl(Jobs, TailLess, Handles, Interval, 1, CV)
+            measure_impl(Jobs, TailLess, Handles, LastSampleTime, NextSampleTime + Interval,
+                Interval, SleepMethod, 1, CV)
     end;
 
-measure_impl(Jobs, Before, Handles, Interval, Count, CV) ->
+%% LastSampleTime: system time of the last sample
+%% NextSampleTime: system time when to take the next sample
+%% Interval: to calculate the next NextSampleTime
+%% Count: how many more samples to take
+%% CV: coefficient of variation
+measure_impl(Jobs, Before, Handles, LastSampleTime, NextSampleTime, Interval, SleepMethod, Count, CV) ->
+    SleepFor = NextSampleTime - LastSampleTime,
+    NextSleepMethod = sleep(SleepMethod, SleepFor, NextSampleTime, Jobs),
+    NowTime = os:system_time(millisecond),
+    Counts = [erlperf_job:sample(Handle) || Handle <- Handles],
+    measure_impl(Jobs, merge(Counts, Before), Handles, NowTime, NextSampleTime + Interval, Interval,
+        NextSleepMethod, Count - 1, CV).
+
+%% ERTS real-time properties are easily broken by lock contention (e.g. ETS misuse)
+%% When it happens, even the 'max' priority process may not run for an extended
+%% period of time.
+
+sleep(sleep, SleepFor, _WaitUntil, Jobs) when SleepFor > 0 ->
     receive
         {'DOWN', _Ref, process, Pid, Reason} when is_map_key(Pid, Jobs) ->
             erlang:error({benchmark, {'EXIT', Pid, Reason}})
-    after Interval->
-        ok
-    end,
-    Counts = [erlperf_job:sample(Handle) || Handle <- Handles],
-    measure_impl(Jobs, merge(Counts, Before), Handles, Interval, Count - 1, CV).
+    after SleepFor ->
+        sleep
+    end;
+sleep(_Mode, _SleepFor, WaitUntil, Jobs) ->
+    busy_wait(WaitUntil, Jobs).
+
+%% When sleep detects significant difference in the actual sleep time vs. expected,
+%% loop is switched to the busy wait.
+%% Once switched to busy wait, erlperf stays there until the end of the test.
+busy_wait(WaitUntil, Jobs) ->
+    receive
+        {'DOWN', _Ref, process, Pid, Reason} when is_map_key(Pid, Jobs) ->
+            erlang:error({benchmark, {'EXIT', Pid, Reason}})
+    after 0 ->
+        case os:system_time(millisecond) of
+            Now when Now > WaitUntil ->
+                busy_wait;
+            _ ->
+                busy_wait(WaitUntil, Jobs)
+        end
+    end.
 
 merge([], []) ->
     [];

diff --git a/src/erlperf_app.erl b/src/erlperf_app.erl
@@ -1,4 +1,4 @@
-%%% @copyright (C) 2019-2022, Maxim Fedorov
+%%% @copyright (C) 2019-2023, Maxim Fedorov
 %%% @doc
 %%% Continuous benchmarking application behaviour.
 %%% @end

diff --git a/src/erlperf_cli.erl b/src/erlperf_cli.erl
@@ -1,4 +1,4 @@
-%%% @copyright (C) 2019-2022, Maxim Fedorov
+%%% @copyright (C) 2019-2023, Maxim Fedorov
 %%% @doc
 %%% Command line interface adapter.
 %%% @end
@@ -37,7 +37,7 @@ main(Args) ->
                     RunOpts0;
                 {ok, Str} ->
                     [erlang:error({loop, Option}) || Option <-
-                        [concurrency, sample_duration, samples, waarmup, cv], is_map_key(Option, RunOpts0)],
+                        [concurrency, sample_duration, samples, warmup, cv], is_map_key(Option, RunOpts0)],
                     RunOpts0#{loop => parse_loop(Str)}
                 end,
 

diff --git a/src/erlperf_cluster_monitor.erl b/src/erlperf_cluster_monitor.erl
@@ -1,4 +1,4 @@
-%%% @copyright (C) 2019-2022, Maxim Fedorov
+%%% @copyright (C) 2019-2023, Maxim Fedorov
 %%% @doc
 %%% Logs monitoring events for the entire cluster, to file or device.
 %%%  Requires erlperf_history service running, fails otherwise.

diff --git a/src/erlperf_file_log.erl b/src/erlperf_file_log.erl
@@ -1,4 +1,4 @@
-%%% @copyright (C) 2019-2022, Maxim Fedorov
+%%% @copyright (C) 2019-2023, Maxim Fedorov
 %%% @doc
 %%%   Writes monitoring events to I/O device.
 %%% @end

diff --git a/src/erlperf_history.erl b/src/erlperf_history.erl
@@ -1,4 +1,4 @@
-%%% @copyright (C) 2019-2022, Maxim Fedorov
+%%% @copyright (C) 2019-2023, Maxim Fedorov
 %%% @doc
 %%% Collects, accumulates &amp; filters cluster-wide monitoring events.
 %%% Essentially a simple in-memory database for quick cluster overview.