Merge pull request #15 from max-au/max-au/usability-fixes
[erlperf] usability fixes
max-au authored Apr 12, 2022
2 parents 32e27d5 + 2cd34a3 commit 57a601e
Showing 8 changed files with 52 additions and 29 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG
@@ -1,5 +1,8 @@
# Changelog

## 2.0.1
- minor bugfixes (friendlier error reporting)

## 2.0
- incompatible change: `erlperf` requires runner arity to be defined explicitly.
Code example: `erlperf:run(#{runner => {timer, sleep, []}, init_runner => "1."})`,
10 changes: 5 additions & 5 deletions README.md
@@ -118,8 +118,8 @@ When there are multiple jobs, multiple columns are printed.
Command-line benchmarking does not save results anywhere. It is designed to provide a quick answer to the question
"is that piece of code faster".
## Minimal overhead mode
Since 2.0, `erlperf` includes "low overhead" mode. It cannot be used for continuous benchmarking. In this mode
## Timed (low overhead) mode
Since 2.0, `erlperf` includes timed mode. It cannot be used for continuous benchmarking. In this mode
runner code is executed a specified number of times in a tight loop:
```bash
@@ -130,7 +130,7 @@ runner code is executed a specified number of times in a tight loop:
```
This mode effectively runs the following code: `loop(0) -> ok; loop(Count) -> rand:uniform(), loop(Count - 1).`
Continuous mode adds 1-2 ns to each iteration.
Timed mode reduces benchmarking overhead (compared to continuous mode) by 1-2 ns per iteration.
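For illustration, timed mode can also be driven programmatically through `erlperf:time/2` (see `src/erlperf.erl` further down in this diff). A minimal sketch, assuming the runner may be passed as an MFA callable in the same way as elsewhere in this README:
```erlang
%% run rand:uniform/0 one million times in a tight loop;
%% the result is the total wall-clock time in microseconds
TimeUs = erlperf:time({rand, uniform, []}, 1000000),
NsPerCall = TimeUs * 1000 / 1000000.
```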
# Benchmarking existing application
`erlperf` can be used to measure performance of your application running in production, or code that is stored
@@ -319,7 +319,7 @@ two times faster than applying a function, and 20 times faster than repeatedly c
the same invocation method to get a relevant result.
Absolute benchmarking overhead may be significant for very fast functions taking just a few nanoseconds.
Use "low overhead mode" for such occasions.
Use timed mode for such occasions.
## Experimental: recording call chain
This experimental feature allows capturing a sequence of calls as a list of
@@ -404,7 +404,7 @@ Number of `run/0` calls per second is reported as throughput. Before 2.0, `erlpe
used `atomics` to maintain a counter shared between all runner processes, introducing
unnecessary BIF call overhead.
Low-overhead mode tightens it even further, turning runner into this function:
Timed (low-overhead) mode tightens it even further, turning runner into this function:
```erlang
runner(0) ->
ok;
13 changes: 7 additions & 6 deletions doc/overview.edoc
@@ -1,6 +1,6 @@
** this is the overview.doc file for the application 'erlperf' **

@version 2.0.0
@version 2.0.1
@author Maxim Fedorov, <maximfca@gmail.com>
@title erlperf: Erlang Performance & Benchmarking Suite.

@@ -123,8 +123,8 @@
Command-line benchmarking does not save results anywhere. It is designed to provide a quick answer to the question
"is that piece of code faster".

=== Minimal overhead mode ===
Since 2.0, `erlperf' includes "low overhead" mode. It cannot be used for continuous benchmarking. In this mode
=== Timed (low overhead) mode ===
Since 2.0, `erlperf' includes timed mode. It cannot be used for continuous benchmarking. In this mode
runner code is executed a specified number of times in a tight loop:

```
@@ -135,7 +135,8 @@
'''

This mode effectively runs the following code: `loop(0) -> ok; loop(Count) -> rand:uniform(), loop(Count - 1).'
Continuous mode adds 1-2 ns to each iteration.
Timed mode reduces benchmarking overhead (compared to continuous mode) by 1-2 ns per iteration.


== Benchmarking existing application ==
`erlperf' can be used to measure performance of your application running in production, or code that is stored
@@ -324,7 +325,7 @@
the same invocation method to get a relevant result.

Absolute benchmarking overhead may be significant for very fast functions taking just a few nanoseconds.
Use "low overhead mode" for such occasions.
Use timed mode for such occasions.

=== Experimental: recording call chain ===
This experimental feature allows capturing a sequence of calls as a list of
@@ -409,7 +410,7 @@
used `atomics' to maintain a counter shared between all runner processes, introducing
unnecessary BIF call overhead.

Low-overhead mode tightens it even further, turning runner into this function:
Timed (low-overhead) mode tightens it even further, turning runner into this function:
```
runner(0) ->
ok;
2 changes: 1 addition & 1 deletion src/erlperf.app.src
@@ -1,6 +1,6 @@
{application, erlperf,
[{description, "Erlang Performance & Benchmarking Suite"},
{vsn, "2.0.0"},
{vsn, "2.0.1"},
{registered, [
erlperf_sup, erlperf_job_sup, erlperf_monitor,
erlperf_history, erlperf_file_log, erlperf_cluster_monitor
4 changes: 2 additions & 2 deletions src/erlperf.erl
@@ -32,7 +32,7 @@
% ignored when running concurrency test
concurrency => pos_integer(),
%% sampling interval: default is 1000 milliseconds (to measure QPS)
%% 'undefined' duration is used as a flag for low-overhead benchmarking
%% 'undefined' duration is used as a flag for timed benchmarking
sample_duration => pos_integer() | undefined,
%% warmup samples: first 'warmup' cycles are ignored (defaults to 0)
warmup => non_neg_integer(),
@@ -187,7 +187,7 @@ start(Code, Concurrency) ->
Job.

%% @doc
%% Low-overhead benchmarking, runs the code Count times and returns
%% Timed benchmarking, runs the code Count times and returns
%% time in microseconds it took to execute the code.
-spec time(code(), Count :: non_neg_integer()) -> TimeUs :: non_neg_integer().
time(Code, Count) ->
18 changes: 10 additions & 8 deletions src/erlperf_cli.erl
@@ -29,7 +29,7 @@ main(Args) ->
end, ok})
end,

%% low-overhead benchmarking is not compatible with many options, and may have "loop" written as 100M, 100K
%% timed benchmarking is not compatible with many options, and may have "loop" written as 100M, 100K
%% TODO: implement mutually exclusive groups in argparse
RunOpts =
case maps:find(loop, RunOpts0) of
@@ -75,9 +75,11 @@ main(Args) ->
error:{generic, Error} ->
format(error, "Error: ~s~n", [Error]);
error:{loop, Option} ->
format(error, "Low-overhead benchmarking is not compatible with ~s~n", [Option]);
format(error, "Timed benchmarking is not compatible with ~s~n", [Option]);
error:{generate, {parse, FunName, Error}} ->
format(error, "Parse error for ~s: ~s~n", [FunName, lists:flatten(Error)]);
error:{generate, {What, WhatArity, requires, Dep}} ->
format(error, "~s/~b requires ~s function defined~n", [What, WhatArity, Dep]);
error:{compile, Errors, Warnings} ->
Errors =/= [] andalso format(error, "Compile error: ~s~n", [compile_errors(Errors)]),
Warnings =/= [] andalso format(warning, "Warning: ~s~n", [compile_errors(Warnings)]);
@@ -95,7 +97,7 @@ compile_errors([]) -> "";
compile_errors([{_, []} | Tail]) ->
compile_errors(Tail);
compile_errors([{L, [{_Anno, Mod, Err} | T1]} | Tail]) ->
lists:flatten(Mod:format_error(Err)) ++ compile_errors([{L, T1} | Tail]).
lists:flatten(Mod:format_error(Err) ++ io_lib:format("~n", [])) ++ compile_errors([{L, T1} | Tail]).

callable(Type, {Args, Acc}) ->
{Args, merge_callable(Type, maps:get(Type, Args, []), Acc, [])}.
@@ -137,7 +139,7 @@ parse_loop(Loop) ->
{Int, "K"} -> Int * 1000;
{Int, []} -> Int;
{Int, "G"} -> Int * 1000000000;
_Other -> erlang:error({generic, "unsupported syntax for low-overhead count: " ++ Loop})
_Other -> erlang:error({generic, "unsupported syntax for timed iteration count: " ++ Loop})
end.
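%% Editor's illustration (expected conversions, not part of this diff; the
%% "M" branch is implied by the option help below):
%%   parse_loop("50")   -> 50
%%   parse_loop("100K") -> 100000
%%   parse_loop("200M") -> 200000000
%%   parse_loop("3G")   -> 3000000000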

arguments() ->
@@ -146,7 +148,7 @@
"Benchmark rand:uniform() vs crypto:strong_rand_bytes(2):\n erlperf 'rand:uniform().' 'crypto:strong_rand_bytes(2).' --samples 10 --warmup 1\n"
"Figure out concurrency limits:\n erlperf 'code:is_loaded(local_udp).' --init 'code:ensure_loaded(local_udp).'\n"
"Benchmark pg join/leave operations:\n erlperf 'pg:join(s, foo, self()), pg:leave(s, foo, self()).' --init 'pg:start_link(s).'\n"
"Low-overhead benchmark for a single BIF:\n erlperf 'erlang:unique_integer().' -l 1000000\n",
"Timed benchmark for a single BIF:\n erlperf 'erlang:unique_integer().' -l 1000000\n",
arguments => [
#{name => concurrency, short => $c, long => "-concurrency",
help => "number of concurrently executed runner processes",
@@ -158,7 +160,7 @@
help => "minimum number of samples to collect (3)",
type => {int, [{min, 1}]}},
#{name => loop, short => $l, long => "-loop",
help => "low overhead mode count, e.g. 100K, 200M, 3G"},
help => "timed mode (lower overhead) iteration count: 50, 100K, 200M, 3G"},
#{name => warmup, short => $w, long => "-warmup",
help => "number of samples to skip (0)",
type => {int, [{min, 0}]}},
@@ -245,7 +247,7 @@ run_main(#{loop := Loop}, #{}, Codes) ->
run_main(RunOpts, SqueezeOps, [Code]) when map_size(SqueezeOps) > 0 ->
Duration = maps:get(sample_duration, RunOpts, 1000),
{QPS, Con} = erlperf:run(Code, RunOpts, SqueezeOps),
Timing = Duration * 1000000 div QPS * Con,
Timing = if QPS =:= 0 -> infinity; true -> Duration * 1000000 div QPS * Con end,
format_result([Code], Con, [QPS], [Timing]);

%% benchmark: don't print "Relative" column for a single sample
@@ -261,7 +263,7 @@ run_main(RunOpts, _, Execs) ->
Concurrency = maps:get(concurrency, RunOpts, 1),
Duration = maps:get(sample_duration, RunOpts, 1000),
Throughput = erlperf:benchmark(Execs, RunOpts, undefined),
Timings = [Duration * 1000000 div T * Concurrency || T <- Throughput],
Timings = [if T =:= 0 -> infinity; true -> Duration * 1000000 div T * Concurrency end || T <- Throughput],
format_result(Execs, Concurrency, Throughput, Timings).
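%% Editor's illustration (not part of this diff): with the default
%% sample_duration of 1000 ms the expression above reports the average time
%% per call in nanoseconds, e.g. QPS = 2000000 at Concurrency = 1 gives
%%   1000 * 1000000 div 2000000 * 1 = 500   (ns per call),
%% while zero throughput now maps to 'infinity' instead of a badarith crash.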

format_result(Execs, Concurrency, Throughput, Timings) ->
4 changes: 3 additions & 1 deletion src/erlperf_file_log.erl
@@ -127,7 +127,9 @@ format_number(Num) ->

%% @doc Formats time duration, from nanoseconds to seconds
%% Example: 88 -> 88 ns, 88000 -> 88 us, 10000000 -> 10 ms
-spec format_duration(non_neg_integer()) -> string().
-spec format_duration(non_neg_integer() | infinity) -> string().
format_duration(infinity) ->
"inf";
format_duration(Num) when Num > 100000000000 ->
integer_to_list(round(Num / 1000000000)) ++ " s";
format_duration(Num) when Num > 100000000 ->
27 changes: 21 additions & 6 deletions src/erlperf_monitor.erl
@@ -13,7 +13,8 @@
-export([
start/0,
start_link/0,
register/3
register/3,
unregister/1
]).

%% gen_server callbacks
@@ -68,13 +69,19 @@ start_link() ->
register(Job, Handle, Initial) ->
gen_server:cast(?MODULE, {register, Job, Handle, Initial}).

%% @doc
%% Removes the job from monitoring (e.g. job has no workers running)
-spec unregister(pid()) -> ok.
unregister(Job) ->
gen_server:cast(?MODULE, {unregister, Job}).
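%% Editor's illustration (assumed usage, not part of this diff): a job
%% registers itself with the monitor while it has workers running and
%% unregisters once the last worker stops:
%%   ok = erlperf_monitor:register(self(), Handle, 0),
%%   %% ... workers running ...
%%   ok = erlperf_monitor:unregister(self()).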

%%%===================================================================
%%% gen_server callbacks

%% System monitor state
-record(state, {
% bi-map of job processes to counters
jobs :: [{pid(), Handle :: erlperf_job:handle(), Prev :: integer()}] ,
jobs :: [{pid(), reference(), Handle :: erlperf_job:handle(), Prev :: integer()}] ,
% scheduler data saved from last call
sched_data :: [{pos_integer(), integer(), integer()}],
% number of normal schedulers
@@ -112,8 +119,16 @@ handle_call(_Request, _From, _State) ->
erlang:error(notsup).

handle_cast({register, Job, Handle, Initial}, #state{jobs = Jobs} = State) ->
monitor(process, Job),
{noreply, State#state{jobs = [{Job, Handle, Initial} | Jobs]}}.
MRef = monitor(process, Job),
{noreply, State#state{jobs = [{Job, MRef, Handle, Initial} | Jobs]}};
handle_cast({unregister, Job}, #state{jobs = Jobs} = State) ->
case lists:keyfind(Job, 1, Jobs) of
{Job, MRef, _, _} ->
demonitor(MRef, [flush]),
{noreply, State#state{jobs = lists:keydelete(Job, 1, Jobs)}};
false ->
{noreply, State}
end.

handle_info({'DOWN', _MRef, process, Pid, _Reason}, #state{jobs = Jobs} = State) ->
{noreply, State#state{jobs = lists:keydelete(Pid, 1, Jobs)}};
Expand All @@ -129,13 +144,13 @@ handle_tick(#state{sched_data = Data, normal = Normal, dcpu = Dcpu} = State) ->
{NU, DU, DioU} = fold_normal(Data, NewSched, Normal, Dcpu, 0, 0),
% add benchmarking info
{Jobs, UpdatedJobs} = lists:foldl(
fun ({Pid, Handle, Prev}, {J, Save}) ->
fun ({Pid, MRef, Handle, Prev}, {J, Save}) ->
Cycles =
case erlperf_job:sample(Handle) of
C when is_integer(C) -> C;
undefined -> Prev %% job is stopped, race condition here
end,
{[{Pid, Cycles - Prev} | J], [{Pid, Handle, Cycles} | Save]}
{[{Pid, Cycles - Prev} | J], [{Pid, MRef, Handle, Cycles} | Save]}
end, {[], []}, State#state.jobs),
%
Sample = #{
