Skip to content

Commit

Permalink
adds the lifting benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
ivg committed Mar 8, 2019
1 parent 1c3be48 commit fe6cd63
Show file tree
Hide file tree
Showing 4 changed files with 186 additions and 0 deletions.
7 changes: 7 additions & 0 deletions lifting-benchmark/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Build script for the lifting benchmark.
#
# Neither `all` nor `clean` produces a file with its own name, so both are
# declared phony; otherwise a stray file called `clean` (or `all`) in this
# directory would make the corresponding target a no-op.
.PHONY: all clean

# Compile bench.ml to a native executable with the bap and
# findlib.dynload packages.
all:
	ocamlbuild -package bap -package findlib.dynload bench.native

# Ask ocamlbuild to remove what it has built.
clean:
	ocamlbuild -clean
77 changes: 77 additions & 0 deletions lifting-benchmark/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
Lifter Benchmark
================

This simple tool will take a file and disassemble it from the very
beginning till the last byte (even if it is not code, at all), and try
to lift every possible instruction. It won't try to parse file headers,
find code sections, or anything like this. At the end, if everything
went fine, it will print the descriptive statistics of the lifter.

The benchmark also disables the optimization pipeline; however, there is still
a small overhead on top of the lifter, because typechecking is enforced.
In fact, in the end, we typecheck at least thrice :)

Building and using
==================

```
make
./bench.native <filename>
```

You can also specify an architecture:
```
./bench.native <arch> <filename>
```


Examples
========

On an Intel(R) Xeon(R) CPU E5-2630 v4 @ 2.20GHz, bap 1.5 built with OCaml 4.05.0+flambda
and llvm-3.8 yields the following results:
```
$ ./bench.native /lib/x86_64-linux-gnu/libc-2.23.so
Statistics for the x86_64 lifter
Total time: 6.89342 s
Total throughtput: 265 kB/s
Insn throughtput: 74532 I/s
Insn latency: 13.42 mks/I
Bytes processed: 1868831
Data bytes: 24636
Code bytes: 1844195
Code density: 98.68%
Total number of instructions: 513777
Lifted instructions: 497837
Lifting coverage: 96.90%
```

and for Google Chrome

```
$ ./bench.native /opt/google/chrome/chrome
Statistics for the x86_64 lifter
Total time: 571.788 s
Total throughtput: 240 kB/s
Insn throughtput: 75191 I/s
Insn latency: 13.30 mks/I
Bytes processed: 140517351
Data bytes: 1285721
Code bytes: 139231630
Code density: 99.09%
Total number of instructions: 42993264
Lifted instructions: 41657415
Lifting coverage: 96.89%
```

Normalized to the CPU speed (i.e., CPU speed / throughput ), it means
that currently the lifter on average makes 9,000 ops per disassembled
byte and 30,000 operations per disassembled instruction.


Caveats
=======

For some reason, the ARM backend in LLVM just stops on malformed data
instead of reporting an error. So for ARM we need to feed it something
that looks like code.
102 changes: 102 additions & 0 deletions lifting-benchmark/bench.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
open Core_kernel
open Bap.Std
open Bap_plugins.Std

(** [usage ()] writes a short description of the tool and its command
    line to the standard error channel and terminates the process with
    exit code 1. It never returns. *)
let usage () =
  let lines = [
    "Performs linear sweep disassembly and lifting of raw bytes\n";
    "Usage: ./bench [<arch>] <file>\n";
  ] in
  List.iter lines ~f:(fun line -> eprintf "%s" line);
  exit 1

(* Short alias for [Disasm_expert.Basic]; [disasm] below uses its
   [with_disasm], [run] and [step] functions. *)
module Dis = Disasm_expert.Basic

(** Global mutable counters and timestamps collected during a benchmark
    run, together with the report that is printed from them. *)
module Stats = struct
  (* Timestamps of the beginning and the end of the run; [disasm] stores
     the results of [Unix.gettimeofday] here, so they are in seconds. *)
  let start = ref 0.
  let finish = ref 0.

  (* Number of instructions for which lifting failed / succeeded. *)
  let fails = ref 0
  let insns = ref 0

  (* Number of bytes that were decoded as instructions ([code]) and of
     bytes that the disassembler reported as invalid ([data]). *)
  let code = ref 0
  let data = ref 0

  (** [update stat delta] adds [delta] to the integer counter [stat]. *)
  let update stat delta = stat := !stat + delta

  (** [print arch] writes the collected statistics for the [arch] lifter
      to the standard output.

      Ratios and the per-instruction latency are reported as [0] when
      their denominator is zero (nothing was processed) instead of
      printing [nan] or [inf]. *)
  let print arch =
    let total = !finish -. !start in
    let total_bytes = !code + !data in
    let total_insns = !insns + !fails in
    (* Microseconds per instruction; undefined without instructions. *)
    let latency =
      if total_insns = 0 then 0.
      else (total /. float total_insns) *. 1e6 in
    (* Units of [p] processed per second of wall-clock time. *)
    let speed p = float p /. total in
    (* [m] as a percentage of [n]. *)
    let ratio m n =
      if n = 0 then 0.
      else (float m /. float n) *. 100. in
    printf "Statistics for the %a lifter\n" Arch.ppo arch;
    printf "Total time: %g s\n" total;
    (* Scale to kilobytes in floating point: dividing the integer byte
       count by 1024 first would drop up to 1023 bytes and report zero
       throughput for any input smaller than 1 KiB. *)
    printf "Total throughtput: %.0f kB/s\n" (speed total_bytes /. 1024.);
    printf "Insn throughtput: %.0f I/s\n" (speed total_insns);
    printf "Insn latency: %.2f mks/I\n" latency;
    printf "Bytes processed: %d\n" total_bytes;
    printf "Data bytes: %d\n" !data;
    printf "Code bytes: %d\n" !code;
    printf "Code density: %.2f%%\n" (ratio !code total_bytes);
    printf "Total number of instructions: %d\n" total_insns;
    printf "Lifted instructions: %d\n" !insns;
    printf "Lifting coverage: %.2f%%\n" (ratio !insns total_insns)
end

(** [disasm arch mem] disassembles [mem] with the ["llvm"] backend for
    [arch] and tries to lift every instruction that is hit, recording
    the outcome in [Stats].

    - [Stats.start] is set right before the disassembler is run and
      [Stats.finish] is set in the [stopped] handler;
    - the length of every invalid chunk is added to [Stats.data];
    - the length of every hit chunk is added to [Stats.code], and either
      [Stats.insns] or [Stats.fails] is incremented depending on whether
      [Target.lift] succeeded.

    The result is whatever [Dis.with_disasm] returns, with [Ok ()] being
    produced by the callback once [Dis.run] has returned. *)
let disasm arch mem =
  let module Target = (val target_of_arch arch) in
  (* Handler for the [stopped] event: just take the final timestamp. *)
  let on_stopped _state () =
    Stats.finish := Unix.gettimeofday () in
  (* Handler for undecodable bytes: count them as data and move on. *)
  let on_invalid state chunk () =
    Stats.update Stats.data (Memory.length chunk);
    Dis.step state () in
  (* Handler for a decoded instruction: count its bytes as code, lift
     it, bump the matching counter, and move on. *)
  let on_hit state chunk insn () =
    Stats.update Stats.code (Memory.length chunk);
    let counter = match Target.lift chunk insn with
      | Ok _ -> Stats.insns
      | Error _ -> Stats.fails in
    incr counter;
    Dis.step state () in
  Dis.with_disasm ~backend:"llvm" (Arch.to_string arch) ~f:(fun dis ->
      Stats.start := Unix.gettimeofday ();
      Dis.run dis mem ~init:() ~return:ident
        ~stop_on:[`Valid]
        ~stopped:on_stopped
        ~invalid:on_invalid
        ~hit:on_hit
      |> Result.return)


(** [main arch file] loads [file] as a single memory region based at
    address zero (with the address size and endianness of [arch]), runs
    [disasm] over it, and prints the collected statistics.

    Both failure modes, an unreadable file and a disassembler that
    cannot be initialized, report the error on the standard error
    channel and terminate the process with exit code 1, so that callers
    and scripts can detect the failure. *)
let main arch file =
  let size = Arch.addr_size arch in
  let base = Word.zero (Size.in_bits size) in
  match Memory.of_file (Arch.endian arch) base file with
  | Error err ->
    eprintf "Error: file is not readable or regular - %s\n"
      (Error.to_string_hum err);
    exit 1
  | Ok mem ->
    begin match disasm arch mem with
      | Error err ->
        eprintf "Error: failed to initialize the disassembler - %s\n"
          (Error.to_string_hum err);
        (* Previously this branch fell through and the process exited
           with status 0 even though nothing had been benchmarked. *)
        exit 1
      | Ok () ->
        Stats.print arch
    end

(** [read_arch s] parses [s] with [Arch.of_string]. If [s] does not
    name an architecture, an error message followed by every element of
    [Arch.all] is written to the standard error channel and the process
    terminates with exit code 1. *)
let read_arch s =
  let reject () =
    eprintf "Error: unknown architecture %s\n" s;
    eprintf "The list of known architectures:\n";
    List.iter Arch.all ~f:(fun known -> eprintf "\t%a\n" Arch.ppo known);
    exit 1 in
  match Arch.of_string s with
  | None -> reject ()
  | Some arch -> arch

(** [read_file s] returns [s] unchanged when it names an existing path
    that is not a directory. Otherwise it reports the problem on the
    standard error channel and hands over to [usage], which terminates
    the process. *)
let read_file s =
  let acceptable = Sys.file_exists s && not (Sys.is_directory s) in
  if not acceptable then begin
    eprintf "Error: `%s' is not a regular file\n" s;
    usage ()
  end
  else s


(* Program entry point: run the plugins (excluding "bil"), then
   dispatch on the command line. One argument is the file to process
   for x86_64; two arguments are the architecture followed by the file;
   anything else prints the usage message and exits. *)
let () =
  Plugins.run ~exclude:["bil"] ();
  match Array.to_list Sys.argv with
  | [_; file] -> main `x86_64 (read_file file)
  | [_; arch; file] -> main (read_arch arch) (read_file file)
  | _ -> usage ()
Empty file added lifting-benchmark/bench.mli
Empty file.

0 comments on commit fe6cd63

Please sign in to comment.