Skip to content

Commit

Permalink
partially upgrades byteweight to work with the modern bap (BinaryAnal…
Browse files Browse the repository at this point in the history
…ysisPlatform#1431)

This change re-enables Byteweight, which has been effectively disabled
ever since we started installing signatures in a different
place. Next, it revamps the byteweight plugin and
bap-byteweight-signatures library to work with modern bap
infrastructure that uses Theory.Target instead of the old
Bap.Std.Arch.

The plugin itself was also rewritten and uses only modern
interfaces. It stores roots in the knowledge base (no streams anymore)
and uses targets and compiler properties from the knowledge base.
  • Loading branch information
ivg authored Feb 12, 2022
1 parent 5ad24c7 commit 5b1acc0
Show file tree
Hide file tree
Showing 9 changed files with 367 additions and 142 deletions.
12 changes: 11 additions & 1 deletion lib/bap_byteweight/bap_byteweight.ml
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ module Make2
end

module Bytes = struct
include Make2(struct
module Self = Make2(struct
type t = mem
type key = mem

Expand All @@ -114,6 +114,16 @@ module Bytes = struct
| _ -> None
end)(Memory.Trie.Stable.V1.R8)

(* The descriptor of the "bytes" signature data type. Values are
   (de)serialized with the [Binable] implementation of [Self]. *)
let t =
  (* decodes a value from the raw contents of a signature entry *)
  let load bytes =
    Caml.Bytes.unsafe_to_string bytes |>
    Binable.of_string (module Self) in
  (* encodes a value into the raw contents of a signature entry *)
  let save data =
    Binable.to_string (module Self) data |>
    Caml.Bytes.unsafe_of_string in
  Bap_byteweight_signatures.Data.declare "bytes" ~load ~save

include Self


let find bw ~length ~threshold mem =
let start = Memory.min_addr mem in
Expand Down
2 changes: 2 additions & 0 deletions lib/bap_byteweight/bap_byteweight.mli
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,8 @@ module Bytes : sig
and type corpus = mem
and type token := word

(** the descriptor of the signature data type, declared under the
    name ["bytes"], that stores values of type [t] in the signature
    database. *)
val t : t Bap_byteweight_signatures.data


(** [find mem ~length ~threshold corpus] extract addresses of all
memory chunks of the specified [length], that were classified
Expand Down
151 changes: 136 additions & 15 deletions lib/bap_byteweight/bap_byteweight_signatures.ml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
open Core_kernel
open Regular.Std
open Bap_core_theory
open Bap.Std
include Self()

module Config = Bap_main.Extension.Configuration

Expand All @@ -14,14 +13,62 @@ type error = [
| `Sys_error of string
]


(* The descriptor of a data type that is stored in signature entries. *)
type 'a data = {
(* the name of the data type; it is the last component of an entry name *)
name : string;
(* decodes a value from the raw contents of an entry *)
load : (bytes -> 'a);
(* encodes a value into the raw contents of an entry *)
save : ('a -> bytes);
}

(* Internal exception that carries a signature database error; [of_exn]
   turns it back into an [Error] result. *)
exception Failed of error

(* [fail error] aborts the current computation by raising [Failed error]. *)
let fail error = Failed error |> raise
let zip_error entry err =
fail (`Corrupted (sprintf "%s: %s" entry err))

let entry ?(comp="default") ~mode arch =
Arch.to_string arch / comp / mode
(* [corrupted entry err] is the [`Corrupted] error whose message is
   the [entry] name followed by the description [err]. *)
let corrupted entry err =
  let message = sprintf "%s: %s" entry err in
  `Corrupted message

(* [zip_error entry err] raises [Failed] with [corrupted entry err]. *)
let zip_error entry err = corrupted entry err |> fail

(* [compiler_name compiler] is the name of the compiler or ["default"]
   when no compiler is specified. *)
let compiler_name = function
  | None -> "default"
  | Some compiler -> Theory.Compiler.name compiler

(* [matches_modulo_bits t name] is [true] when
   [Theory.Target.matching t name] yields a target that has the same
   [Theory.Target.bits] as [t]. *)
let matches_modulo_bits t name =
  Theory.Target.matching t name |>
  Option.exists ~f:(fun t' ->
      Theory.Target.bits t = Theory.Target.bits t')

(* [matching_entry ?compiler target data entry] checks whether the
   archive [entry] stores [data] for the given [target] and [compiler].

   The entry name must have exactly three slash-separated components:
   the target, the compiler, and the data type name. Any other shape
   is reported by raising [Failed] with a [`Corrupted] error. *)
let matching_entry ?compiler target data entry =
  let filename = entry.Zip.filename in
  match String.split filename ~on:'/' with
  | [target_part; compiler_part; data_part] ->
    if matches_modulo_bits target target_part
    then String.equal (compiler_name compiler) compiler_part
      && String.equal data.name data_part
    else false
  | _ -> fail (`Corrupted ("invalid entry name: " ^ filename))

(* [with_input file k] opens the archive [file] for reading, applies
   [k] to it, and closes the archive even if [k] raises. *)
let with_input file k =
  let zip = Zip.open_in file in
  let finally () = Zip.close_in zip in
  protect ~f:(fun () -> k zip) ~finally

(* [with_output file k] opens the archive [file] for writing, applies
   [k] to it, and closes the archive even if [k] raises. *)
let with_output file k =
  let zip = Zip.open_out file in
  let finally () = Zip.close_out zip in
  protect ~f:(fun () -> k zip) ~finally

(* [read_entry ?compiler target data file] loads the first entry of
   the archive [file] that matches [target], [compiler], and [data].
   Returns [None] if there is no such entry. *)
let read_entry ?compiler target data file =
  with_input file @@ fun zip ->
  let is_match = matching_entry ?compiler target data in
  match List.find (Zip.entries zip) ~f:is_match with
  | None -> None
  | Some entry ->
    let contents = Zip.read_entry zip entry in
    Some (data.load (Bytes.of_string contents))

(* [read_entries file] is the list of all entries of the archive
   [file] paired with their contents. The list is empty if the file
   doesn't exist. *)
let read_entries file =
  match Sys.file_exists file with
  | false -> []
  | true ->
    with_input file @@ fun zip ->
    List.map (Zip.entries zip) ~f:(fun entry ->
        entry, Zip.read_entry zip entry)

(* [target_name target] is the unqualified name of the [target]. *)
let target_name target =
  KB.Name.unqualified (Theory.Target.name target)

(* [make_entry ?compiler target data] is the name of the archive entry
   that stores [data] for the given [target] and [compiler]: the target
   name, the compiler name, and the data type name joined with [/]. *)
let make_entry ?compiler target data =
  let target_part = target_name target in
  let compiler_part = compiler_name compiler in
  target_part / compiler_part / data.name

(* [make_path root] is the location of the signature database relative
   to the [root] directory. *)
let make_path root =
  let directory = root / "signatures" in
  directory / "byteweight.zip"

Expand All @@ -31,14 +78,88 @@ let default_path = match Sys.getenv_opt "BAP_SIGFILE" with
| Some path -> path
| None -> make_path Config.datadir

let paths = [default_path; system_path]

let resolve_path user = match user with
(* The signature databases that [try_lookup] always searches, in this
   order, after the paths provided by the caller. *)
let default_paths = [default_path; system_path]

(* [try_lookup ?paths ?compiler target data] searches [paths] followed
   by [default_paths] and returns the data loaded from the first file
   that has a matching entry. Files that do not exist are skipped.
   Exceptions raised while reading an archive are not caught here. *)
let try_lookup ?(paths=[]) ?compiler target data =
  let lookup_in path =
    match Sys.file_exists path with
    | true -> read_entry ?compiler target data path
    | false -> None in
  List.find_map (paths @ default_paths) ~f:lookup_in

(* [of_exn exn] translates the exceptions that are expected when
   working with the signature database into an [Error] result.
   Any other exception is raised again. *)
let of_exn exn =
  let error = match exn with
    | Sys_error msg -> `Sys_error msg
    | Zip.Error (_,ent,err) -> corrupted ent err
    | Failed er -> er
    | other -> raise other in
  Error error

(* [lookup ?paths ?compiler target data] is the result-returning
   version of [try_lookup]: exceptions are translated with [of_exn]
   and the absence of a matching entry is reported as [`No_entry]
   with the name of the target. *)
let lookup ?paths ?compiler target data =
  let outcome =
    try Ok (try_lookup ?paths ?compiler target data)
    with exn -> of_exn exn in
  match outcome with
  | Error problem -> Error problem
  | Ok (Some value) -> Ok value
  | Ok None -> Error (`No_entry (target_name target))


(* [update_or_fail ?compiler target data payload path] rewrites the
   archive at [path] so that it contains the new entry with [payload]
   followed by all previously stored entries that do not match the
   same [compiler], [target], and [data].

   The old entries are read into memory before the archive is opened
   for writing. Errors are reported with exceptions. *)
let update_or_fail ?compiler target data payload path =
  let is_replaced (entry,_) =
    matching_entry ?compiler target data entry in
  let preserved =
    List.filter (read_entries path) ~f:(Fn.non is_replaced) in
  with_output path @@ fun zip ->
  let name = make_entry ?compiler target data in
  let contents = Bytes.unsafe_to_string (data.save payload) in
  Zip.add_entry contents zip name;
  List.iter preserved ~f:(fun (entry,contents) ->
      let {Zip.filename; extra; comment; mtime} = entry in
      Zip.add_entry contents zip filename
        ~extra ~comment ~mtime)

(* [copy input output] copies everything that remains in the [input]
   channel to the [output] channel.

   The data is transferred in chunks of at most [len] bytes. A read
   that returns fewer bytes than requested is not an indication of the
   end of file, so the loop continues until a read returns zero bytes. *)
let copy input output =
  let len = 0x1000 in
  let buf = Bytes.create len in
  let rec loop () =
    match In_channel.input input ~buf ~pos:0 ~len with
    | 0 -> ()
    | read ->
      Out_channel.output output ~buf ~pos:0 ~len:read;
      loop () in
  loop ()

(* [temporary_copy file] copies [file] into a fresh temporary file and
   returns the name of the copy.

   The copy is created in the directory of [file], so that it can be
   later renamed over [file] without crossing a filesystem boundary,
   and is opened in the binary mode, since the archive is binary data.
   The output channel is always closed and the temporary file is
   removed if the copy fails; the exception is then raised again. *)
let temporary_copy file =
  let tmp,output =
    Caml.Filename.open_temp_file
      ~mode:[Caml.Open_binary]
      ~temp_dir:(Caml.Filename.dirname file)
      "byteweight" "copy" in
  try
    protect ~finally:(fun () -> Out_channel.close output) ~f:(fun () ->
        In_channel.with_file file ~f:(fun input -> copy input output));
    tmp
  with exn ->
    Sys.remove tmp;
    raise exn

(* [update ?compiler target data payload path] stores [payload] in the
   signature database at [path], replacing the entries that match the
   same [compiler], [target], and [data].

   If the database exists, the update is applied to a temporary copy
   that is renamed over [path] only when the update succeeds, so a
   failed update leaves the original database intact. If the database
   doesn't exist, it is created in place.

   Returns [Error] for the errors recognized by [of_exn]; any other
   exception is propagated. *)
let update ?compiler target data payload path =
  let replace () =
    let tmp = temporary_copy path in
    try
      update_or_fail ?compiler target data payload tmp;
      Sys.rename tmp path
    with exn ->
      (* do not leave the temporary file behind *)
      Sys.remove tmp;
      raise exn in
  try
    if Sys.file_exists path
    then replace ()
    else update_or_fail ?compiler target data payload path;
    Ok ()
  with exn -> of_exn exn

(* The registry of the signature data types. *)
module Data = struct
  (* the names of all data types declared so far *)
  let registry = Hash_set.create (module String)

  (* [declare ~load ~save name] registers [name] and returns the
     descriptor of the data type. Fails if the name is already taken. *)
  let declare ~load ~save name =
    let () = match Hash_set.mem registry name with
      | false -> Hash_set.add registry name
      | true ->
        failwithf "The byteweight data type named %S is \
                   already registered, please pick another name"
          name () in
    {name; load; save}
end

(* the old deprecated implementation *)

let resolve_path user =
let user = Option.value_map user ~f:List.return ~default:[] in
let paths = user @ default_paths in
match List.find paths ~f:Sys.file_exists with
| None -> fail `No_signatures
| Some path -> path
| None ->
match List.find paths ~f:Sys.file_exists with
| Some path -> path
| None -> fail `No_signatures

(* [entry ?comp ~mode arch] is the name of the archive entry used by
   the deprecated interface: the architecture name, the compiler
   (["default"] if not specified), and the mode joined with [/]. *)
let entry ?(comp="default") ~mode arch =
  let arch_part = Arch.to_string arch in
  arch_part / comp / mode

let load_exn ?comp ?path ~mode arch =
let path = resolve_path path in
Expand All @@ -48,7 +169,7 @@ let load_exn ?comp ?path ~mode arch =
let entry_path = entry ?comp ~mode arch in
let r = try
let entry = Zip.find_entry zip entry_path in
Ok (Zip.read_entry zip entry |> Bytes.of_string)
Ok (Zip.read_entry zip entry |> Caml.Bytes.unsafe_of_string)
with Caml.Not_found -> fail (`No_entry entry_path)
| Zip.Error (_,ent,err) -> zip_error ent err in
Zip.close_in zip;
Expand Down
83 changes: 78 additions & 5 deletions lib/bap_byteweight/bap_byteweight_signatures.mli
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
(** Provides signatures storage *)
(** Interface to the unified storage of signatures.
The signatures are key-value pairs (entries) located in one or more
archives. Keys are target/compiler descriptions and values are
arbitrary data.
The data types of the signature are described with the [Data]
module. This library doesn't specify any data types of signature
values and they are commonly provided by the libraries that define
those data types, e.g., [Bap_byteweight.Bytes].
*)

open Core_kernel
open Regular.Std
open Bap_core_theory
open Bap.Std

(** Error conditions *)
Expand All @@ -11,16 +22,77 @@ type error = [
| `Sys_error of string (** System error has occurred *)
]

(** The descriptor of the data type stored in the signature entry.
    Descriptors are created with [Data.declare].
@since 2.5.0
*)
type 'a data


(** [lookup ?paths ?compiler target data] looks up the matching entry
    in the signature database.
The search is performed over the [paths] list that is a list of
filenames. The first matching entry is selected. If a file in the
[paths] list doesn't exist then it is skipped. If it exists but
is unreadable an error is returned.
The paths list is always appended by [[default_path; system_path]],
in that specific order.
If [compiler] is specified, then only entries that list the matching
compiler will be selected. If it is not specified, then only the
entries that list the ["default"] compiler will be selected.
The target matches are performed with the [Theory.Target.matching]
function; in addition, the matched target must have the same number
of bits as [target].
Returns [Error (`No_entry name)], where [name] is the name of the
target, if no matching entry is found.
@since 2.5.0
*)
val lookup :
?paths:string list ->
?compiler:Theory.compiler ->
Theory.Target.t -> 'a data -> ('a, error) Result.t


(** [update ?compiler target data x path] updates or creates an entry
    in the signature database stored in the file [path].
Removes all entries that match with the specified compiler,
target, and data type and adds a new entry with the provided
data [x]. All unmatching entries are preserved.
@since 2.5.0
*)
val update :
?compiler:Theory.compiler ->
Theory.Target.t -> 'a data -> 'a -> string -> (unit,error) Result.t


(** Interface for declaring signature database data types. *)
module Data : sig

(** [declare ~load ~save name] declares a new data type (mode) with
    the given [name].
The [load] and [save] functions are used to decode and encode the
values of the data type when they are stored in the signatures
database.
Raises [Failure] if the name is not unique.
*)
val declare :
load:(bytes -> 'a) ->
save:('a -> bytes) ->
string -> 'a data
end

(** [save ?comp ~mode ~path arch data] stores the signature data in the
database of signatures specified by the [path] parameter.
The triple [arch-comp-mode] defines a key for the created entry. If an
entry with the same name existed, then it would be overwritten
with the new data. If the database doesn't exist, then it will be
created at the specified destination.

This function is deprecated, use [update] to store signatures. *)
val save : ?comp:string -> mode:string -> path:string -> arch -> bytes ->
(unit,error) Result.t
[@@deprecated "since 2022-02 use [update]"]

(** [load ?comp ?path ~mode arch] finds a signature for the specified
[arch-comp-path] triple.
Expand All @@ -33,6 +105,7 @@ val save : ?comp:string -> mode:string -> path:string -> arch -> bytes ->
*)
val load : ?comp:string -> ?path:string -> mode:string -> arch ->
(bytes,error) Result.t
[@@deprecated "since 2022-02 use [lookup]"]
(* [lookup] is the function of the new interface that reads signatures,
   hence it is the replacement named in the deprecation message. *)


(** default path for the user's signatures database.
Expand Down
7 changes: 5 additions & 2 deletions oasis/byteweight
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@ Library bap_byteweight
Build$: flag(everything) || flag(byteweight)
CompiledObject: best
Modules: Bap_byteweight, Bap_byteweight_signatures
BuildDepends: bap, bap-main, core_kernel, uri, regular, camlzip, ppx_bap
BuildDepends: bap, bap-main, bap-core-theory, bap-knowledge,
core_kernel, uri, camlzip, ppx_bap

Library byteweight_plugin
Path: plugins/byteweight
FindlibName: bap-plugin-byteweight
Build$: flag(everything) || flag(byteweight)
CompiledObject: best
BuildDepends: bap, bap-byteweight, core_kernel, regular, ppx_bap, bap-future
BuildDepends: bap, bap-byteweight, core_kernel, ppx_bap,
bitvec, bitvec-order,
bap-knowledge, bap-core-theory, bap-main
InternalModules: Byteweight_main
XMETADescription: find function starts using Byteweight algorithm
XMETAExtraLines: tags="pass, rooter"
4 changes: 2 additions & 2 deletions plugins/byteweight/.merlin
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
PKG cmdliner
REC
B ../../_build/lib/bap_byteweight
B ../../_build/lib/bap_byteweight
B ../../lib/bap_byteweight
Loading

0 comments on commit 5b1acc0

Please sign in to comment.