From 2e7a6a20165948af07509f763ec8dbcb3a367c94 Mon Sep 17 00:00:00 2001 From: Thomas Gazagnaire Date: Mon, 2 Oct 2023 13:53:46 +0200 Subject: [PATCH] Remove Stream Proofs (#2275) * Remove Stream Proofs They are hard to maintain and nobdy use those anymore. * Update CHANGES --- CHANGES.md | 8 +- examples/merkle_proofs.md | 4 +- src/irmin-test/store.ml | 57 +---- src/irmin/proof.ml | 418 ++--------------------------------- src/irmin/proof_intf.ml | 101 +-------- src/irmin/store_intf.ml | 49 +--- src/irmin/tree.ml | 67 +----- src/irmin/tree_intf.ml | 27 +-- test/irmin-pack/test_tree.ml | 176 +-------------- 9 files changed, 65 insertions(+), 842 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 452feacbc6..a9b6353c70 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -21,6 +21,10 @@ - **irmin-http** - Removed `irmin-http` since it is not compatible with generic keys. `irmin-grapqhl` or `irmin-server` should be used instead. (#1902, @zshipko) +- **irmin** + - Removed stream proofs. We now only have Merkle tree proofs. This simplifies + the maintenance of that part of the code, as ensuring the correct order of + cached IO operations was tricky for stream proofs (#2275, @samoht) ## 3.8.0 (2023-07-06) @@ -30,7 +34,7 @@ - Change behavior of `Irmin.Conf.key` to disallow duplicate key names by default. Add `allow_duplicate` optional argument to override. (#2252, @metanivek) - + - **irmin-pack** - Add maximum memory as an alternative configuration option, `lru_max_memory`, for setting LRU capacity. (@metanivek, #2254) @@ -51,7 +55,7 @@ - **irmin-cli** - Changed `--store irf` to `--store fs` to align the CLI with what is published on the Irmin website (#2243, @wyn) - + ## 3.7.2 (2023-06-16) ### Fixed diff --git a/examples/merkle_proofs.md b/examples/merkle_proofs.md index 0434808abd..1fde16dff6 100644 --- a/examples/merkle_proofs.md +++ b/examples/merkle_proofs.md @@ -134,8 +134,8 @@ Here is the signature of `produce_proof`: val produce_proof : Store.repo -> Store.Tree.kinded_key -> - (Store.tree -> (Store.tree * 'a) Lwt.t) -> - (Store.Tree.Proof.tree Store.Tree.Proof.t * 'a) Lwt.t = + (Store.tree -> (Store.tree * 'a) Lwt.t) -> (Store.Tree.Proof.t * 'a) Lwt.t = + ``` `produce_proof repo key_before f` is `(proof = { state; hash_before; hash_after }, f_res)`. `f` is invoked once per call to `produce_proof` and `f tree_before` is `(tree_after, f_res)`. diff --git a/src/irmin-test/store.ml b/src/irmin-test/store.ml index f0b644a6af..ce6b5204dd 100644 --- a/src/irmin-test/store.ml +++ b/src/irmin-test/store.ml @@ -1454,8 +1454,7 @@ module Make (S : Generic_key) = struct in run x test - let pp_proof = Irmin.Type.pp (S.Tree.Proof.t S.Tree.Proof.tree_t) - let pp_stream = Irmin.Type.pp (S.Tree.Proof.t S.Tree.Proof.stream_t) + let pp_proof = Irmin.Type.pp S.Tree.Proof.t let test_proofs x () = let test repo = @@ -1656,19 +1655,6 @@ module Make (S : Generic_key) = struct in let* () = Lwt_list.iter_s check_proof [ f0; f1 ] in - let check_stream f = - let* p, () = S.Tree.produce_stream repo key f in - [%log.debug "Verifying stream %a" pp_stream p]; - let+ r = S.Tree.verify_stream p f in - match r with - | Ok (_, ()) -> () - | Error e -> - Alcotest.failf "check_stream: %a" - (Irmin.Type.pp S.Tree.verifier_error_t) - e - in - let* () = Lwt_list.iter_s check_stream [ f0; f1 ] in - (* check env sharing *) let tree () = S.Tree.of_concrete @@ -1764,47 +1750,6 @@ module Make (S : Generic_key) = struct (fun c -> check_bad_proof (proof ~state:c ())) some_contents in - - (* test negative streams *) - let check_bad_stream p = - let+ r = S.Tree.verify_stream p f0 in - match r with - | Ok _ -> - Alcotest.failf "verify_stream should have failed %a" pp_stream p - | Error _ -> () - in - - let* p0, () = S.Tree.produce_stream repo key f0 in - let proof ?(before = S.Tree.Proof.before p0) - ?(after = S.Tree.Proof.after p0) ?(contents = S.Tree.Proof.state p0) - () = - S.Tree.Proof.v ~before ~after contents - in - let wrong_hash = B.Contents.Hash.hash "not the right hash!" in - let wrong_kinded_hash = `Node wrong_hash in - let* () = check_bad_stream (proof ~before:wrong_kinded_hash ()) in - let* () = check_bad_stream (proof ~after:wrong_kinded_hash ()) in - let* _ = S.Tree.verify_stream (proof ()) f0 in - let some_contents : S.Tree.Proof.stream list = - let s : S.Tree.Proof.elt list -> S.Tree.Proof.stream = List.to_seq in - let ok = List.of_seq (S.Tree.Proof.state p0) in - [ - s []; - s [ Node [] ]; - s [ Inode { length = 1024; proofs = [] } ]; - s [ Contents "yo" ]; - s (ok @ [ Node [] ]); - ] - in - let* () = - let x = ref 1 in - Lwt_list.iter_s - (fun c -> - incr x; - check_bad_stream (proof ~contents:c ())) - some_contents - in - B.Repo.close repo in run x test diff --git a/src/irmin/proof.ml b/src/irmin/proof.ml index 52e95ed64e..f1cdd1b1bf 100644 --- a/src/irmin/proof.ml +++ b/src/irmin/proof.ml @@ -63,7 +63,7 @@ struct type stream = elt Seq.t [@@deriving irmin] - type 'a t = { before : kinded_hash; after : kinded_hash; state : 'a } + type t = { before : kinded_hash; after : kinded_hash; state : tree } [@@deriving irmin] let before t = t.before @@ -72,30 +72,10 @@ struct let v ~before ~after state = { after; before; state } end -type bad_stream_exn = - | Stream_too_long of { context : string; reason : string } - | Stream_too_short of { context : string; reason : string } - | Proof_mismatch of { context : string; reason : string } - exception Bad_proof of { context : string } -exception Bad_stream of bad_stream_exn let bad_proof_exn context = raise (Bad_proof { context }) -let bad_stream_too_long context reason = - raise (Bad_stream (Stream_too_long { context; reason })) - -let bad_stream_too_short context reason = - raise (Bad_stream (Stream_too_short { context; reason })) - -let bad_stream_exn context reason = - raise (Bad_stream (Proof_mismatch { context; reason })) - -let bad_stream_exn_fmt s fmt = Fmt.kstr (bad_stream_exn ("Proof.Env." ^ s)) fmt - -let bad_stream_too_short_fmt s fmt = - Fmt.kstr (bad_stream_too_short ("Proof.Env." ^ s)) fmt - module Env (B : Backend.S) (P : S @@ -120,7 +100,6 @@ struct end type mode = Produce | Serialise | Deserialise | Consume [@@deriving irmin] - type kind = Set | Stream [@@deriving irmin] module Set = struct type produce = { @@ -149,45 +128,7 @@ struct Deserialise { contents = Hashes.create 13; nodes = Hashes.create 13 } end - module Stream = struct - let ref_t v = Type.map v ref ( ! ) - - type produce = { - set : unit Hashes.t; - singleton_inodes : (int * H.t) Hashes.t; - rev_elts : (H.t * P.elt) list ref; - rev_elts_size : int ref; - } - [@@deriving irmin] - - type consume = { - nodes : B.Node_portable.t Hashes.t; - contents : B.Contents.Val.t Hashes.t; - stream : P.elt Seq.t ref; - } - [@@deriving irmin] - - type t = Produce of produce | Consume of consume [@@deriving irmin] - - let producer () = - let set = Hashes.create 13 in - let singleton_inodes = Hashes.create 13 in - let rev_elts = ref [] in - let rev_elts_size = ref 0 in - Produce { set; singleton_inodes; rev_elts; rev_elts_size } - - let consumer stream = - let nodes = Hashes.create 13 in - let contents = Hashes.create 13 in - let stream = ref stream in - Consume { nodes; contents; stream } - - let push { rev_elts; rev_elts_size; _ } h_elt index = - incr rev_elts_size; - rev_elts := List.insert_exn !rev_elts index h_elt - end - - type v = Empty | Set of Set.t | Stream of Stream.t [@@deriving irmin] + type v = Empty | Set of Set.t [@@deriving irmin] type t = v ref let t = Type.map v_t ref ( ! ) @@ -197,214 +138,32 @@ struct type hash = H.t [@@deriving irmin ~equal ~pp] - let rec forward_lookup h singleton_inodes : (int * hash) list option = - match Hashes.find_opt singleton_inodes h with - | None -> None - | Some (i', h') -> ( - match forward_lookup h' singleton_inodes with - | None -> Some [ (i', h') ] - | Some l -> Some ((i', h') :: l)) - - let apply_extenders ~length singleton_inodes skips proofs = - let rec accumulate_segments ~(acc : int Reversed_list.t) h = function - | [] -> (Reversed_list.rev acc, h) - | (i', h') :: rest -> accumulate_segments ~acc:(i' :: acc) h' rest - in - let inode = P.Inode { length; proofs } in - match proofs with - | [ (i, h) ] -> ( - match forward_lookup h singleton_inodes with - | None -> inode - | Some ls -> ( - let () = - (* Push all hashes except the last one into [skips] *) - match List.rev ((i, h) :: ls) with - | [] | [ _ ] -> failwith "idk" - | _ :: tl -> List.iter (fun (_, h) -> Hashes.add skips h ()) tl - in - let i, h = accumulate_segments ~acc:[ i ] h ls in - match i with - | [] | [ _ ] -> assert false - | segments -> P.Inode_extender { length; segments; proof = h })) - | _ -> inode - - let post_processing singleton_inodes (stream : (hash * P.elt) list) : - P.elt list = - let skips = Hashes.create 13 in - (* [skips] are the elements of the [stream] that are included in the - extenders, they will be removed from the final stream. *) - let rec aux rev_elts = function - | [] -> List.rev rev_elts - | (h, elt) :: rest -> - if Hashes.mem skips h then aux rev_elts rest - else - let elt' : P.elt = - match (elt : P.elt) with - | P.Inode { length; proofs } -> - apply_extenders ~length singleton_inodes skips proofs - | Node ls -> Node ls - | Contents c -> Contents c - | Inode_extender _ -> assert false - in - aux (elt' :: rev_elts) rest - in - aux [] stream - - let to_stream t = - match !t with - | Stream (Produce { rev_elts; singleton_inodes; _ }) -> - List.rev !rev_elts |> post_processing singleton_inodes |> List.to_seq + let set_mode t mode = + match (!t, mode) with + | Empty, Produce -> t := Set Set.(producer ()) + | Empty, Deserialise -> t := Set Set.(deserialiser ()) + | Set (Produce set), Serialise -> t := Set Set.(Serialise set) + | Set (Deserialise set), Consume -> t := Set Set.(Consume set) | _ -> assert false - let is_empty_stream t = - match !t with - | Stream (Consume { stream; _ }) -> ( - (* Peek the sequence but do not advance the ref *) - match !stream () with Seq.Nil -> true | _ -> false) - | _ -> false - - let set_mode t (kind : kind) mode = - match kind with - | Set -> ( - match (!t, mode) with - | Empty, Produce -> t := Set Set.(producer ()) - | Empty, Deserialise -> t := Set Set.(deserialiser ()) - | Set (Produce set), Serialise -> t := Set Set.(Serialise set) - | Set (Deserialise set), Consume -> t := Set Set.(Consume set) - | _ -> assert false) - | Stream -> ( - match (!t, mode) with - | Empty, Produce -> t := Stream (Stream.producer ()) - | _ -> assert false) - - let with_set_consume f = + let with_consume f = let t = ref Empty in - set_mode t Set Deserialise; - let stop_deserialise () = set_mode t Set Consume in + set_mode t Deserialise; + let stop_deserialise () = set_mode t Consume in let+ res = f t ~stop_deserialise in t := Empty; res - let with_set_produce f = + let with_produce f = let t = ref Empty in - set_mode t Set Produce; - let start_serialise () = set_mode t Set Serialise in + set_mode t Produce; + let start_serialise () = set_mode t Serialise in let+ res = f t ~start_serialise in t := Empty; res - let with_stream_produce f = - let t = ref Empty in - set_mode t Stream Produce; - let to_stream () = to_stream t in - let+ res = f t ~to_stream in - t := Empty; - res - - let with_stream_consume stream f = - let t = Stream (Stream.consumer stream) |> ref in - let is_empty () = is_empty_stream t in - let+ res = f t ~is_empty in - t := Empty; - res - module Contents_hash = Hash.Typed (H) (B.Contents.Val) - let check_contents_integrity v h = - let h' = Contents_hash.hash v in - if not (equal_hash h' h) then - bad_stream_exn_fmt "check_contents_integrity" "got %a expected %a" pp_hash - h' pp_hash h - - let check_node_integrity v h = - let h' = - try B.Node_portable.hash_exn ~force:false v - with Not_found -> - (* [v] is out of [of_proof], it is supposed to have its hash available - without IOs. - - If these IOs were to occur, it would corrupt the stream being read. - *) - assert false - in - if not (equal_hash h' h) then - bad_stream_exn_fmt "check_node_integrity" "got %a expected %a" pp_hash h' - pp_hash h - - let dehydrate_stream_node v = - (* [v] is fresh out of the node store, meaning that if it is represented - recursively it is still in a shallow state. - - [head v] might trigger IOs. It is fine because [v] is already wrapped - with [with_handler]. *) - match B.Node.Val.head v with - | `Node l -> - let l = - List.map - (function - | step, `Contents (k, m) -> - (step, `Contents (B.Contents.Key.to_hash k, m)) - | step, `Node k -> (step, `Node (B.Node.Key.to_hash k))) - l - in - P.Node l - | `Inode (length, proofs) -> P.Inode { length; proofs } - - let rehydrate_stream_node ~depth (elt : P.elt) h = - let bad_stream_exn_fmt = bad_stream_exn_fmt "rehydrate_stream_node" in - match elt with - | Contents _ -> - bad_stream_exn_fmt - "found contents at depth %d when looking for node with hash %a" depth - pp_hash h - | Node l -> ( - match B.Node_portable.of_proof ~depth (`Values l) with - | Some v -> v - | None -> - bad_stream_exn_fmt - "could not deserialise Node at depth %d when looking for hash %a" - depth pp_hash h) - | Inode { length; proofs } -> - let proofs = List.map (fun (i, h) -> (i, `Blinded h)) proofs in - let inode = `Inode (length, proofs) in - let v = - match B.Node_portable.of_proof ~depth inode with - | Some v -> v - | None -> - bad_stream_exn_fmt - "could not deserialise Inode at depth %d when looking for hash \ - %a" - depth pp_hash h - in - v - | Inode_extender { length; segments; proof } -> - let elt = - List.fold_left - (fun acc i -> `Inode (length, [ (i, acc) ])) - (`Blinded proof) (List.rev segments) - in - let v = - match B.Node_portable.of_proof ~depth elt with - | Some v -> v - | None -> - bad_stream_exn_fmt - "could not deserialise Inode at depth %d when looking for hash \ - %a" - depth pp_hash h - in - v - - let rehydrate_stream_contents (elt : P.elt) h = - let err k = - bad_stream_exn_fmt "find_contents" - "found %s when looking Contents with hash %a" k pp_hash h - in - match elt with - | Node _ -> err "Node" - | Inode _ -> err "Inode" - | Inode_extender _ -> err "Inode" - | Contents v -> v - let find_contents t h = match !t with | Empty -> None @@ -422,24 +181,6 @@ struct | Set (Consume set) -> (* Use the Env to feed the values during consume *) Hashes.find_opt set.contents h - | Stream (Produce _) -> - (* There is no need for sharing with stream proofs *) - None - | Stream (Consume { contents; stream; _ }) -> ( - (* Use the Env to feed the values during consume *) - match Hashes.find_opt contents h with - | Some v -> Some v - | None -> ( - match !stream () with - | Seq.Nil -> - bad_stream_too_short_fmt "find_contents" - "empty stream when looking for hash %a" pp_hash h - | Cons (elt, rest) -> - let v = rehydrate_stream_contents elt h in - check_contents_integrity v h; - stream := rest; - Hashes.add contents h v; - Some v)) let add_contents_from_store t h v = match !t with @@ -457,15 +198,6 @@ struct | Set (Consume _) -> (* This phase has no repo pointer *) assert false - | Stream (Produce ({ set; _ } as cache)) -> - (* Registering when seen for the first time *) - if not @@ Hashes.mem set h then ( - Hashes.add set h (); - let h_elt : hash * P.elt = (h, Contents v) in - Stream.push cache h_elt 0) - | Stream (Consume _) -> - (* This phase has no repo pointer *) - assert false let add_contents_from_proof t h v = match !t with @@ -498,34 +230,6 @@ struct | Set (Consume _) -> (* This phase looks for portable nodes *) None - | Stream (Produce _) -> - (* There is no need for sharing with stream proofs *) - None - | Stream (Consume _) -> - (* This phase looks for portable nodes *) - None - - let find_recpnode t _find ~expected_depth h = - assert (expected_depth > 0); - match !t with - | Stream (Consume { nodes; stream; _ }) -> ( - (* Use the Env to feed the values during consume *) - match Hashes.find_opt nodes h with - | Some v -> Some v - | None -> ( - match !stream () with - | Seq.Nil -> - bad_stream_too_short_fmt "find_recnode" - "empty stream when looking for hash %a" pp_hash h - | Cons (v, rest) -> - let v = rehydrate_stream_node ~depth:expected_depth v h in - (* There is no need to apply [with_handler] here because there - is no repo pointer in this inode. *) - check_node_integrity v h; - stream := rest; - Hashes.add nodes h v; - Some v)) - | _ -> assert false let find_pnode t h = match !t with @@ -533,65 +237,8 @@ struct (* [set] has been filled during deserialise. Using it to provide values during consume. *) Hashes.find_opt set.nodes h - | Stream (Consume { nodes; stream; _ }) -> ( - (* Use the Env to provide the values during consume. Since all hashes - are unique in [stream], [nodes] provides a hash-based sharing. *) - match Hashes.find_opt nodes h with - | Some v -> Some v - | None -> ( - match !stream () with - | Seq.Nil -> - bad_stream_too_short_fmt "find_node" - "empty stream when looking for hash %a" pp_hash h - | Cons (v, rest) -> - (* Shorten [stream] before calling [head] as it might itself - perform reads. *) - stream := rest; - let v = - (* [depth] is 0 because this context deals with root nodes *) - rehydrate_stream_node ~depth:0 v h - in - let v = - (* Call [with_handler] before [head] because the later might - perform reads *) - B.Node_portable.with_handler (find_recpnode t) v - in - let (_ : [ `Node of _ | `Inode of _ ]) = - (* At produce time [dehydrate_stream_node] called [head] which - might have performed IOs. If it did then we must consume - the stream accordingly right now in order to preserve - stream ordering. *) - B.Node_portable.head v - in - check_node_integrity v h; - Hashes.add nodes h v; - - Some v)) | _ -> None - let add_recnode_from_store t find ~expected_depth k = - assert (expected_depth > 0); - match !t with - | Stream (Produce ({ set; singleton_inodes; _ } as cache)) -> ( - (* Registering when seen for the first time, there is no need - for sharing. *) - match find ~expected_depth k with - | None -> None - | Some v -> - let h = B.Node.Key.to_hash k in - if not @@ Hashes.mem set h then ( - Hashes.add set h (); - let elt = dehydrate_stream_node v in - let () = - match elt with - | P.Inode { proofs = [ bucket ]; _ } -> - Hashes.add singleton_inodes h bucket - | _ -> () - in - Stream.push cache (h, elt) 0); - Some v) - | _ -> assert false - let add_node_from_store t h v = match !t with | Empty -> v @@ -611,43 +258,6 @@ struct | Set (Consume _) -> (* This phase has no repo pointer *) assert false - | Stream (Produce ({ set; rev_elts_size; singleton_inodes; _ } as cache)) -> - (* Registering when seen for the first time and wrap its [find] - function. Since there is no sharing during the production of - streamed proofs, the hash may already have been seened. *) - let new_hash = not @@ Hashes.mem set h in - let v = - (* In all case [v] should be wrapped. - If [not new_hash] then wrap it for future IOs on it. - - If [new_hash] then it additionally should be wrapped before - calling [dehydrate_stream_node] as this call may trigger IOs. *) - B.Node.Val.with_handler (add_recnode_from_store t) v - in - if new_hash then ( - Hashes.add set h (); - let len0 = !rev_elts_size in - let elt = dehydrate_stream_node v in - let len1 = !rev_elts_size in - let delta = - (* [delta] is the number of reads that were performed by - [dehydrate_stream_node]. *) - len1 - len0 - in - let () = - match elt with - | P.Inode { proofs = [ bucket ]; _ } -> - Hashes.add singleton_inodes h bucket - | _ -> () - in - (* if [delta = 0] then push the pair at the head of the list. - - if [delta > 0] then insert it before the calls that it triggered. *) - Stream.push cache (h, elt) delta); - v - | Stream (Consume _) -> - (* This phase has no repo pointer *) - assert false let add_pnode_from_proof t h v = match !t with diff --git a/src/irmin/proof_intf.ml b/src/irmin/proof_intf.ml index 6298c7d51d..f32892a1d8 100644 --- a/src/irmin/proof_intf.ml +++ b/src/irmin/proof_intf.ml @@ -120,69 +120,26 @@ module type S = sig | Inode_extender of inode_tree inode_extender [@@deriving irmin] - (** Stream proofs represent an explicit traversal of a Merle tree proof. Every - element (a node, a value, or a shallow pointer) met is first "compressed" - by shallowing its children and then recorded in the proof. - - As stream proofs directly encode the recursive construction of the Merkle - root hash is slightly simpler to implement: the verifier simply needs to - hash the compressed elements lazily, without any memory or choice. - - Moreover, the minimality of stream proofs is trivial to check. Once the - computation has consumed the compressed elements required, it is - sufficient to check that no more compressed elements remain in the proof. - - However, as the compressed elements contain all the hashes of their - shallow children, the size of stream proofs is larger (at least double in - size in practice) than tree proofs, which only contains the hash for - intermediate shallow pointers. *) - - (** The type for elements of stream proofs. - - [Value v] is a proof that the next element read in the store is the value - [v]. - - [Node n] is a proof that the next element read in the store is the node - [n]. - - [Inode i] is a proof that the next element read in the store is the inode - [i]. - - [Inode_extender e] is a proof that the next element read in the store is - the node extender [e]. *) - type elt = - | Contents of contents - | Node of (step * kinded_hash) list - | Inode of hash inode - | Inode_extender of hash inode_extender - [@@deriving irmin] - - type stream = elt Seq.t [@@deriving irmin] - (** The type for stream proofs. - - The sequance [e_1 ... e_n] proves that the [e_1], ..., [e_n] are read in - the store in sequence. *) - - type 'a t [@@deriving irmin] - (** The type for proofs of kind ['a] (i.e. [stream] or [proof]). + type t [@@deriving irmin] + (** The type for Merkle proofs. A proof [p] proves that the state advanced from [before p] to [after p]. [state p]'s hash is [before p], and [state p] contains the minimal information for the computation to reach [after p]. *) - val v : before:kinded_hash -> after:kinded_hash -> 'a -> 'a t + val v : before:kinded_hash -> after:kinded_hash -> tree -> t (** [v ~before ~after p] proves that the state advanced from [before] to [after]. [p]'s hash is [before], and [p] contains the minimal information for the computation to reach [after]. *) - val before : 'a t -> kinded_hash + val before : t -> kinded_hash (** [before t] it the state's hash at the beginning of the computation. *) - val after : 'a t -> kinded_hash + val after : t -> kinded_hash (** [after t] is the state's hash at the end of the computation. *) - val state : 'a t -> 'a - (** [proof t] is a subset of the initial state needed to prove that the proven + val state : t -> tree + (** [state t] is a subset of the initial state needed to prove that the proven computation could run without performing any I/O. *) end @@ -246,33 +203,14 @@ end enriched with backend keys) [Consume] is restricted to manipulating nodes of type - [Backend.Node_portable.t]. - - {1 Hashing of Backend Nodes with Streamed Proofs} - - Hashing a backend node or calling [head] on it may trigger IOs in order to - load inner inodes (this is the case in irmin-pack). - - In various places, [Env] requires calling [head] or [hash_exn] on nodes. - - [Env] must be very careful that these two facts do not lead to chaos during - the recording of IOs' order. - - Two tricks are in place to prevent problems: - - - The [Node.of_proof] functions return nodes that don't require IOs to - produce their hash (i.e. they use caching if necessary). - - The [Node.head] function that is called on a node during - [dehydrate_stream_node] is also called just after [rehydrate_stream_node]. *) + [Backend.Node_portable.t]. *) module type Env = sig - type kind = Set | Stream type mode = Produce | Serialise | Deserialise | Consume type t [@@deriving irmin] type hash type node type pnode type contents - type stream val is_empty : t -> bool val empty : unit -> t @@ -280,20 +218,14 @@ module type Env = sig (** {2 Modes} *) - val set_mode : t -> kind -> mode -> unit + val set_mode : t -> mode -> unit - val with_set_produce : + val with_produce : (t -> start_serialise:(unit -> unit) -> 'a Lwt.t) -> 'a Lwt.t - val with_set_consume : + val with_consume : (t -> stop_deserialise:(unit -> unit) -> 'a Lwt.t) -> 'a Lwt.t - val with_stream_produce : - (t -> to_stream:(unit -> stream) -> 'a Lwt.t) -> 'a Lwt.t - - val with_stream_consume : - stream -> (t -> is_empty:(unit -> bool) -> 'a Lwt.t) -> 'a Lwt.t - (** {2 Interactions With [Tree]} *) val add_contents_from_store : t -> hash -> contents -> unit @@ -315,17 +247,7 @@ module type Proof = sig exception Bad_proof of { context : string } - type bad_stream_exn = - | Stream_too_long of { context : string; reason : string } - | Stream_too_short of { context : string; reason : string } - | Proof_mismatch of { context : string; reason : string } - - exception Bad_stream of bad_stream_exn - val bad_proof_exn : string -> 'a - val bad_stream_exn : string -> string -> 'a - val bad_stream_too_long : string -> string -> 'a - val bad_stream_too_short : string -> string -> 'a module Make (C : Type.S) @@ -354,5 +276,4 @@ module type Proof = sig and type contents := B.Contents.Val.t and type node := B.Node.Val.t and type pnode := B.Node_portable.t - and type stream := P.stream end diff --git a/src/irmin/store_intf.ml b/src/irmin/store_intf.ml index 8eee4b97bb..09ba421064 100644 --- a/src/irmin/store_intf.ml +++ b/src/irmin/store_intf.ml @@ -480,11 +480,11 @@ module type S_generic_key = sig (** {1 Proofs} *) - type ('proof, 'result) producer := + type 'result producer := repo -> kinded_key -> (tree -> (tree * 'result) Lwt.t) -> - ('proof * 'result) Lwt.t + (Proof.t * 'result) Lwt.t (** [produce r h f] runs [f] on top of a real store [r], producing a proof and a result using the initial root hash [h]. @@ -494,15 +494,11 @@ module type S_generic_key = sig Calling [produce_proof] recursively has an undefined behaviour. *) - type verifier_error = - [ `Proof_mismatch of string - | `Stream_too_long of string - | `Stream_too_short of string ] - [@@deriving irmin] + type verifier_error = [ `Proof_mismatch of string ] [@@deriving irmin] (** The type for errors associated with functions that verify proofs. *) - type ('proof, 'result) verifier := - 'proof -> + type 'result verifier := + Proof.t -> (tree -> (tree * 'result) Lwt.t) -> (tree * 'result, verifier_error) result Lwt.t (** [verify p f] runs [f] in checking mode. [f] is a function that takes a @@ -532,42 +528,17 @@ module type S_generic_key = sig The result is [Error _] if the proof is rejected: - - For tree proofs: when [p.before] is different from the hash of - [p.state]; - - For tree and stream proofs: when [p.after] is different from the hash - of [f p.state]; - - For tree and stream proofs: when [f p.state] tries to access paths - invalid paths in [p.state]; - - For stream proofs: when the proof is not empty once [f] is done. *) - - type tree_proof := Proof.tree Proof.t - (** The type for tree proofs. + - when [p.before] is different from the hash of [p.state]; + - when [p.after] is different from the hash of [f p.state]; + - when [f p.state] tries to access paths invalid paths in [p.state]; *) - Guarantee that the given computation performs exactly the same state - operations as the generating computation, *in some order*. *) - - val produce_proof : (tree_proof, 'a) producer + val produce_proof : 'a producer (** [produce_proof] is the producer of tree proofs. *) - val verify_proof : (tree_proof, 'a) verifier + val verify_proof : 'a verifier (** [verify_proof] is the verifier of tree proofs. *) val hash_of_proof_state : Proof.tree -> kinded_hash - - type stream_proof := Proof.stream Proof.t - (** The type for stream proofs. - - Guarantee that the given computation performs exactly the same state - operations as the generating computation, in the exact same order. - - Calling [fold] with [order = `Undefined] during the - production/verification of streamed proofs is undefined. *) - - val produce_stream : (stream_proof, 'a) producer - (** [produce_stream] is the producer of stream proofs. *) - - val verify_stream : (stream_proof, 'a) verifier - (** [verify_stream] is the verifier of stream proofs. *) end (** {1 Reads} *) diff --git a/src/irmin/tree.ml b/src/irmin/tree.ml index 8a0153558f..7090c53e93 100644 --- a/src/irmin/tree.ml +++ b/src/irmin/tree.ml @@ -2757,15 +2757,15 @@ module Make (P : Backend.S) = struct let to_tree p = let env = Env.empty () in - Env.set_mode env Env.Set Env.Deserialise; + Env.set_mode env Env.Deserialise; let h = load_proof ~env (state p) Fun.id in let tree = pruned_with_env ~env h in - Env.set_mode env Env.Set Env.Consume; + Env.set_mode env Env.Consume; tree end let produce_proof repo kinded_key f = - Env.with_set_produce @@ fun env ~start_serialise -> + Env.with_produce @@ fun env ~start_serialise -> let tree = import_with_env ~env repo kinded_key in let+ tree_after, result = f tree in let after = hash tree_after in @@ -2780,18 +2780,8 @@ module Make (P : Backend.S) = struct let kinded_hash = Node.weaken_value kinded_key in (Proof.v ~before:kinded_hash ~after proof, result) - let produce_stream repo kinded_key f = - Env.with_stream_produce @@ fun env ~to_stream -> - let tree = import_with_env ~env repo kinded_key in - let+ tree_after, result = f tree in - let after = hash tree_after in - clear tree; - let proof = to_stream () in - let kinded_hash = Node.weaken_value kinded_key in - (Proof.v ~before:kinded_hash ~after proof, result) - let verify_proof_exn p f = - Env.with_set_consume @@ fun env ~stop_deserialise -> + Env.with_consume @@ fun env ~stop_deserialise -> let before = Proof.before p in let after = Proof.after p in (* First convert to proof to [Env] *) @@ -2819,11 +2809,7 @@ module Make (P : Backend.S) = struct h.context pp_hash h.hash | e -> raise e) - type verifier_error = - [ `Proof_mismatch of string - | `Stream_too_long of string - | `Stream_too_short of string ] - [@@deriving irmin] + type verifier_error = [ `Proof_mismatch of string ] [@@deriving irmin] let verify_proof p f = Lwt.catch @@ -2835,49 +2821,6 @@ module Make (P : Backend.S) = struct Lwt.return (Error (`Proof_mismatch e.context)) | e -> Lwt.fail e) - let verify_stream_exn p f = - let before = Proof.before p in - let after = Proof.after p in - let stream = Proof.state p in - Env.with_stream_consume stream @@ fun env ~is_empty -> - let tree = pruned_with_env ~env before in - Lwt.catch - (fun () -> - let+ tree_after, result = f tree in - if not (is_empty ()) then - Irmin_proof.bad_stream_too_long "verify_stream" - "did not consume the full stream"; - if not (equal_kinded_hash after (hash tree_after)) then - Irmin_proof.bad_stream_exn "verify_stream" "invalid after hash"; - (tree_after, result)) - (function - | Pruned_hash h -> - Fmt.kstr - (Irmin_proof.bad_stream_exn "verify_stream") - "%s is trying to read through a blinded node or object (%a)" - h.context pp_hash h.hash - | e -> raise e) - - let verify_stream p f = - Lwt.catch - (fun () -> - let+ r = verify_stream_exn p f in - Ok r) - (function - | Irmin_proof.Bad_stream (Stream_too_long e) -> - Fmt.kstr - (fun e -> Lwt.return (Error (`Stream_too_long e))) - "Bad_stream %s: %s" e.context e.reason - | Irmin_proof.Bad_stream (Stream_too_short e) -> - Fmt.kstr - (fun e -> Lwt.return (Error (`Stream_too_short e))) - "Bad_stream %s: %s" e.context e.reason - | Irmin_proof.Bad_stream (Proof_mismatch e) -> - Fmt.kstr - (fun e -> Lwt.return (Error (`Proof_mismatch e))) - "Bad_stream %s: %s" e.context e.reason - | e -> Lwt.fail e) - let hash_of_proof_state state = let env = Env.empty () in Proof.load_proof ~env state Fun.id diff --git a/src/irmin/tree_intf.ml b/src/irmin/tree_intf.ml index 5ab2a5eae7..844d07226e 100644 --- a/src/irmin/tree_intf.ml +++ b/src/irmin/tree_intf.ml @@ -371,7 +371,7 @@ module type S = sig type irmin_tree - val to_tree : tree t -> irmin_tree + val to_tree : t -> irmin_tree (** [to_tree p] is the tree [t] representing the tree proof [p]. Blinded parts of the proof will raise [Dangling_hash] when traversed. *) end @@ -474,32 +474,21 @@ module type Sigs = sig val to_backend_portable_node : node -> B.Node_portable.t Lwt.t val of_backend_node : B.Repo.t -> B.Node.value -> node - type ('proof, 'result) producer := + type 'result producer := B.Repo.t -> kinded_key -> (t -> (t * 'result) Lwt.t) -> - ('proof * 'result) Lwt.t + (Proof.t * 'result) Lwt.t - type verifier_error = - [ `Proof_mismatch of string - | `Stream_too_long of string - | `Stream_too_short of string ] - [@@deriving irmin] + type verifier_error = [ `Proof_mismatch of string ] [@@deriving irmin] - type ('proof, 'result) verifier := - 'proof -> + type 'result verifier := + Proof.t -> (t -> (t * 'result) Lwt.t) -> (t * 'result, verifier_error) result Lwt.t - type tree_proof := Proof.tree Proof.t - - val produce_proof : (tree_proof, 'a) producer - val verify_proof : (tree_proof, 'a) verifier + val produce_proof : 'a producer + val verify_proof : 'a verifier val hash_of_proof_state : Proof.tree -> kinded_hash - - type stream_proof := Proof.stream Proof.t - - val produce_stream : (stream_proof, 'a) producer - val verify_stream : (stream_proof, 'a) verifier end end diff --git a/test/irmin-pack/test_tree.ml b/test/irmin-pack/test_tree.ml index afbb0ac1fe..64c0c437a2 100644 --- a/test/irmin-pack/test_tree.ml +++ b/test/irmin-pack/test_tree.ml @@ -109,15 +109,8 @@ module Make (Conf : Irmin_pack.Conf.S) = struct let+ t, () = Store.Tree.produce_proof repo hash (run ops) in t - let stream_of_ops repo hash ops : _ Lwt.t = - let+ t, () = Store.Tree.produce_stream repo hash (run ops) in - t - - let tree_proof_t = Tree.Proof.t Tree.Proof.tree_t - let stream_proof_t = Tree.Proof.t Tree.Proof.stream_t - let bin_of_proof = Irmin.Type.(unstage (to_bin_string tree_proof_t)) - let proof_of_bin = Irmin.Type.(unstage (of_bin_string tree_proof_t)) - let bin_of_stream = Irmin.Type.(unstage (to_bin_string stream_proof_t)) + let bin_of_proof = Irmin.Type.(unstage (to_bin_string Tree.Proof.t)) + let proof_of_bin = Irmin.Type.(unstage (of_bin_string Tree.Proof.t)) end module Default = Make (Conf) @@ -300,7 +293,7 @@ let test_proofs ctxt ops = (* test encoding *) let enc = bin_of_proof proof in let dec = proof_of_bin enc in - Alcotest.(check_repr tree_proof_t) "same proof" proof dec; + Alcotest.(check_repr Tree.Proof.t) "same proof" proof dec; (* test equivalence *) let tree_proof = Tree.Proof.to_tree proof in @@ -438,15 +431,6 @@ let test_large_proofs () = let enc_32 = bin_of_proof proof in let* () = close ctxt in - (* Build a stream proof *) - let* ctxt = init_tree bindings in - let key = - match Tree.key ctxt.tree with Some (`Node k) -> k | _ -> assert false - in - let* proof = stream_of_ops ctxt.repo (`Node key) ops in - let s_enc_32 = bin_of_stream proof in - let* () = close ctxt in - (* Build a proof on a large store (branching factor = 2) *) let* ctxt = Binary.init_tree bindings in let key = @@ -458,35 +442,17 @@ let test_large_proofs () = let enc_2 = Binary.bin_of_proof proof in let* () = Binary.close ctxt in - (* Build a stream proof *) - let* ctxt = Binary.init_tree bindings in - let key = - match Binary.Store.Tree.key ctxt.tree with - | Some (`Node k) -> k - | _ -> assert false - in - let* proof = Binary.stream_of_ops ctxt.repo (`Node key) ops in - let s_enc_2 = Binary.bin_of_stream proof in - let* () = Binary.close ctxt in - - Lwt.return - ( n, - String.length enc_32 / 1024, - String.length s_enc_32 / 1024, - String.length enc_2 / 1024, - String.length s_enc_2 / 1024 ) + Lwt.return (n, String.length enc_32 / 1024, String.length enc_2 / 1024) in let* a = compare_proofs 1 in let* b = compare_proofs 100 in let* c = compare_proofs 1_000 in let+ d = compare_proofs 10_000 in List.iter - (fun (n, k32, sk32, k2, sk2) -> + (fun (n, k32, k2) -> Fmt.pr "Size of Merkle proof for %d operations:\n" n; Fmt.pr "- Merkle B-trees (32 children) : %dkB\n%!" k32; - Fmt.pr "- stream Merkle B-trees (32 children): %dkB\n%!" sk32; - Fmt.pr "- binary Merkle trees : %dkB\n%!" k2; - Fmt.pr "- stream binary Merkle trees : %dkB\n%!" sk2) + Fmt.pr "- binary Merkle trees : %dkB\n%!" k2) [ a; b; c; d ] module Custom = Make (struct @@ -504,8 +470,7 @@ end) module P = Custom.Tree.Proof -let pp_proof = Irmin.Type.pp (P.t P.tree_t) -let pp_stream = Irmin.Type.pp (P.t P.stream_t) +let pp_proof = Irmin.Type.pp P.t let check_hash h s = let s' = Irmin.Type.(to_string Hash.t) h in @@ -544,68 +509,7 @@ let test_extenders () = (Irmin.Type.pp Custom.Tree.verifier_error_t) e in - let* () = Lwt_list.iter_s check_proof [ bindings; bindings2; bindings3 ] in - - let check_stream bindings = - let* ctxt = Custom.init_tree bindings in - let key = Custom.Tree.key ctxt.tree |> Option.get in - let* p, () = Custom.Tree.produce_stream ctxt.repo key f in - [%log.debug "Verifying stream %a" pp_stream p]; - let+ r = Custom.Tree.verify_stream p f in - match r with - | Ok (_, ()) -> () - | Error e -> - Alcotest.failf "check_stream: %a" - (Irmin.Type.pp Custom.Tree.verifier_error_t) - e - in - Lwt_list.iter_s check_stream [ bindings; bindings2; bindings3 ] - -let test_hardcoded_stream () = - let bindings = - [ ([ "00100" ], "x"); ([ "00101" ], "y"); ([ "00110" ], "z") ] - in - let fail elt = - Alcotest.failf "Unexpected elt in stream %a" (Irmin.Type.pp P.elt_t) elt - in - let* ctxt = Custom.init_tree bindings in - let key = Custom.Tree.key ctxt.tree |> Option.get in - let f t = - let path = [ "00100" ] in - let+ v = Custom.Tree.get t path in - Alcotest.(check ~pos:__POS__ string) "" (List.assoc path bindings) v; - (t, ()) - in - let* p, () = Custom.Tree.produce_stream ctxt.repo key f in - let state = P.state p in - let counter = ref 0 in - Seq.iter - (fun elt -> - (match !counter with - | 0 -> ( - match elt with - | P.Inode_extender { length; segments = [ 0; 0; 1 ]; proof = h } - when length = 3 -> - check_hash h "25c1a3d3bb7e5124cf61954851d0c9ccf5113d4e" - | _ -> fail elt) - | 1 -> ( - match elt with - | P.Inode { length; proofs = [ (0, h1); (1, h0) ] } when length = 3 -> - check_hash h0 "8410f4d1be1d571f0d63638927d42c7c1c6f3df1"; - check_hash h1 "580c8955c438ca5b1f94d2f4eb712a85e2634b70" - | _ -> fail elt) - | 2 -> ( - match elt with - | P.Node [ ("00100", h0); ("00101", h1) ] -> - check_contents_hash h0 "11f6ad8ec52a2984abaafd7c3b516503785c2072"; - check_contents_hash h1 "95cb0bfd2977c761298d9624e4b4d4c72a39974a" - | _ -> fail elt) - | 3 -> ( match elt with P.Contents "x" -> () | _ -> fail elt) - | _ -> fail elt); - incr counter) - state; - if !counter <> 4 then Alcotest.fail "Not enough elements in the stream"; - Lwt.return_unit + Lwt_list.iter_s check_proof [ bindings; bindings2; bindings3 ] let test_hardcoded_proof () = let bindings = @@ -655,66 +559,6 @@ let tree_of_list ls = let tree = Tree.empty () in Lwt_list.fold_left_s (fun tree (k, v) -> Tree.add tree k v) tree ls -let test_proof_exn _ = - let x = "x" in - let y = "y" in - let hx = Store.Contents.hash x in - let hy = Store.Contents.hash y in - let stream_elt1 : P.elt = Contents y in - let stream_elt2 : P.elt = Contents x in - let stream_elt3 : P.elt = - Node [ ("bx", `Contents (hx, ())); ("by", `Contents (hy, ())) ] - in - let* tree = tree_of_list [ ([ "bx" ], "x"); ([ "by" ], "y") ] in - let hash = Tree.hash tree in - - let stream_all = - P.v ~before:(`Node hash) ~after:(`Node hash) - (List.to_seq [ stream_elt3; stream_elt2; stream_elt1 ]) - in - let stream_short = - P.v ~before:(`Node hash) ~after:(`Node hash) - (List.to_seq [ stream_elt3; stream_elt2 ]) - in - let f_all t = - let* _ = Custom.Tree.find t [ "bx" ] in - let+ _ = Custom.Tree.find t [ "by" ] in - (t, ()) - in - let f_short t = - let+ _ = Custom.Tree.find t [ "bx" ] in - (t, ()) - in - (* Test the Stream_too_long error. *) - let* r = Custom.Tree.verify_stream stream_all f_short in - let* () = - match r with - | Error (`Stream_too_long _) -> Lwt.return_unit - | _ -> Alcotest.fail "expected Stream_too_long error" - in - (* Test the Stream_too_short error. *) - let* r = Custom.Tree.verify_stream stream_short f_all in - let* () = - match r with - | Error (`Stream_too_short _) -> Lwt.return_unit - | _ -> Alcotest.fail "expected Stream_too_short error" - in - (* Test the correct usecase. *) - let* r = Custom.Tree.verify_stream stream_all f_all in - let* () = - match r with - | Ok (_, ()) -> Lwt.return_unit - | Error e -> ( - match e with - | `Proof_mismatch str -> - Alcotest.failf "unexpected Proof_mismatch error: %s" str - | `Stream_too_long str -> - Alcotest.failf "unexpected Stream_too_long error: %s" str - | `Stream_too_short str -> - Alcotest.failf "unexpected Stream_too_short error: %s" str) - in - Lwt.return_unit - let test_reexport_node () = let* tree = Store.Tree.add (Store.Tree.empty ()) [ "foo"; "a" ] "a" in let* repo1 = Store.Repo.v (config ~fresh:true root) in @@ -768,12 +612,8 @@ let tests = test_large_proofs); Alcotest_lwt.test_case "test extenders in stream proof" `Quick (fun _switch -> test_extenders); - Alcotest_lwt.test_case "test hardcoded stream proof" `Quick (fun _switch -> - test_hardcoded_stream); Alcotest_lwt.test_case "test hardcoded proof" `Quick (fun _switch -> test_hardcoded_proof); - Alcotest_lwt.test_case "test stream proof exn" `Quick (fun _switch -> - test_proof_exn); Alcotest_lwt.test_case "test reexport node" `Quick (fun _switch -> test_reexport_node); ]