Skip to content

Commit

Permalink
Aristo db update delete functionality (status-im#1621)
Browse files Browse the repository at this point in the history
* Fix missing branch checks in transcoder

why:
  Symmetry problem. `Blobify()` allowed for encoding degenerate branch
  vertices while `Deblobify()` rejected decoding wrongly encoded data.

* Update memory backend so that it rejects storing bogus vertices.

why:
  Error behaviour made similar to the rocks DB backend.

* Make sure that leaf vertex IDs are not repurposed

why:
  This makes it easier to record leaf node changes

* Update error return code for next()/right() traversal

why:
  Returning offending vertex ID (besides error code) helps debugging

* Update Merkle hasher for deleted nodes

why:
  Not implemented, yet

also:
  Provide cache & backend consistency check functions. This was
  partly re-implemented from `hashifyCheck()`

* Simplify some unit tests

* Fix delete function

why:
  Was conceptually wrong
  • Loading branch information
mjfh authored Jun 30, 2023
1 parent aa6d478 commit dd1c8ed
Show file tree
Hide file tree
Showing 25 changed files with 1,495 additions and 532 deletions.
93 changes: 93 additions & 0 deletions nimbus/db/aristo/aristo_check.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# nimbus-eth1
# Copyright (c) 2021 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.

## Aristo DB -- Consistency checks
## ===============================
##
{.push raises: [].}

import
std/[algorithm, sequtils, sets, tables],
eth/common,
stew/[interval_set, results],
./aristo_init/[aristo_memory, aristo_rocksdb],
"."/[aristo_desc, aristo_get, aristo_init, aristo_vid],
./aristo_hashify/hashify_helper,
./aristo_check/[check_be, check_cache]

# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------

proc checkCache*(
    db: AristoDb;                      # Database, top layer
    relax = false;                     # Check existing hashes only
      ): Result[void,(VertexID,AristoError)] =
  ## Verify that the cache structure is correct as it would be after `merge()`
  ## and `hashify()` operations.
  ##
  ## Depending on the argument `relax`, either `checkCacheRelaxed()` (if
  ## `true`) or `checkCacheStrict()` (if `false`, the default) is run first.
  ## If that check passes, `checkCacheCommon()` is run and its result is
  ## returned. The first error encountered is returned together with the
  ## offending vertex ID.
  ##
  ## The following is verified:
  ##
  ## * Each `sTab[]` entry has a valid vertex which can be compiled as a node.
  ##   If `relax` is set `false`, the Merkle hashes are recompiled and must
  ##   match.
  ##
  ## * The hash table `kMap[]` and its inverse lookup table `pAmk[]` must
  ##   correspond.
  ##
  let rc =
    if relax: db.checkCacheRelaxed()
    else: db.checkCacheStrict()
  if rc.isErr:
    return rc

  # Checks that apply regardless of the `relax` mode
  db.checkCacheCommon()


proc checkBE*(
    db: AristoDb;                      # Database, top layer
    relax = true;                      # Not re-compiling hashes if `true`
    cache = true;                      # Also verify cache
      ): Result[void,(VertexID,AristoError)] =
  ## Verify the database backend structure. If the argument `relax` is set
  ## `false`, all necessary Merkle hashes are compiled and verified. If the
  ## argument `cache` is set `true`, the cache is also checked against the
  ## backend.
  ##
  ## The function dispatches on the backend type to the generic `checkBE()`
  ## implementation. Without a backend (`nil` or `BackendNone`) there is
  ## nothing to check and `ok()` is returned.
  ##
  ## The following is verified:
  ##
  ## * Each vertex ID on the structural table can be represented as a Merkle
  ##   Patricia Tree node. If `relax` is set `false`, the Merkle hashes are
  ##   all recompiled and must match.
  ##
  ## * The set of free vertex IDs as potentially supplied by the ID generator
  ##   state is disjoint from the set of already used vertex IDs on the
  ##   database. Moreover, the union of both sets is equivalent to the set of
  ##   positive `uint64` numbers.
  ##
  if db.backend.isNil:
    return ok()

  let typedBe = db.to(TypedBackendRef)
  case typedBe.kind:
  of BackendMemory:
    return typedBe.MemBackendRef.checkBE(db, cache=cache, relax=relax)
  of BackendRocksDB:
    return typedBe.RdbBackendRef.checkBE(db, cache=cache, relax=relax)
  of BackendNone:
    return ok()

# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------
175 changes: 175 additions & 0 deletions nimbus/db/aristo/aristo_check/check_be.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
# nimbus-eth1
# Copyright (c) 2021 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.

{.push raises: [].}

import
std/[algorithm, sequtils, sets, tables],
eth/common,
stew/interval_set,
../aristo_hashify/hashify_helper,
../aristo_init/[aristo_memory, aristo_rocksdb],
".."/[aristo_desc, aristo_get, aristo_vid]

const
  Vid2 = [VertexID(2)].toHashSet
    ## Singleton set holding `VertexID(2)` only. The vertex ID generator
    ## checks compare the expected free ID set against this value in order
    ## to recognise the fringe case of a single root vertex.

# ------------------------------------------------------------------------------
# Private helper
# ------------------------------------------------------------------------------

proc invTo(s: IntervalSetRef[VertexID,uint64]; T: type HashSet[VertexID]): T =
  ## Convert the intervals of the argument set `s` to a set of vertex IDs
  ## as it would appear with a vertex generator state list.
  ##
  ## Every interval is expanded into its individual vertex IDs, except for an
  ## interval reaching up to `high(VertexID)` which is represented by its
  ## lower bound only. If `s.total` is not smaller than `high(uint64)`, the
  ## result is the empty set.
  if high(uint64) <= s.total:
    return

  for iv in s.increasing:
    if iv.maxPt != high(VertexID):
      # Bounded interval: list all members
      for vid in iv.minPt .. iv.maxPt:
        result.incl vid
    else:
      # Last (open ended) interval: the lower bound stands for all of it
      result.incl iv.minPt

proc toNodeBe(
    vtx: VertexRef;                    # Vertex to convert
    db: AristoDb;                      # Database, top layer
      ): Result[NodeRef,VertexID] =
  ## Similar to `toNode()` but fetching from the backend only.
  ##
  ## The argument vertex `vtx` is converted to a node by looking up the hash
  ## keys of its sub-vertices with `getKeyBackend()`. On failure, the ID of
  ## the first sub-vertex without a valid backend key is returned as error.
  ## A `Leaf` vertex has no sub-vertices and always converts successfully.
  case vtx.vType:
  of Leaf:
    return ok NodeRef(vType: Leaf, lPfx: vtx.lPfx, lData: vtx.lData)
  of Branch:
    let node = NodeRef(vType: Branch, bVid: vtx.bVid)
    for n in 0 .. 15:
      let vid = vtx.bVid[n]
      if vid.isValid:
        # Used slot: a valid key must be available on the backend
        let rc = db.getKeyBackend vid
        if rc.isOk and rc.value.isValid:
          node.key[n] = rc.value
        else:
          return err(vid)
      else:
        # Unused slot
        node.key[n] = VOID_HASH_KEY
    return ok node
  of Extension:
    let
      vid = vtx.eVid
      rc = db.getKeyBackend vid
    if rc.isOk and rc.value.isValid:
      let node = NodeRef(vType: Extension, ePfx: vtx.ePfx, eVid: vid)
      node.key[0] = rc.value
      return ok node
    return err(vid)

# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------

proc checkBE*[T](
    be: T;                             # backend descriptor
    db: AristoDb;                      # Database, top layer
    relax: bool;                       # Not compiling hashes if `true`
    cache: bool;                       # Also verify cache
      ): Result[void,(VertexID,AristoError)] =
  ## Make sure that each vertex has a Merkle hash and vice versa. Also check
  ## the vertex ID generator state.
  ##
  ## The checks are run in the following order, the first failure is returned
  ## as a pair of offending vertex ID and error code:
  ##
  ## * Each vertex from `be.walkVtx` is valid and has a valid backend key.
  ##
  ## * Each key from `be.walkKey` is valid, has a valid backend vertex, and
  ##   this vertex can be compiled to a node using backend data only. Unless
  ##   `relax` is set, the re-compiled hash key must match the stored one.
  ##
  ## * The ID generator state from `be.walkIdg` matches the set of unused
  ##   vertex IDs calculated from the keys seen.
  ##
  ## * If `cache` is set, the top layer tables `sTab[]`, `kMap[]`, and `vGen`
  ##   are checked against the backend in a similar way.
  ##
  # Start with all vertex IDs from `1` upwards. The IDs found in use are
  # removed below so that the unused ones remain.
  let vids = IntervalSetRef[VertexID,uint64].init()
  discard vids.merge Interval[VertexID,uint64].new(VertexID(1),high(VertexID))

  for (_,vid,vtx) in be.walkVtx:
    if not vtx.isValid:
      return err((vid,CheckBeVtxInvalid))
    let rc = db.getKeyBackend vid
    if rc.isErr or not rc.value.isValid:
      return err((vid,CheckBeKeyMissing))

  for (_,vid,key) in be.walkKey:
    if not key.isValid:
      return err((vid,CheckBeKeyInvalid))
    let rc = db.getVtxBackend vid
    if rc.isErr or not rc.value.isValid:
      return err((vid,CheckBeVtxMissing))
    let rx = rc.value.toNodeBe db # backend only
    if rx.isErr:
      return err((vid,CheckBeKeyCantCompile))
    if not relax:
      let expected = rx.value.toHashKey
      if expected != key:
        return err((vid,CheckBeKeyMismatch))
    # Register the vertex ID as used
    discard vids.reduce Interval[VertexID,uint64].new(vid,vid)

  # Compare calculated state against database state
  block:
    # Extract vertex ID generator state
    var vGen: HashSet[VertexID]
    for (_,_,w) in be.walkIdg:
      vGen = vGen + w.toHashSet
    let
      vGenExpected = vids.invTo(HashSet[VertexID])
      delta = vGenExpected -+- vGen # symmetric difference
    if 0 < delta.len:
      # Exclude fringe case when there is a single root vertex only
      if vGenExpected != Vid2 or 0 < vGen.len:
        # Report the largest vertex ID from the difference set
        return err((delta.toSeq.sorted[^1],CheckBeGarbledVGen))

  # Check cache against backend
  if cache:

    # Check structural table
    for (vid,vtx) in db.top.sTab.pairs:
      # A `kMap[]` entry must exist.
      if not db.top.kMap.hasKey vid:
        return err((vid,CheckBeCacheKeyMissing))
      if vtx.isValid:
        # Register existing vid against backend generator state
        discard vids.reduce Interval[VertexID,uint64].new(vid,vid)
      else:
        # Some vertex is to be deleted, the key must be empty
        let lbl = db.top.kMap.getOrVoid vid
        if lbl.isValid:
          return err((vid,CheckBeCacheKeyNonEmpty))
        # There must be a representation on the backend DB
        if db.getVtxBackend(vid).isErr:
          return err((vid,CheckBeCacheVidUnsynced))
        # Register deleted vid against backend generator state
        discard vids.merge Interval[VertexID,uint64].new(vid,vid)

    # Check key table
    for (vid,lbl) in db.top.kMap.pairs:
      let vtx = db.getVtx vid
      if not db.top.sTab.hasKey(vid) and not vtx.isValid:
        return err((vid,CheckBeCacheKeyDangling))
      if lbl.isValid and not relax:
        if not vtx.isValid:
          return err((vid,CheckBeCacheVtxDangling))
        let rc = vtx.toNode db # compile cache first
        if rc.isErr:
          return err((vid,CheckBeCacheKeyCantCompile))
        let expected = rc.value.toHashKey
        if expected != lbl.key:
          return err((vid,CheckBeCacheKeyMismatch))

    # Check vGen. This works on a temporary descriptor holding the cached
    # generator state only, which is passed through `vidReorg()` before the
    # comparison.
    var tmp = AristoDb(top: AristoLayerRef(vGen: db.top.vGen))
    tmp.vidReorg()
    let
      vGen = tmp.top.vGen.toHashSet
      vGenExpected = vids.invTo(HashSet[VertexID])
      delta = vGenExpected -+- vGen # symmetric difference
    if 0 < delta.len:
      # Exclude fringe case when there is a single root vertex only
      if vGenExpected != Vid2 or 0 < vGen.len:
        return err((delta.toSeq.sorted[^1],CheckBeCacheGarbledVGen))

  ok()

# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------
126 changes: 126 additions & 0 deletions nimbus/db/aristo/aristo_check/check_cache.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# nimbus-eth1
# Copyright (c) 2021 Status Research & Development GmbH
# Licensed under either of
# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
# http://www.apache.org/licenses/LICENSE-2.0)
# * MIT license ([LICENSE-MIT](LICENSE-MIT) or
# http://opensource.org/licenses/MIT)
# at your option. This file may not be copied, modified, or distributed
# except according to those terms.

{.push raises: [].}

import
std/[sequtils, sets, tables],
eth/common,
stew/results,
../aristo_hashify/hashify_helper,
".."/[aristo_desc, aristo_get]

# ------------------------------------------------------------------------------
# Public functions
# ------------------------------------------------------------------------------

proc checkCacheStrict*(
    db: AristoDb;                      # Database, top layer
      ): Result[void,(VertexID,AristoError)] =
  ## Strict cache check. For each `sTab[]` entry of the top layer, the vertex
  ## must compile to a node, there must be a valid `kMap[]` entry matching
  ## the re-compiled hash key, and the reverse lookup table `pAmk[]` must map
  ## this entry back to the same vertex ID.
  ##
  ## Finally, a non-empty `pAmk[]` table must not have fewer entries than
  ## the `sTab[]` table.
  let layer = db.top

  for (vid,vtx) in layer.sTab.pairs:
    # The vertex must be convertible to a node
    let node = vtx.toNode db
    if node.isErr:
      return err((vid,CheckStkVtxIncomplete))

    # The stored hash key must exist and match the compiled one
    let label = layer.kMap.getOrVoid vid
    if not label.isValid:
      return err((vid,CheckStkVtxKeyMissing))
    elif label.key != node.value.toHashKey:
      return err((vid,CheckStkVtxKeyMismatch))

    # The reverse lookup must lead back to the same vertex ID
    let backVid = layer.pAmk.getOrVoid label
    if not backVid.isValid:
      return err((vid,CheckStkRevKeyMissing))
    elif backVid != vid:
      return err((vid,CheckStkRevKeyMismatch))

  let nRevKeys = layer.pAmk.len
  if 0 < nRevKeys and nRevKeys < layer.sTab.len:
    # Cannot have less changes than cached entries
    return err((VertexID(0),CheckStkVtxCountMismatch))

  ok()


proc checkCacheRelaxed*(
    db: AristoDb;                      # Database, top layer
      ): Result[void,(VertexID,AristoError)] =
  ## Relaxed cache check. The function works in one of two modes, depending
  ## on whether the set `pPrf` of the top layer is empty.
  ##
  ## * If `pPrf` is not empty, only the vertex IDs from this set are checked.
  ##   A valid `sTab[]` vertex must compile to a node, its `kMap[]` key must
  ##   match the re-compiled hash key, and `pAmk[]` must map back to the same
  ##   vertex ID. For a missing or void `sTab[]` vertex (a deleted entry),
  ##   there must be a backend vertex and an existing but void `kMap[]`
  ##   entry.
  ##
  ## * Otherwise, all valid `kMap[]` entries are checked. Entries where the
  ##   vertex is not available or cannot be compiled to a node are silently
  ##   skipped. For the other ones, the hash key and the reverse lookup are
  ##   verified as above.
  ##
  if 0 < db.top.pPrf.len:
    for vid in db.top.pPrf:
      let vtx = db.top.sTab.getOrVoid vid
      if vtx.isValid:
        let rc = vtx.toNode db
        if rc.isErr:
          return err((vid,CheckRlxVtxIncomplete))

        let lbl = db.top.kMap.getOrVoid vid
        if not lbl.isValid:
          return err((vid,CheckRlxVtxKeyMissing))
        if lbl.key != rc.value.toHashKey:
          return err((vid,CheckRlxVtxKeyMismatch))

        let revVid = db.top.pAmk.getOrVoid lbl
        if not revVid.isValid:
          return err((vid,CheckRlxRevKeyMissing))
        if revVid != vid:
          return err((vid,CheckRlxRevKeyMismatch))
      else:
        # Must be a deleted entry
        let rc = db.getVtxBackend vid
        if rc.isErr:
          return err((vid,CheckRlxVidVtxBeMissing))
        if not db.top.kMap.hasKey vid:
          return err((vid,CheckRlxVtxEmptyKeyMissing))
        if db.top.kMap.getOrVoid(vid).isValid:
          return err((vid,CheckRlxVtxEmptyKeyExpected))
  else:
    for (vid,lbl) in db.top.kMap.pairs:
      if lbl.isValid:                              # Otherwise to be deleted
        let vtx = db.getVtx vid
        if vtx.isValid:
          let rc = vtx.toNode db
          if rc.isOk:
            if lbl.key != rc.value.toHashKey:
              return err((vid,CheckRlxVtxKeyMismatch))

            # A missing reverse entry and a reverse entry for a different
            # vertex ID are reported with different error codes, the same
            # way as in the `pPrf` branch above.
            let revVid = db.top.pAmk.getOrVoid lbl
            if not revVid.isValid:
              return err((vid,CheckRlxRevKeyMissing))
            if revVid != vid:
              return err((vid,CheckRlxRevKeyMismatch))
  ok()


proc checkCacheCommon*(
    db: AristoDb;                      # Database, top layer
      ): Result[void,(VertexID,AristoError)] =
  ## Cache checks that apply to both, strict and relaxed mode.
  ##
  ## * The number of `pAmk[]` entries must be the same as the number of valid
  ##   `kMap[]` entries. Otherwise an error is returned, where missing or
  ##   duplicate vertex IDs are reported in preference to the generic count
  ##   mismatch.
  ##
  ## * Each vertex ID from the set `pPrf` must have a `kMap[]` entry.
  ##
  let layer = db.top

  # Some `kMap[]` entries may be void indicating backend deletion
  var nValidKeys = 0
  for lbl in layer.kMap.values:
    if lbl.isValid:
      nValidKeys.inc

  if layer.pAmk.len != nValidKeys:
    # Try to find a particular offending vertex ID
    var seen: HashSet[VertexID]
    for vid in layer.pAmk.values:
      if not layer.kMap.hasKey(vid):
        return err((vid,CheckAnyRevVtxMissing))
      if seen.containsOrIncl vid:
        return err((vid,CheckAnyRevVtxDup))
    return err((VertexID(0),CheckAnyRevCountMismatch)) # should not apply(!)

  for vid in layer.pPrf:
    if not layer.kMap.hasKey(vid):
      return err((vid,CheckAnyVtxLockWithoutKey))
  ok()

# ------------------------------------------------------------------------------
# End
# ------------------------------------------------------------------------------

Loading

0 comments on commit dd1c8ed

Please sign in to comment.