[Date Prev][Date Next] [Chronological] [Thread] [Top]

Re: (ITS#7829) MDB mdb_cursor_del causes records to be skipped



Howard Chu writes:
> Thanks, confirmed. In a nested rebalance, doing a page merge with the
> neighbor on the left was discarding the rebalanced state. Fixed now in
> mdb.master

Nope. 69edafe28aef03b965c3d911be9ab8e340f914e1 "ITS#7829 more for
prev commit" breaks test054-syncreplication-parallel-load in RE24
(with normal pagesize), every 2nd run or so.

Running ./scripts/test054-syncreplication-parallel-load for mdb...
running defines.sh
Starting provider slapd on TCP/IP port 9011...
Using ldapsearch to check that provider slapd is running...
Using ldapadd to create the context prefix entry in the provider...
Starting consumer slapd on TCP/IP port 9014...
Using ldapsearch to check that consumer slapd is running...
Using ldapadd to populate the provider directory...
Waiting 7 seconds for syncrepl to receive changes...
Stopping the provider, sleeping 10 seconds and restarting it...
Using ldapsearch to check that provider slapd is running...
Waiting 10 seconds to let the system catch up
Using ldapmodify to modify provider directory...
ldapmodify failed (80)!

slapd.1.log says:

533bbaa2 mdb_modrdn: new ndn=cn=rosco p. coltrane,ou=retired,ou=people,dc=example,dc=com
533bbaa2 => mdb_dn2id("cn=rosco p. coltrane,ou=retired,ou=people,dc=example,dc=com")
533bbaa2 <= mdb_dn2id: get failed: MDB_NOTFOUND: No matching key/data pair found (-30798)
533bbaa2 => mdb_dn2id_delete 0x2a
533bbaa2 <= mdb_dn2id_delete 0x2a: -30798
533bbaa2 <=- mdb_modrdn: dn2id del failed: MDB_NOTFOUND: No matching key/data pair found (-30798)
533bbaa2 send_ldap_result: conn=1002 op=8 p=3
533bbaa2 send_ldap_result: err=80 matched="" text="DN index delete fail"


The dn2id.c assert() below would catch it. Passing -MDB_SET instead of
MDB_SET lets a patched liblmdb (second diff) catch it even earlier.

diff --git a/servers/slapd/back-mdb/dn2id.c b/servers/slapd/back-mdb/dn2id.c
index ceacb17..8cb9d91 100644
--- a/servers/slapd/back-mdb/dn2id.c
+++ b/servers/slapd/back-mdb/dn2id.c
@@ -243,3 +243,4 @@ mdb_dn2id_delete(
 		do {
-			rc = mdb_cursor_get( mc, &key, &data, MDB_SET );
+			rc = mdb_cursor_get( mc, &key, &data, -MDB_SET );
+			assert(!rc);
 			if ( !rc ) {

With the patch below, mdb.c fails at the first assert (in mdb_cursor_set);
the remaining hunks only make liblmdb accept -MDB_SET:

diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c
index 10a8358..71b0b93 100644
--- a/libraries/liblmdb/mdb.c
+++ b/libraries/liblmdb/mdb.c
@@ -5348,2 +5348,3 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data,
 		rc = mc->mc_dbx->md_cmp(key, &nodekey);
+		mdb_cassert(mc, op != (MDB_cursor_op)-MDB_SET || rc >= 0 || mc->mc_top);
 		if (rc == 0) {
@@ -5460,3 +5461,3 @@ set1:
 		if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
-			if (op == MDB_SET || op == MDB_SET_KEY || op == MDB_SET_RANGE) {
+			if (op == MDB_SET || op == (MDB_cursor_op)-MDB_SET || op == MDB_SET_KEY || op == MDB_SET_RANGE) {
 				rc = mdb_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL);
@@ -5651,2 +5652,3 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data,
 	case MDB_SET_RANGE:
+	case (MDB_cursor_op)-MDB_SET:
 		if (key == NULL) {
@@ -5747,2 +5749,3 @@ fetchm:
 		DPRINTF(("unhandled/unimplemented cursor operation %u", op));
+		mdb_cassert(mc, 0); /* just checking that we never get here */
 		rc = EINVAL;

#2  mdb_assert_fail ()
#3  mdb_cursor_set (mc=0x7fabb4102bb0, key=0x7fabbf276260,
    data=0x7fabbf276250, op=4294967281, exactp=0x7fabbf2761f0) at mdb.c:5349
#4  mdb_cursor_get (mc=0x7fabb4102bb0, key=0x7fabbf276260,
    data=0x7fabbf276250, op=4294967281) at mdb.c:5657
#5  mdb_dn2id_delete (op=0x7fabb4000960, mc=0x7fabb4102bb0, id=42, nsubs=1)
    at dn2id.c:244
#6  mdb_modrdn (op=0x7fabb4000960, rs=0x7fabbf276910) at modrdn.c:476
#7  overlay_op_walk (op=0x7fabb4000960, rs=0x7fabbf276910, which=op_modrdn,
    oi=0x1c2c120, on=0x0) at backover.c:671
#8  over_op_func (op=0x7fabb4000960, rs, which) at backover.c:723
#9  fe_op_modrdn (op=0x7fabb4000960, rs=0x7fabbf276910) at modrdn.c:314
#10 do_modrdn (op=0x7fabb4000960, rs=0x7fabbf276910) at modrdn.c:186
#11 connection_operation (ctx=0x7fabbf276a70, arg_v=0x7fabb4000960)
    at connection.c:1155
#12 connection_read_thread (ctx=0x7fabbf276a70, argv) at connection.c:1291
#13 ldap_int_thread_pool_wrapper (xpool=0x1bd7370) at tpool.c:688

(gdb) frame 3
#3  mdb_cursor_set (mc=0x7fabb4102bb0, key=0x7fabbf276260,
    data=0x7fabbf276250, op=4294967281, exactp=0x7fabbf2761f0) at mdb.c:5349
5349            mdb_cassert(mc, op != (MDB_cursor_op)-MDB_SET || rc >= 0 || mc->mc_top);
(gdb) info locals
nodekey = {mv_size = 8, mv_data = 0x7fabb01092da}
rc = -8
mp = 0x7fabb0108310
leaf = 0x7fabb01092d2
__FUNCTION__ = "mdb_cursor_set"
(gdb) set output-radix 16
(gdb) p *mp
$1 = {mp_p = {p_pgno = 0x51, p_next = 0x51}, mp_pad = 0x0, mp_flags = 0x12,
      mp_pb = {pb = {pb_lower = 0x44, pb_upper = 0xa2a}, pb_pages = 0xa2a0044},
      mp_ptrs = {0xfc2}}
(gdb) p *leaf
$2 = {mn_lo = 0x2e, mn_hi = 0x0, mn_flags = 0x0, mn_ksize = 0x8,
      mn_data = "\020"}
(gdb) p *(MDB_ID*)key->mv_data
$3 = 0x8
(gdb) p *(MDB_ID*)nodekey.mv_data
$4 = 0x10

-- 
Hallvard