Issue 5994 - replica segfault after syncing final entry
Summary: replica segfault after syncing final entry
Status: VERIFIED FIXED
Alias: None
Product: OpenLDAP
Classification: Unclassified
Component: historical (show other issues)
Version: unspecified
Hardware: All All
Importance: --- normal
Target Milestone: ---
Assignee: OpenLDAP project
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2009-03-04 23:49 UTC by Quanah Gibson-Mount
Modified: 2019-11-20 18:03 UTC (History)
0 users

See Also:


Attachments

Note: You need to log in before you can comment on or make changes to this issue.
Description Quanah Gibson-Mount 2009-03-04 23:49:29 UTC
Full_Name: Quanah Gibson-Mount
Version: 2.3.43
OS: Linux 2.6
URL: ftp://ftp.openldap.org/incoming/
Submission from: (NULL) (75.111.29.239)


When a replica performs a full refresh from the master, slapd on the replica
segfaults on the final entry.

The line in question (delete.c:178, where the segfault occurs) is:
    rc = bdb_cache_find_id( op, ltid, eip->bei_id, &eip, 0, locker, &plock );

Core data:

Program terminated with signal 11, Segmentation fault.
#0  0x00002aaaaf76ddee in bdb_delete (op=0x41000840, rs=0x41000430) at
delete.c:178

There is only one active thread in the core:

(gdb) thr apply all bt

Thread 4 (process 23189):
#0  0x0000003167a075b5 in pthread_join () from /lib64/libpthread.so.0
#1  0x00002aaaaaabd817 in ldap_pvt_thread_join (thread=1082132800,
thread_return=0x0) at thr_posix.c:193
#2  0x0000000000429c79 in slapd_daemon () at daemon.c:2579
#3  0x0000000000412183 in main (argc=10, argv=0x7fffca15ec78) at main.c:859

Thread 3 (process 23190):
#0  0x0000003166ecec48 in ?? ()
#1  0x0000000000000000 in ?? ()

Thread 2 (process 23192):
#0  0x0000003167a0a4a6 in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
#1  0x00002aaaaaabd8c4 in ldap_pvt_thread_cond_wait (cond=0x6579a50,
mutex=0x6579a28) at thr_posix.c:299
#2  0x00002aaaaaabc609 in ldap_int_thread_pool_wrapper (xpool=0x6579a20) at
tpool.c:466
#3  0x0000003167a062f7 in start_thread () from /lib64/libpthread.so.0
#4  0x0000003166ece85d in ?? ()
#5  0x0000000000000000 in ?? ()

Thread 1 (process 23191):
#0  0x00002aaaaf76ddee in bdb_delete (op=0x41000840, rs=0x41000430) at
delete.c:178
#1  0x00000000004a0fe9 in overlay_op_walk (op=0x41000840, rs=0x41000430,
which=op_delete, oi=0x66a3550, on=0x0) at backover.c:650
#2  0x00000000004a11ec in over_op_func (op=0x41000840, rs=0x41000430,
which=op_delete) at backover.c:702
#3  0x00000000004a1336 in over_op_delete (op=0x41000840, rs=0x41000430) at
backover.c:754
#4  0x00000000004985fc in syncrepl_del_nonpresent (op=0x41000840, si=0x66a3170,
uuids=0x0, cookiecsn=0x41000640) at syncrepl.c:2167
#5  0x0000000000493849 in do_syncrep2 (op=0x41000840, si=0x66a3170) at
syncrepl.c:823
#6  0x00000000004946e9 in do_syncrepl (ctx=0x41000e00, arg=0x66a3350) at
syncrepl.c:1102
#7  0x00002aaaaaabc56a in ldap_int_thread_pool_wrapper (xpool=0x6579a20) at
tpool.c:478
#8  0x0000003167a062f7 in start_thread () from /lib64/libpthread.so.0
#9  0x0000003166ece85d in ?? ()
#10 0x0000000000000000 in ?? ()

The operation data is:

(gdb) print *op
$1 = {o_hdr = 0x410009a0, o_tag = 74, o_time = 1236201040, o_tincr = 2166, o_bd
= 0x41000080, o_req_dn = {bv_len = 0, bv_val = 0x6902820 ""}, o_req_ndn =
{bv_len = 0,
    bv_val = 0x6b8dcd0 ""}, o_request = {oq_add = {rs_e = 0x2, rs_modlist =
0xffffffffffffffff}, oq_bind = {rb_method = 2, rb_cred = {bv_len =
18446744073709551615, bv_val = 0x0},
      rb_edn = {bv_len = 0, bv_val = 0x41000370 "\t"}, rb_ssf = 159460192,
rb_tmp_mech = {bv_len = 15, bv_val = 0x66a2ac0 "(objectclass=*)"}}, oq_compare =
{rs_ava = 0x2}, oq_modify = {
      rs_modlist = 0x2, rs_increment = -1}, oq_modrdn = {rs_newrdn = {bv_len =
2, bv_val = 0xffffffffffffffff <Address 0xffffffffffffffff out of bounds>},
rs_nnewrdn = {bv_len = 0,
        bv_val = 0x0}, rs_newSup = 0x41000370, rs_nnewSup = 0x9812b60,
rs_deleteoldrdn = 15}, oq_search = {rs_scope = 2, rs_deref = 0, rs_slimit = -1,
rs_tlimit = -1, rs_limit = 0x0,
      rs_attrsonly = 0, rs_attrs = 0x41000370, rs_filter = 0x9812b60,
rs_filterstr = {bv_len = 15, bv_val = 0x66a2ac0 "(objectclass=*)"}}, oq_abandon
= {rs_msgid = 2}, oq_cancel = {
      rs_msgid = 2}, oq_extended = {rs_reqoid = {bv_len = 2, bv_val =
0xffffffffffffffff <Address 0xffffffffffffffff out of bounds>}, rs_flags = 0,
rs_reqdata = 0x0}, oq_pwdexop = {
      rs_extended = {rs_reqoid = {bv_len = 2, bv_val = 0xffffffffffffffff
<Address 0xffffffffffffffff out of bounds>}, rs_flags = 0, rs_reqdata = 0x0},
rs_old = {bv_len = 1090519920,
        bv_val = 0x9812b60 "���5\221\t"}, rs_new = {bv_len = 15, bv_val =
0x66a2ac0 "(objectclass=*)"}, rs_mods = 0x0, rs_modtail = 0x0}}, o_abandon = 0,
o_cancel = 0, o_groups = 0x0,
  o_do_not_cache = 0 '\0', o_is_auth_check = 0 '\0', o_nocaching = 0 '\0',
o_delete_glue_parent = 0 '\0', o_no_schema_check = 0 '\0',
  o_ctrlflag = '\0' <repeats 12 times>, "\002", '\0' <repeats 18 times>,
o_controls = 0x41000a18, o_authz = {sai_method = 0, sai_mech = {bv_len = 0,
bv_val = 0x0}, sai_dn = {bv_len = 9,
      bv_val = 0x66a2690 "cn=config"}, sai_ndn = {bv_len = 9, bv_val = 0x66a0a20
"cn=config"}, sai_ssf = 256, sai_transport_ssf = 0, sai_tls_ssf = 256,
sai_sasl_ssf = 0}, o_ber = 0x0,
  o_res_ber = 0x0, o_callback = 0x980ebf0, o_ctrls = 0x0, o_csn = {bv_len = 32,
bv_val = 0x9417220 "20090304090011Z#000000#00#000000"}, o_private = 0x40fffef0,
o_next = {stqe_next = 0x0}}

(gdb) print *ltid
$3 = {mgrp = 0x675ea50, parent = 0x0, last_lsn = {file = 0, offset = 0}, txnid =
2147483701, tid = 0, off = 16056, lock_timeout = 0, expire = 0, txn_list = 0x0,
links = {tqe_next = 0x0,
    tqe_prev = 0x675ea58}, xalinks = {tqe_next = 0x0, tqe_prev = 0x0}, events =
{tqh_first = 0x0, tqh_last = 0x68dc978}, logs = {stqh_first = 0x0, stqh_last =
0x68dc988}, kids = {
    tqh_first = 0x0, tqh_last = 0x68dc998}, klinks = {tqe_next = 0x0, tqe_prev =
0x0}, api_internal = 0x0, cursors = 0, abort = 0x2aaaafa4ca70 <__txn_abort_pp>,
  commit = 0x2aaaafa4caf0 <__txn_commit_pp>, discard = 0x2aaaafa4c9f0
<__txn_discard_pp>, id = 0x2aaaafa4a690 <__txn_id>, prepare = 0x2aaaafa4bf70
<__txn_prepare>,
  set_timeout = 0x2aaaafa4b0b0 <__txn_set_timeout>, flags = 16}


And here's the last few lines of debug output when I ran slapd with '-d32767':

=> test_filter
    PRESENT
=> access_allowed: search access to "cn=serverX,cn=servers,cn=zimbra"
"objectClass" requested
<= root access granted
<= test_filter 6
send_ldap_result: conn=-1 op=0 p=3
send_ldap_result: err=0 matched="" text=""
==> bdb_delete:
bdb_dn2entry("")
entry_decode: ""
<= entry_decode()

(serverX is the last DN in an LDIF file produced with slapcat, and it is also the
name of the replica server.)
Comment 1 Howard Chu 2009-03-05 07:06:14 UTC
changed notes
changed state Open to Suspended
Comment 2 Howard Chu 2009-06-23 00:26:52 UTC
moved from Incoming to Historical
Comment 3 OpenLDAP project 2014-08-01 21:05:04 UTC
2.3, empty suffix
Comment 4 Quanah Gibson-Mount 2019-11-20 18:03:42 UTC
changed state Suspended to Closed