[Date Prev][Date Next] [Chronological] [Thread] [Top]

Re: (ITS#8433) slapd deadlocks on shutdown in 3+ MMR configuration with active writes



quanah@openldap.org wrote:
> Full backtrace of all threads:
>
>
> Thread 4 (Thread 0x7f146ac9d700 (LWP 16805)):
> #0  pthread_cond_wait@@GLIBC_2.3.2 () at
> ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
> No locals.
> #1  0x00007f3c70fe8171 in ldap_pvt_thread_cond_wait (cond=0x1fa0038,
> mutex=0x1fa0010) at thr_posix.c:277
> No locals.
> #2  0x00007f3c70fe63c2 in ldap_pvt_thread_pool_destroy (tpool=0x7618c0
> <connection_pool>, run_pending=1) at tpool.c:817
>          pool = 0x1f763c0
>          pptr = 0x1f763c0
>          pq = 0x1fa0000
>          task = 0x7f3c716a61c8
>          i = 0
> #3  0x0000000000438967 in slapd_daemon_task (ptr=0x1d7bce8) at daemon.c:2829
>          l = 3
>          last_idle_check = 1464372736
>          ebadf = 0
>          tid = 0
> #4  0x00007f3c70552184 in start_thread (arg=0x7f146ac9d700) at
> pthread_create.c:312
>          __res = <optimized out>
>          pd = 0x7f146ac9d700
>          now = <optimized out>
>          unwind_buf = {cancel_jmp_buf = {{jmp_buf = {139725667686144,
> -1093867468031317215, 0, 0, 139725667686848, 139725667686144,
> 1078920827726146337, 1056426161274956577},
>                mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data =
> {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
>          not_first_call = <optimized out>
>          pagesize_m1 = <optimized out>
>          sp = <optimized out>
>          freesize = <optimized out>
>          __PRETTY_FUNCTION__ = "start_thread"
> #5  0x00007f3c7027f37d in clone () at
> ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
> No locals.

Thread 4 is waiting for the other threads to finish.
>
> Thread 3 (Thread 0x7f1468498700 (LWP 16810)):
> #0  0x00007f3c705593ad in read () at ../sysdeps/unix/syscall-template.S:81
> No locals.
> #1  0x00007f3c70dd2435 in sb_stream_read (sbiod=0x5d36630, buf=0x5a3c057,
> len=433) at sockbuf.c:490D0D
>          __PRETTY_FUNCTION__ = "sb_stream_read"

Thread 3 is inside OpenSSL, it thinks there are 433 bytes to read on this 
socket and it's waiting for them to arrive. There's not much we can do in 
slapd about this.

> #2  0x00007f3c70dd2e56 in sb_debug_read (sbiod=0x5d36d20, buf=0x5a3c057,
> len=433) at sockbuf.c:829
>          ret = 79
>          ebuf = "PjIh\024\177\000\000\017\000\000\000\000\000\000\000`\016\000\000\000\000\000\000'\000\000\000\000\000\000\000\250\062H\004\000\000\000\000'\000\000\000\000\000\000\000\240\361H\004\000\000\000\000P\026u\016\000\000\000\000\000\001\000\000\000\000\000\000\001\350#p<\177\000\000\000\000\000\000\000\000\000\0006364\037\000\000\000\000\000\000\006\000\000\000\000\000\000\000hxIh\t\000\000\000\300lIh\024\177\000\000\220jIh\024\177\000"
> #3  0x00007f3c7101ebd1 in tlso_bio_read (b=0x5a93110,
>      buf=0x5a3c057 "\004\areqType1\b\004\006modify02\004\005reqDN1)\004'uid=gtillman,ou=people,dc=zimbra,dc=com0\201\317\004\006reqMod1\201\304\004:zimbraAuthTokens:-
> 1988626989|1464372357970|8.7.0_RC1_1601\004\063entryCSN:=
> 20160527182235.762156Z#000000#003#000000\004."..., len=433) at tls_o.c:721
>          p = 0x5a6caa0
>          ret = 79
> #4  0x00007f3c6f19988b in BIO_read () from
> /opt/zimbra/common/lib/libcrypto.so.1.0.0
> No symbol table info available.
> #5  0x00007f3c6f501ffc in ssl3_read_n () from
> /opt/zimbra/common/lib/libssl.so.1.0.0
> No symbol table info available.
> #6  0x00007f3c6f503ebf in ssl3_read_bytes () from
> /opt/zimbra/common/lib/libssl.so.1.0.0
> No symbol table info available.
> #7  0x00007f3c6f50033b in ssl3_read () from
> /opt/zimbra/common/lib/libssl.so.1.0.0
> No symbol table info available.
> #8  0x00007f3c7101f093 in tlso_sb_read (sbiod=0x5d382e0, buf=0xcb4f93f, len=8)
> at tls_o.c:881
>          p = 0x5a6caa0
>          ret = 635655159814
>          err = 28
>          __PRETTY_FUNCTION__ = "tlso_sb_read"
> #9  0x00007f3c70dd2e56 in sb_debug_read (sbiod=0x5d37800, buf=0xcb4f93f, len=8)
> at sockbuf.c:829
>          ret = 0
>          ebuf = "\000\000\000\000\000\000\000\000\300\211Ih\024\177\000\000\000\207Ih\024\177\000\000\000\000\000\000\000\000\000\000pmIh\024\177\000\000v\202\376p<\177\000\000\000\000\000\000\000\000\000\000\240h%h\024\177\000\000]-K\000\000\000\000\000\300{Ih\024\177\000\000\240mIh\000\000\000\000v\202\376p<\177\000\000\000\000\000\000\000\000\000\000\320mIh\024\177\000\000]-K\000\000\000\000\000\300{Ih\024\177\000"
> #10 0x00007f3c70dd2291 in ber_int_sb_read (sb=0x5d363c0, buf=0xcb4f93f, len=8)
> at sockbuf.c:423
>          ret = 0
>          __PRETTY_FUNCTION__ = "ber_int_sb_read"
> #11 0x00007f3c70dcebbf in ber_get_next (sb=0x5d363c0, len=0x7f1468496f00,
> ber=0xcb4f930) at io.c:532
>          sblen = 8
>          buf = "\000\000\000\000\000\000"
>          tlen = 0
>          __PRETTY_FUNCTION__ = "ber_get_next"
> #12 0x00007f3c70fea22f in try_read1msg (ld=0x5d36d50, msgid=3, all=0,
> lc=0x3d9b320, result=0x7f1468497190) at result.c:491
>          ber = 0xcb4f930
>          newmsg = 0x0
>          l = 0x0
>          prev = 0x0
>          id = 0
>          tag = 0
>          len = 0
>          foundit = 0
>          lr = 0x0
>          tmplr = 0x0
>          dummy_lr = {lr_msgid = 0, lr_status = 0, lr_refcnt = 0, lr_outrefcnt =
> 0, lr_abandoned = 0, lr_origid = 0, lr_parentcnt = 0, lr_res_msgtype = 0,
> lr_res_errno = 0,
>            lr_res_error = 0x0, lr_res_matched = 0x0, lr_ber = 0x0, lr_conn = 0x0,
> lr_dn = {bv_len = 0, bv_val = 0x0}, lr_parent = 0x0, lr_child = 0x0, lr_refnext
> = 0x0, lr_prev =x0x0,
>            lr_next = 0x0}
>          tmpber = {ber_opts = {lbo_valid = -12608, lbo_options = 474, lbo_debug =
> 0}, ber_tag = 5341050, ber_len = 0, ber_usertag = 5014619, ber_buf = 0x0,
> ber_ptr = 0x0, ber_end = 0x0,
>            ber_sos_ptr = 0x574963a4 <error: Cannot access memory at address
> 0x574963a4>, ber_rwptr = 0x0, ber_memctx = 0x67}
>          rc = 0
>          refer_cnt = 0
>          hadref = 0
>          simple_request = 0
>          err = 0
>          lderr = 0
>          __PRETTY_FUNCTION__ = "try_read1msg"A%A#13 0x00007f3c70fe9e53 in
> wait4msg (ld=0x5d36d50, msgid=3, all=0, timeout=0x7f14684971f0,
> result=0x7f1468497190) at result.c:362
>          lnext = 0x0
>          serviced = 1
>          lc_ready = 1
>          rc = -2
>          tv = {tv_sec = 0, tv_usec = 0}
>          tv0 = {tv_sec = 0, tv_usec = 0}
>          start_time_tv = {tv_sec = 1464427439, tv_usec = 944352}
>          tvp = 0x7f14684970a0
>          lc = 0x3d9b320
>          __PRETTY_FUNCTION__ = "wait4msg"
> #14 0x00007f3c70fe9574 in ldap_result (ld=0x5d36d50, msgid=3, all=0,
> timeout=0x7f14684971f0, result=0x7f1468497190) at result.c:117
>          rc = 0
>          __PRETTY_FUNCTION__ = "ldap_result"
> #15 0x00000000004bd6b2 in do_syncrep2 (op=0x7f1468497720, si=0x1dacec0) at
> syncrepl.c:841

> Thread 2 (Thread 0x7f1467c97700 (LWP 16812)):
> #0  0x00007f3c705593ad in read () at ../sysdeps/unix/syscall-template.S:81
> No locals.
> #1  0x00007f3c70dd2435 in sb_stream_read (sbiod=0x3dac8b0, buf=0x23cc1a8,
> len=96) at sockbuf.c:493
>          __PRETTY_FUNCTION__ = "sb_stream_read"
> #2  0x00007f3c70dd2e56 in sb_debug_read (sbiod=0x3dac850, buf=0x23cc1a8, len=96)
> at sockbuf.c:829
>          ret = 416
>          ebuf = "PZ\311g\024\177\000\000\017\000\000\000\000\000\000\000`\016\000\000\000\000\000\000'\000\000\000\000\000\000\000\230\264\343\005\000\000\000\000'\000\000\000\000\000\000\000P~\344\005\000\000\000\000@C5Cru\016\000\000\000\000\000\001\000\000\000\000\000\000\001\350#p<\177\000\000\000\000\000\000\000\000\000\000\364\037\000\000\000\000\000\000\006\000\000\000\000\000\000\000hh\311g\n\000\000\000\300\\\311g\024\177\000\000\220Z\311g\024\177\000"
> #3  0x00007f3c7101ebd1 in tlso_bio_read (b=0x244b2d0,
>      buf=0x23cc1a8 "\240F0D\004\030\061.3.6.1.4.1.4203.1.9.1.2\004(0&\n\001\001\004\020\004@\320&\270\206\020\065\223\244\347B\322&#1507;\327\004\017rid=102,sid=001&#52715;\272\005\324a`\346\343\373\335\062\065\223\a\363\363Sv\003\003\003\003\063.1.9.1.2\004(0&\n\001\001\004\020\003\362\377\004\270\206\020\065\223\236\347B\322&#1507;\327\004\017rid=102,sid=001\027&#1500;{\026g\264\262\210\331U]\330#\020u\241&#642;\206\b\b\b\b\b\b\b\b\bntryCSN:=
> 20160527182329"..., len=96% a at tls_o.c:721
>          p = 0x3da51e0
>          ret = 416
> #4  0x00007f3c6f19988b in BIO_read () from
> /opt/zimbra/common/lib/libcrypto.so.1.0.0
> No symbol table info available.

Thread 2 is in the same state as thread 3.

-- 
   -- Howard Chu
   CTO, Symas Corp.           http://www.symas.com
   Director, Highland Sun     http://highlandsun.com/hyc/
   Chief Architect, OpenLDAP  http://www.openldap.org/project/