[Date Prev][Date Next] [Chronological] [Thread] [Top]

(ITS#6040) SEGV - hdb_cache_lru_purge / bdb_cache_delete_internal / ldap_pvt_thread_mutex_lock



Full_Name: Oskar Pearson
Version: 2.4.15
OS: Ubuntu hardy
URL: ftp://ftp.openldap.org/incoming/slapd-configs-qualica-1.tar.gz
Submission from: (NULL) (94.194.140.71)


This is the same machine as ITS #6033. Details included here for easy
reference:
OpenLDAP 2.4.15 was self-compiled with no patches applied
(CFLAGS="-ggdb3 -O0" ./configure), linked against the standard OS libraries
(Ubuntu hardy) for BDB etc. BDB is 4.6.21-6ubuntu1. Kernel is 2.6.24-19-server,
libc is libc6-i686/2.7-10ubuntu3.

I do not have a debug log of this crash as it occurred on a live server, which
is too busy for that level of logging. I also do not have a reproducible
example. I have core dumps and can print out whatever is necessary from them -
I appreciate your time and help.

If I've missed some important info, please let me know and I'll rectify asap.

System overview:

We have a master server which replicates to more than 100 machines for Samba
purposes across ADSL lines with a VPN. These lines are quite busy, so
replication writes may be slow across the network, which may have implications
for locks.

Config files are included at the url below.

I've realised through looking at the code and config files that I should
probably increase the dncachesize parameter, as the number of DNs in the db is >
2000. This would help reduce the possibility of the bug occurring, but I think
the bug will still exist.

gdb --core=/root/core.26366 servers/slapd/slapd
...
(gdb) bt
#0  0xb7dd3540 in pthread_mutex_lock () from /lib/tls/i686/cmov/libpthread.so.0
#1  0x0817beef in ldap_pvt_thread_mutex_lock (mutex=0x34) at thr_posix.c:296
#2  0x08144215 in bdb_cache_delete_internal (cache=0x82b6df4, e=0x84017d8,
decr=0) at cache.c:1335
#3  0x081430e5 in hdb_cache_lru_purge (bdb=0x82b6db8) at cache.c:749
#4  0x081438ba in hdb_cache_find_id (op=0x86c01d0, tid=0x86d0a30, id=1792,
eip=0x9f6f1e68, flag=2, lock=0x9f6f1e00) at cache.c:1016
#5  0x08115c29 in hdb_search (op=0x86c01d0, rs=0x9f6f3100) at search.c:705
#6  0x080f179a in overlay_op_walk (op=0x86c01d0, rs=0x9f6f3100, which=op_search,
oi=0x82d2b88, on=0x0) at backover.c:669
#7  0x080f19a2 in over_op_func (op=0x86c01d0, rs=0x9f6f3100, which=op_search) at
backover.c:721
#8  0x080f1a46 in over_op_search (op=0x86c01d0, rs=0x9f6f3100) at
backover.c:743
#9  0x080724ed in fe_op_search (op=0x86c01d0, rs=0x9f6f3100) at search.c:366
#10 0x08071e60 in do_search (op=0x86c01d0, rs=0x9f6f3100) at search.c:217
#11 0x0806eb44 in connection_operation (ctx=0x9f6f31f0, arg_v=0x86c01d0) at
connection.c:1133
#12 0x0806f01e in connection_read_thread (ctx=0x9f6f31f0, argv=0x46) at
connection.c:1259
#13 0x0817ad1f in ldap_int_thread_pool_wrapper (xpool=0x829caf0) at tpool.c:663
#14 0xb7dd14fb in start_thread () from /lib/tls/i686/cmov/libpthread.so.0
#15 0xb7ba0e5e in clone () from /lib/tls/i686/cmov/libc.so.6
(gdb) frame 2
#2  0x08144215 in bdb_cache_delete_internal (cache=0x82b6df4, e=0x84017d8,
decr=0) at cache.c:1335
1335            bdb_cache_entryinfo_lock( e->bei_parent );
(gdb) p e->bei_parent
$1 = (struct bdb_entry_info *) 0x0
(gdb) up
#3  0x081430e5 in hdb_cache_lru_purge (bdb=0x82b6db8) at cache.c:749
749                                     bdb_cache_delete_internal(
&bdb->bi_cache, elru, 0 );
(gdb) p *elru
$2 = {bei_parent = 0x0, bei_id = 17, bei_lockpad = 0, bei_state = 0, bei_finders
= 0, bei_nrdn = {bv_len = 24, bv_val = 0x0}, bei_rdn = {bv_len = 24, 
    bv_val = 0x0}, bei_modrdns = 0, bei_ckids = 0, bei_dkids = 0, bei_e = 0x0,
bei_kids = 0x0, bei_kids_mutex = {__data = {__lock = 1, __count = 0, 
      __owner = 28050, __kind = 0, __nusers = 1, {__spins = 0, __list = {__next
= 0x0}}}, 
    __size = "\001\000\000\000\000\000\000\000\222m\000\000\000\000\000\000\001\000\000\000\000\000\000",
__align = 1}, bei_lrunext = 0x83b5e70, 
  bei_lruprev = 0x867b980}
(gdb) p bdb->bi_cache.c_leaves
$3 = 3737
(gdb) p eimax
$4 = 2000
(gdb) p bdb->bi_cache
$5 = {c_eifree = 0x8665b00, c_idtree = 0x9b322498, c_lruhead = 0x84017d8,
c_lrutail = 0x867b980, c_dntree = {bei_parent = 0x0, bei_id = 0, bei_lockpad =
0, 
    bei_state = 128, bei_finders = 0, bei_nrdn = {bv_len = 0, bv_val = 0x0},
bei_rdn = {bv_len = 0, bv_val = 0x0}, bei_modrdns = 0, bei_ckids = 1, 
    bei_dkids = 2, bei_e = 0x8320704, bei_kids = 0x8320248, bei_kids_mutex =
{__data = {__lock = 0, __count = 0, __owner = 0, __kind = 0, __nusers = 0, {
          __spins = 0, __list = {__next = 0x0}}}, __size = '\0' <repeats 23
times>, __align = 0}, bei_lrunext = 0x0, bei_lruprev = 0x0}, c_maxsize = 1000, 
  c_cursize = 117171, c_minfree = 1, c_eimax = 2000, c_eiused = 3740, c_leaves =
3737, c_purging = 1, c_txn = 0x8320188, c_rwlock = {__data = {__lock = 0, 
      __nr_readers = 0, __readers_wakeup = 8985, __writer_wakeup = 9500,
__nr_readers_queued = 0, __nr_writers_queued = 0, __flags = 0 '\0', 
      __shared = 0 '\0', __pad1 = 0 '\0', __pad2 = 0 '\0', __writer = 0}, 
    __size = "\000\000\000\000\000\000\000\000\031#\000\000\034%", '\0' <repeats
17 times>, __align = 0}, c_lru_mutex = {__data = {__lock = 1, __count = 0, 
      __owner = 28050, __kind = 0, __nusers = 1, {__spins = 0, __list = {__next
= 0x0}}}, 
    __size = "\001\000\000\000\000\000\000\000\222m\000\000\000\000\000\000\001\000\000\000\000\000\000",
__align = 1}, c_count_mutex = {__data = {
      __lock = 0, __count = 0, __owner = 0, __kind = 0, __nusers = 0, {__spins =
0, __list = {__next = 0x0}}}, __size = '\0' <repeats 23 times>, 
    __align = 0}, c_eifree_mutex = {__data = {__lock = 0, __count = 0, __owner =
0, __kind = 0, __nusers = 0, {__spins = 0, __list = {__next = 0x0}}}, 
    __size = '\0' <repeats 23 times>, __align = 0}}


If there's anything else you'd like me to print out, please let me know.

thread apply all bt follows:
Thread 10 (process 26366):
#0  0xb7f18410 in __kernel_vsyscall ()
#1  0xb7dd2775 in pthread_join () from /lib/tls/i686/cmov/libpthread.so.0
#2  0x0817be19 in ldap_pvt_thread_join (thread=2730888080, thread_return=0x0) at
thr_posix.c:197
#3  0x0806bca3 in slapd_daemon () at daemon.c:2740
#4  0x0804e8c7 in main (argc=1, argv=0xbfc63124) at main.c:948

Thread 9 (process 26372):
#0  0xb7f18410 in __kernel_vsyscall ()
#1  0xb7ba1676 in epoll_wait () from /lib/tls/i686/cmov/libc.so.6
#2  0x0806ae3a in slapd_daemon_task (ptr=0x0) at daemon.c:2366
#3  0xb7dd14fb in start_thread () from /lib/tls/i686/cmov/libpthread.so.0
#4  0xb7ba0e5e in clone () from /lib/tls/i686/cmov/libc.so.6

Thread 8 (process 26374):
#0  0xb7f18410 in __kernel_vsyscall ()
#1  0xb7dd5aa5 in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib/tls/i686/cmov/libpthread.so.0
#2  0x0817beae in ldap_pvt_thread_cond_wait (cond=0x829cb0c, mutex=0x829caf4) at
thr_posix.c:277
#3  0x0817acac in ldap_int_thread_pool_wrapper (xpool=0x829caf0) at tpool.c:654
#4  0xb7dd14fb in start_thread () from /lib/tls/i686/cmov/libpthread.so.0
#5  0xb7ba0e5e in clone () from /lib/tls/i686/cmov/libc.so.6

Thread 7 (process 27916):
#0  0xb7f18410 in __kernel_vsyscall ()
#1  0xb7dd5aa5 in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib/tls/i686/cmov/libpthread.so.0
#2  0x0817beae in ldap_pvt_thread_cond_wait (cond=0x829cb0c, mutex=0x829caf4) at
thr_posix.c:277
#3  0x0817acac in ldap_int_thread_pool_wrapper (xpool=0x829caf0) at tpool.c:654
#4  0xb7dd14fb in start_thread () from /lib/tls/i686/cmov/libpthread.so.0
#5  0xb7ba0e5e in clone () from /lib/tls/i686/cmov/libc.so.6

Thread 6 (process 28043):
#0  0xb7f18410 in __kernel_vsyscall ()
#1  0xb7dd5aa5 in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib/tls/i686/cmov/libpthread.so.0
#2  0x0817beae in ldap_pvt_thread_cond_wait (cond=0x829cb0c, mutex=0x829caf4) at
thr_posix.c:277
#3  0x0817acac in ldap_int_thread_pool_wrapper (xpool=0x829caf0) at tpool.c:654
#4  0xb7dd14fb in start_thread () from /lib/tls/i686/cmov/libpthread.so.0
#5  0xb7ba0e5e in clone () from /lib/tls/i686/cmov/libc.so.6
Thread 5 (process 28044):
#0  0xb7f18410 in __kernel_vsyscall ()
#1  0xb7dd5aa5 in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib/tls/i686/cmov/libpthread.so.0
#2  0x0817beae in ldap_pvt_thread_cond_wait (cond=0x829cb0c, mutex=0x829caf4) at
thr_posix.c:277
#3  0x0817acac in ldap_int_thread_pool_wrapper (xpool=0x829caf0) at tpool.c:654
#4  0xb7dd14fb in start_thread () from /lib/tls/i686/cmov/libpthread.so.0
#5  0xb7ba0e5e in clone () from /lib/tls/i686/cmov/libc.so.6

Thread 4 (process 28045):
#0  0xb7f18410 in __kernel_vsyscall ()
#1  0xb7dd5aa5 in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib/tls/i686/cmov/libpthread.so.0
#2  0x0817beae in ldap_pvt_thread_cond_wait (cond=0x829cb0c, mutex=0x829caf4) at
thr_posix.c:277
#3  0x0817acac in ldap_int_thread_pool_wrapper (xpool=0x829caf0) at tpool.c:654
#4  0xb7dd14fb in start_thread () from /lib/tls/i686/cmov/libpthread.so.0
#5  0xb7ba0e5e in clone () from /lib/tls/i686/cmov/libc.so.6

Thread 3 (process 28046):
#0  0xb7f18410 in __kernel_vsyscall ()
#1  0xb7dd5aa5 in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib/tls/i686/cmov/libpthread.so.0
#2  0x0817beae in ldap_pvt_thread_cond_wait (cond=0x829cb0c, mutex=0x829caf4) at
thr_posix.c:277
#3  0x0817acac in ldap_int_thread_pool_wrapper (xpool=0x829caf0) at tpool.c:654
#4  0xb7dd14fb in start_thread () from /lib/tls/i686/cmov/libpthread.so.0
#5  0xb7ba0e5e in clone () from /lib/tls/i686/cmov/libc.so.6

Thread 2 (process 28048):
#0  0xb7f18410 in __kernel_vsyscall ()
#1  0xb7dd5aa5 in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib/tls/i686/cmov/libpthread.so.0
#2  0x0817beae in ldap_pvt_thread_cond_wait (cond=0x829cb0c, mutex=0x829caf4) at
thr_posix.c:277
#3  0x0817acac in ldap_int_thread_pool_wrapper (xpool=0x829caf0) at tpool.c:654
#4  0xb7dd14fb in start_thread () from /lib/tls/i686/cmov/libpthread.so.0
#5  0xb7ba0e5e in clone () from /lib/tls/i686/cmov/libc.so.6

Thread 1 (process 28050):
#0  0xb7dd3540 in pthread_mutex_lock () from /lib/tls/i686/cmov/libpthread.so.0
#1  0x0817beef in ldap_pvt_thread_mutex_lock (mutex=0x34) at thr_posix.c:296
#2  0x08144215 in bdb_cache_delete_internal (cache=0x82b6df4, e=0x84017d8,
decr=0) at cache.c:1335
#3  0x081430e5 in hdb_cache_lru_purge (bdb=0x82b6db8) at cache.c:749
#4  0x081438ba in hdb_cache_find_id (op=0x86c01d0, tid=0x86d0a30, id=1792,
eip=0x9f6f1e68, flag=2, lock=0x9f6f1e00) at cache.c:1016
#5  0x08115c29 in hdb_search (op=0x86c01d0, rs=0x9f6f3100) at search.c:705
#6  0x080f179a in overlay_op_walk (op=0x86c01d0, rs=0x9f6f3100, which=op_search,
oi=0x82d2b88, on=0x0) at backover.c:669
#7  0x080f19a2 in over_op_func (op=0x86c01d0, rs=0x9f6f3100, which=op_search) at
backover.c:721
#8  0x080f1a46 in over_op_search (op=0x86c01d0, rs=0x9f6f3100) at
backover.c:743
#9  0x080724ed in fe_op_search (op=0x86c01d0, rs=0x9f6f3100) at search.c:366
#10 0x08071e60 in do_search (op=0x86c01d0, rs=0x9f6f3100) at search.c:217
#11 0x0806eb44 in connection_operation (ctx=0x9f6f31f0, arg_v=0x86c01d0) at
connection.c:1133
#12 0x0806f01e in connection_read_thread (ctx=0x9f6f31f0, argv=0x46) at
connection.c:1259
#13 0x0817ad1f in ldap_int_thread_pool_wrapper (xpool=0x829caf0) at tpool.c:663
#14 0xb7dd14fb in start_thread () from /lib/tls/i686/cmov/libpthread.so.0
#15 0xb7ba0e5e in clone () from /lib/tls/i686/cmov/libc.so.6