[Date Prev][Date Next] [Chronological] [Thread] [Top]

Crash in LRU_DELETE (ITS#2759)



Full_Name: ocmer
Version: 2.1.22
OS: Linux 2.4.20-13.7smp
URL: ftp://ftp.openldap.org/incoming/
Submission from: (NULL) (217.120.171.59)


When running OpenLDAP 2.1.22 on an HP DL380, using 1 non-hyperthreaded Xeon
processor, a crash occurred in LRU_DELETE. When using both hyperthreading-enabled
Xeon processors on the same machine, the same crash occurred.
OpenLDAP was put under load with 50 parallel connections, performing 1000 reads,
2 modifies, and 1 write action per connection, for a period of 43 minutes. We're
using the BDB backend.

Below is a printout of the GDB output:

#0  0x0809941e in bdb_cache_find_entry_ndn2id (be=0x822acf0, cache=0x8229af8,
ndn=0xb27195c4) at cache.c:833
833                     LRU_DELETE( cache, ep );
(gdb) thread apply all bt

Thread 1 (process 858):
#0  0x0809941e in bdb_cache_find_entry_ndn2id (be=0x822acf0, cache=0x8229af8,
ndn=0xb27195c4) at cache.c:833
#1  0x0809c70a in bdb_dn2id_matched (be=0x822acf0, txn=0x0, in=0xba40f474,
id=0xb271966c, id2=0xb2719668, flags=0) at dn2id.c:403
#2  0x0809b9fc in bdb_dn2entry_rw (be=0x822acf0, tid=0x0, dn=0xba40f474,
e=0xb27196f0, matched=0xb27196ec, flags=0, rw=0, locker=126, lock=0xb27196cc)
    at dn2entry.c:47
#3  0x08092bac in bdb_referrals (be=0x822acf0, conn=0x414b142c, op=0xa8e040c8,
dn=0xba40f46c, ndn=0xba40f474, text=0xb271972c) at referral.c:52
#4  0x0805fd74 in backend_check_referrals (be=0x822acf0, conn=0x414b142c,
op=0xa8e040c8, dn=0xba40f46c, ndn=0xba40f474) at backend.c:1074
#5  0x08056bc3 in do_add (conn=0x414b142c, op=0xa8e040c8) at add.c:236
#6  0x080511f6 in connection_operation (ctx=0xba40e700, arg_v=0xa8e040c8) at
connection.c:927
#7  0x080aecaf in ldap_int_thread_pool_wrapper (xpool=0x81b3ef8) at tpool.c:426
#8  0x40031faf in pthread_start_thread () from /lib/i686/libpthread.so.0
(gdb) l
828
829                     /* set lru mutex */
830                     ldap_pvt_thread_mutex_lock( &cache->lru_mutex );
831
832                     /* lru */
833                     LRU_DELETE( cache, ep );
834                     LRU_ADD( cache, ep );
835
836                     /* free lru mutex */
837                     ldap_pvt_thread_mutex_unlock( &cache->lru_mutex );
(gdb) up
#1  0x0809c70a in bdb_dn2id_matched (be=0x822acf0, txn=0x0, in=0xba40f474,
id=0xb271966c, id2=0xb2719668, flags=0) at dn2id.c:403
403                     cached_id = bdb_cache_find_entry_ndn2id(be,
&bdb->bi_cache, &dn);
(gdb) l
398                     dn.bv_val[-1] = DN_BASE_PREFIX;
399
400                     *id = NOID;
401
402                     /* lookup cache */
403                     cached_id = bdb_cache_find_entry_ndn2id(be,
&bdb->bi_cache, &dn);
404
405                     if (cached_id != NOID) {
406                             rc = 0;
407                             *id = cached_id;
(gdb) up
#2  0x0809b9fc in bdb_dn2entry_rw (be=0x822acf0, tid=0x0, dn=0xba40f474,
e=0xb27196f0, matched=0xb27196ec, flags=0, rw=0, locker=126, lock=0xb27196cc)
    at dn2entry.c:47
47                      rc = bdb_dn2id_matched( be, tid, dn, &id, &id2, flags
);
(gdb) l
42
43              *e = NULL;
44
45              if( matched != NULL ) {
46                      *matched = NULL;
47                      rc = bdb_dn2id_matched( be, tid, dn, &id, &id2, flags
);
48              } else {
49                      rc = bdb_dn2id( be, tid, dn, &id, flags );
50              }
51
(gdb) up
#3  0x08092bac in bdb_referrals (be=0x822acf0, conn=0x414b142c, op=0xa8e040c8,
dn=0xba40f46c, ndn=0xba40f474, text=0xb271972c) at referral.c:52
52              rc = bdb_dn2entry_r( be, NULL, ndn, &e, &matched, 0, locker,
&lock );
(gdb) l
47                      return LDAP_OTHER;
48              }
49
50      dn2entry_retry:
51              /* get entry */
52              rc = bdb_dn2entry_r( be, NULL, ndn, &e, &matched, 0, locker,
&lock );
53
54              switch(rc) {
55              case DB_NOTFOUND:
56                      rc = 0;
(gdb) up
#4  0x0805fd74 in backend_check_referrals (be=0x822acf0, conn=0x414b142c,
op=0xa8e040c8, dn=0xba40f46c, ndn=0xba40f474) at backend.c:1074
1074                    rc = be->be_chk_referrals( be,
(gdb) l
1069            int rc = LDAP_SUCCESS;
1070
1071            if( be->be_chk_referrals ) {
1072                    const char *text;
1073
1074                    rc = be->be_chk_referrals( be,
1075                            conn, op, dn, ndn, &text );
1076
1077                    if( rc != LDAP_SUCCESS && rc != LDAP_REFERRAL ) {
1078                            send_ldap_result( conn, op, rc,
(gdb) up
#5  0x08056bc3 in do_add (conn=0x414b142c, op=0xa8e040c8) at add.c:236
236             rc = backend_check_referrals( be, conn, op, &e->e_name,
&e->e_nname );
(gdb) l
231                             NULL, text, NULL, NULL );
232                     goto done;
233             }
234
235             /* check for referrals */
236             rc = backend_check_referrals( be, conn, op, &e->e_name,
&e->e_nname );
237             if ( rc != LDAP_SUCCESS ) {
238                     goto done;
239             }
240


#0  0x0809941e in bdb_cache_find_entry_ndn2id (be=0x822acf0, cache=0x8229af8,
ndn=0xb27195c4) at cache.c:833
833                     LRU_DELETE( cache, ep );
(gdb) p cache
$2 = (Cache *) 0x8229af8
(gdb) p *cache
$3 = {c_maxsize = 100, c_cursize = 100, c_dntree = 0xa8c356e0, c_idtree =
0xa8c08c98, c_lruhead = 0xbb307920, c_lrutail = 0xba41ca38, c_rwlock =
0x8229bd0,
  lru_mutex = {__m_reserved = 0, __m_count = 0, __m_owner = 0x0, __m_kind = 0,
__m_lock = {__status = -1456998980, __spinlock = 0}}}
(gdb) p ep
$4 = (Entry *) 0xbb30b010
(gdb) p *ep
$5 = {e_id = 16, e_name = {bv_len = 49, bv_val = 0xbb30ae14
"ou=subscribers,ou=community1.zz,o=sicc.att.zzz.zz"}, e_nname = {bv_len = 49,
    bv_val = 0xbb30ae47 "ou=subscribers,ou=community1.zz,o=sicc.att.zzz.zz"},
e_attrs = 0xbb30b038, e_ocflags = 32, e_bv = {bv_len = 501,
    bv_val = 0xbb30ae10
"\202\001p1ou=subscribers,ou=community1.zz,o=sicc.att.zzz.zz"}, e_private =
0x0}


In ./servers/slapd/back-bdb/cache.c, the combination of the LRU_DELETE and
LRU_ADD calls appears 4 times.
In bdb_cache_add_entry_rw and bdb_cache_update_entry, the calls are protected by
locking both c_rwlock and lru_mutex.
In bdb_cache_find_entry_ndn2id and bdb_cache_find_entry_id, the calls are
protected by locking only the lru_mutex.
After we also locked the c_rwlock in bdb_cache_find_entry_ndn2id around the
LRU_DELETE and LRU_ADD calls, we ran a test for 20 hours on 1 CPU without any
problems.