[Date Prev][Date Next]
[Chronological]
[Thread]
[Top]
Re: (ITS#7974) LDBM's "laggard reader" flaw still present, in continue of ITS#7904
- To: openldap-its@OpenLDAP.org
- Subject: Re: (ITS#7974) LDBM's "laggard reader" flaw still present, in continue of ITS#7904
- From: leo@yuriev.ru
- Date: Thu, 23 Oct 2014 05:13:32 +0000
- Auto-submitted: auto-generated (OpenLDAP-ITS)
This is a multi-part message in MIME format.
--------------000000040604020102000407
Content-Type: text/plain; charset=windows-1251; format=flowed
Content-Transfer-Encoding: 7bit
The attached files are derived from OpenLDAP Software. All of the
modifications
to OpenLDAP Software represented in the following patch(es) were
developed by
Peter-Service LLC, Moscow, Russia. Peter-Service LLC has not assigned rights
and/or interest in this work to any party. I, Leonid Yuriev am authorized by
Peter-Service LLC, my employer, to release this work under the following
terms.
Peter-Service LLC hereby places the following modifications to OpenLDAP
Software
(and only these modifications) into the public domain. Hence, these
modifications may be freely used and/or redistributed for any purpose
with or
without attribution and/or other notice.
--------------000000040604020102000407
Content-Type: text/x-patch;
name="0001-lmdb-ITS-7974-oomkiller-feature.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
filename="0001-lmdb-ITS-7974-oomkiller-feature.patch"
>From 85fce95eaa0e71ee43625ccc202c173f7d4acb4a Mon Sep 17 00:00:00 2001
From: Leo Yuriev <leo@yuriev.ru>
Date: Tue, 21 Oct 2014 19:25:32 +0400
Subject: [PATCH 1/2] lmdb: ITS#7974 oomkiller feature.
---
libraries/liblmdb/lmdb.h | 34 +++++++++++++++++
libraries/liblmdb/mdb.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 126 insertions(+), 3 deletions(-)
diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h
index bdbb0b9..a3ca62e 100644
--- a/libraries/liblmdb/lmdb.h
+++ b/libraries/liblmdb/lmdb.h
@@ -1537,6 +1537,40 @@ int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx);
* @return 0 on success, non-zero on failure.
*/
int mdb_reader_check(MDB_env *env, int *dead);
+
+ /** @brief A callback function for killing a laggard reader,
+ * called when a page allocation is about to fail with MDB_MAP_FULL.
+ *
+ * @param[in] env An environment handle returned by #mdb_env_create().
+ * @param[in] pid pid of the reader process.
+ * @param[in] thread_id thread_id of the reader thread.
+ * @param[in] txn Transaction number on which the reader is stalled.
+ * @return -1 on failure (reader is not killed),
+ * 0 on a race condition (no such reader),
+ * 1 on success (reader was killed; its slot's txnid is reset to (txnid_t)-1),
+ * >1 on success (reader was SURELY killed; its slot's pid and tid are cleared too).
+ */
+typedef int (MDB_oomkiller_func)(MDB_env *env, int pid, void* thread_id, size_t txn);
+
+ /** @brief Set the oomkiller callback.
+ *
+ * The callback is called only in the out-of-pages case, to kill
+ * laggard readers and so allow reclaiming of freeDB.
+ *
+ * @param[in] env An environment handle returned by #mdb_env_create(); NULL is ignored.
+ * @param[in] oomkiller A #MDB_oomkiller_func function or NULL to disable.
+ */
+void mdb_env_set_oomkiller(MDB_env *env, MDB_oomkiller_func *oomkiller);
+
+ /** @brief Get the current oomkiller callback.
+ *
+ * The callback is called only in the out-of-pages case, to kill
+ * laggard readers and so allow reclaiming of freeDB.
+ *
+ * @param[in] env An environment handle returned by #mdb_env_create().
+ * @return A #MDB_oomkiller_func function, or NULL if disabled or if env is NULL.
+ */
+MDB_oomkiller_func* mdb_env_get_oomkiller(MDB_env *env);
/** @} */
#ifdef __cplusplus
diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c
index 6cc3433..e60d83d 100644
--- a/libraries/liblmdb/mdb.c
+++ b/libraries/liblmdb/mdb.c
@@ -1145,6 +1145,7 @@ struct MDB_env {
#endif
void *me_userctx; /**< User-settable context */
MDB_assert_func *me_assert_func; /**< Callback for assertion failures */
+ MDB_oomkiller_func *me_oomkiller; /**< Callback for killing laggard readers */
};
/** Nested transaction */
@@ -1900,6 +1901,77 @@ mdb_find_oldest(MDB_txn *txn)
return oldest;
}
+/** Find the reader slot holding the lowest txnid; *laggard (optional) gets -1 if none lags.
+ * @return that txnid, else mti_txnid - 1; 0 when mti_txnid <= 1. */
+static txnid_t
+mdb_laggard_reader(MDB_env *env, int *laggard)
+{
+	txnid_t lowest = 0;
+	int slot, found = -1;
+	if (env->me_txns->mti_txnid > 1) {
+		MDB_reader *readers = env->me_txns->mti_readers;
+		lowest = env->me_txns->mti_txnid - 1;
+		/* descend from the last slot; strict '<' keeps the first hit on ties */
+		for (slot = env->me_txns->mti_numreaders; slot-- > 0; ) {
+			if (readers[slot].mr_pid) {
+				txnid_t seen = readers[slot].mr_txnid;
+				if (seen < lowest) {
+					lowest = seen;
+					found = slot;
+				}
+			}
+		}
+	}
+	if (laggard)
+		*laggard = found;
+	return lowest;
+}
+
+/** Try to advance the oldest reader txnid: check for stale readers, then
+ * hand the live laggard to me_oomkiller. Called when the map is full.
+ * @return non-zero if the oldest reader txnid advanced (caller retries), else 0. */
+static int
+mdb_oomkill_laggard(MDB_env *env)
+{
+	int idx;
+	const txnid_t tail = mdb_laggard_reader(env, &idx);
+	if (idx < 0)
+		return 0;
+
+	for(;;) {
+		MDB_reader *r;
+		MDB_THR_T tid;
+		pid_t pid;
+		int rc, dead;
+
+		if (mdb_reader_check(env, &dead))
+			break;
+		/* Re-scan on every pass: the slot found earlier may have been cleared by
+		 * the check above or by our own kill, and re-testing it would spin forever. */
+		if (tail < mdb_laggard_reader(env, &idx) || idx < 0)
+			break;
+		if (!env->me_oomkiller)
+			break;
+		r = &env->me_txns->mti_readers[ idx ];
+		pid = r->mr_pid;
+		tid = r->mr_tid;
+		if (r->mr_txnid != tail || pid <= 0)
+			continue;	/* slot changed under us; look again */
+
+		rc = env->me_oomkiller(env, pid, (void*) tid, tail);
+		if (rc < 0)
+			break;
+		if (rc) {
+			r->mr_txnid = (txnid_t)-1;	/* stop this slot from pinning 'tail' */
+			if (rc > 1) {
+				r->mr_tid = 0;
+				r->mr_pid = 0;
+			}
+		}
+	}
+	return tail < mdb_laggard_reader(env, NULL);
+}
+
/** Add a page to the txn's dirty list */
static void
mdb_page_dirty(MDB_txn *txn, MDB_page *mp)
@@ -1978,6 +2050,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
goto fail;
}
+oomkill_retry:;
for (op = MDB_FIRST;; op = MDB_NEXT) {
MDB_val key, data;
MDB_node *leaf;
@@ -2073,9 +2146,11 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
i = 0;
pgno = txn->mt_next_pgno;
if (pgno + num >= env->me_maxpg) {
- DPUTS("DB size maxed out");
- rc = MDB_MAP_FULL;
- goto fail;
+ DPUTS("DB size maxed out");
+ if (mdb_oomkill_laggard(env))
+ goto oomkill_retry;
+ rc = MDB_MAP_FULL;
+ goto fail;
}
search_done:
@@ -9403,4 +9478,18 @@ mdb_reader_check(MDB_env *env, int *dead)
*dead = count;
return MDB_SUCCESS;
}
+
+void
+mdb_env_set_oomkiller(MDB_env *env, MDB_oomkiller_func *oomkiller)
+{
+	if (env == NULL) return;	/* no environment: nothing to configure */
+	env->me_oomkiller = oomkiller;	/* NULL disables the callback */
+}
+
+MDB_oomkiller_func*
+mdb_env_get_oomkiller(MDB_env *env)
+{
+	return (env == NULL) ? NULL : env->me_oomkiller;	/* no env reads as "disabled" */
+}
+
/** @} */
--
2.1.0
--------------000000040604020102000407
Content-Type: text/x-patch;
name="0002-slapd-ITS-7974-oomkiller-feature.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
filename="0002-slapd-ITS-7974-oomkiller-feature.patch"
>From caf076698e093077fa44e490797847c3187d485b Mon Sep 17 00:00:00 2001
From: Leo Yuriev <leo@yuriev.ru>
Date: Tue, 21 Oct 2014 19:49:05 +0400
Subject: [PATCH 2/2] slapd: ITS#7974 oomkiller feature.
---
servers/slapd/back-mdb/back-mdb.h | 3 +++
servers/slapd/back-mdb/config.c | 40 ++++++++++++++++++++++++++++++++++----
servers/slapd/back-mdb/init.c | 2 ++
servers/slapd/back-mdb/proto-mdb.h | 2 ++
4 files changed, 43 insertions(+), 4 deletions(-)
diff --git a/servers/slapd/back-mdb/back-mdb.h b/servers/slapd/back-mdb/back-mdb.h
index 9d5d4b1..be82da8 100644
--- a/servers/slapd/back-mdb/back-mdb.h
+++ b/servers/slapd/back-mdb/back-mdb.h
@@ -81,6 +81,9 @@ struct mdb_info {
uint32_t mi_txn_cp_kbyte;
struct re_s *mi_txn_cp_task;
struct re_s *mi_index_task;
+ uint32_t mi_renew_lag;
+ uint32_t mi_renew_percent;
+ int mi_oomkill;
mdb_monitor_t mi_monitor;
diff --git a/servers/slapd/back-mdb/config.c b/servers/slapd/back-mdb/config.c
index 5b402c5..b54da49 100644
--- a/servers/slapd/back-mdb/config.c
+++ b/servers/slapd/back-mdb/config.c
@@ -106,6 +106,8 @@ static slap_verbmasks mdb_envflags[] = {
{ BER_BVC("writemap"), MDB_WRITEMAP },
{ BER_BVC("mapasync"), MDB_MAPASYNC },
{ BER_BVC("nordahead"), MDB_NORDAHEAD },
+#define MDB_OOMKILL (MDB_NOMEMINIT << 4)
+ { BER_BVC("oomkill"), MDB_OOMKILL },
{ BER_BVNULL, 0 }
};
@@ -123,6 +125,23 @@ mdb_checkpoint( void *ctx, void *arg )
return NULL;
}
+/* Kill a laggard reader's process: 2 = SIGKILL sent, 0 = no such process, -1 = not killed. */
+int
+mdb_oomkiller(MDB_env *env, int pid, void* thread_id, size_t txn)
+{
+	if ( pid > 0 && pid != getpid() ) {	/* pid <= 0 would signal a process group; never kill self */
+		if ( kill( pid, SIGKILL ) == 0 ) {
+			Debug( LDAP_DEBUG_ANY, "oomkiller: SIGKILL to pid %i\n", pid, 0, 0 );
+			sched_yield();
+			return 2;
+		}
+		if ( errno == ESRCH )
+			return 0;
+		Debug( LDAP_DEBUG_ANY, "oomkiller: SIGKILL to pid %i: %s\n", pid, strerror(errno), 0 );
+	}
+	return -1;
+}
+
/* reindex entries on the fly */
static void *
mdb_online_index( void *ctx, void *arg )
@@ -313,12 +332,15 @@ mdb_cf_gen( ConfigArgs *c )
c->value_int = 1;
break;
- case MDB_ENVFLAGS:
- if ( mdb->mi_dbenv_flags ) {
- mask_to_verbs( mdb_envflags, mdb->mi_dbenv_flags, &c->rvalue_vals );
- }
+ case MDB_ENVFLAGS: {
+ long flags = mdb->mi_dbenv_flags;
+ if ( mdb->mi_oomkill )
+ flags |= MDB_OOMKILL;
+ if ( flags )
+ mask_to_verbs( mdb_envflags, flags, &c->rvalue_vals );
if ( !c->rvalue_vals ) rc = 1;
break;
+ }
case MDB_INDEX:
mdb_attr_index_unparse( mdb, &c->rvalue_vals );
@@ -380,6 +402,8 @@ mdb_cf_gen( ConfigArgs *c )
break;
case MDB_ENVFLAGS:
+ mdb->mi_oomkill = 0;
+ mdb_env_set_oomkiller( mdb->mi_dbenv, NULL );
if ( c->valx == -1 ) {
int i;
for ( i=0; mdb_envflags[i].mask; i++) {
@@ -596,6 +620,14 @@ mdb_cf_gen( ConfigArgs *c )
for ( i=1; i<c->argc; i++ ) {
j = verb_to_mask( c->argv[i], mdb_envflags );
if ( mdb_envflags[j].mask ) {
+
+ if ( MDB_OOMKILL == mdb_envflags[j].mask ) {
+ mdb->mi_oomkill = 1;
+ if ( mdb->mi_flags & MDB_IS_OPEN )
+ mdb_env_set_oomkiller( mdb->mi_dbenv, mdb_oomkiller );
+ break;
+ }
+
if ( mdb->mi_flags & MDB_IS_OPEN )
rc = mdb_env_set_flags( mdb->mi_dbenv, mdb_envflags[j].mask, 1 );
else
diff --git a/servers/slapd/back-mdb/init.c b/servers/slapd/back-mdb/init.c
index 1c5ab83..c7c09d4 100644
--- a/servers/slapd/back-mdb/init.c
+++ b/servers/slapd/back-mdb/init.c
@@ -150,6 +150,8 @@ mdb_db_open( BackendDB *be, ConfigReply *cr )
goto fail;
}
+ mdb_env_set_oomkiller( mdb->mi_dbenv, mdb->mi_oomkill ? mdb_oomkiller : NULL);
+
#ifdef HAVE_EBCDIC
strcpy( path, mdb->mi_dbenv_home );
__atoe( path );
diff --git a/servers/slapd/back-mdb/proto-mdb.h b/servers/slapd/back-mdb/proto-mdb.h
index b6b8d7c..3ec986e 100644
--- a/servers/slapd/back-mdb/proto-mdb.h
+++ b/servers/slapd/back-mdb/proto-mdb.h
@@ -20,6 +20,8 @@ LDAP_BEGIN_DECL
#define MDB_UCTYPE "MDB"
+MDB_oomkiller_func mdb_oomkiller;	/* config.c: callback installed when the "oomkill" envflag is set */
+
/*
* attr.c
*/
--
2.1.0
--------------000000040604020102000407--