[Date Prev][Date Next] [Chronological] [Thread] [Top]

Re: (ITS#7974) LMDB's "laggard reader" flaw still present, in continuation of ITS#7904



This is a multi-part message in MIME format.
--------------000000040604020102000407
Content-Type: text/plain; charset=windows-1251; format=flowed
Content-Transfer-Encoding: 7bit

The attached files are derived from OpenLDAP Software. All of the
modifications to OpenLDAP Software represented in the following patch(es)
were developed by Peter-Service LLC, Moscow, Russia. Peter-Service LLC has
not assigned rights and/or interest in this work to any party. I, Leonid
Yuriev, am authorized by Peter-Service LLC, my employer, to release this
work under the following terms.

Peter-Service LLC hereby places the following modifications to OpenLDAP 
Software
(and only these modifications) into the public domain. Hence, these
modifications may be freely used and/or redistributed for any purpose 
with or
without attribution and/or other notice.


--------------000000040604020102000407
Content-Type: text/x-patch;
 name="0001-lmdb-ITS-7974-oomkiller-feature.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
 filename="0001-lmdb-ITS-7974-oomkiller-feature.patch"

>From 85fce95eaa0e71ee43625ccc202c173f7d4acb4a Mon Sep 17 00:00:00 2001
From: Leo Yuriev <leo@yuriev.ru>
Date: Tue, 21 Oct 2014 19:25:32 +0400
Subject: [PATCH 1/2] lmdb: ITS#7974 oomkiller feature.

---
 libraries/liblmdb/lmdb.h | 34 +++++++++++++++++
 libraries/liblmdb/mdb.c  | 95 ++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 126 insertions(+), 3 deletions(-)

diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h
index bdbb0b9..a3ca62e 100644
--- a/libraries/liblmdb/lmdb.h
+++ b/libraries/liblmdb/lmdb.h
@@ -1537,6 +1537,40 @@ int	mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx);
 	 * @return 0 on success, non-zero on failure.
 	 */
 int	mdb_reader_check(MDB_env *env, int *dead);
+
+	/** @brief A callback function for killing a laggard reader,
+	 * called when the map is full, before #MDB_MAP_FULL is returned.
+	 *
+	 * @param[in] env An environment handle returned by #mdb_env_create().
+	 * @param[in] pid Process ID of the reader process.
+	 * @param[in] thread_id Thread ID of the reader thread.
+	 * @param[in] txn ID of the transaction on which the reader is stalled.
+	 * @return <0 on failure (reader was not killed),
+	 *         0 on a race condition (no such reader),
+	 *         1 on success (reader was killed),
+	 *         >1 on success (reader was surely killed; its slot is released).
+	 */
+typedef int (MDB_oomkiller_func)(MDB_env *env, int pid, void* thread_id, size_t txn);
+
+	/** @brief Set the oomkiller callback.
+	 *
+	 * The callback is called only when the environment runs out of pages,
+	 * to kill laggard readers so that freeDB pages can be reclaimed.
+	 *
+	 * @param[in] env An environment handle returned by #mdb_env_create().
+	 * @param[in] oomkiller A #MDB_oomkiller_func function or NULL to disable.
+	 */
+void mdb_env_set_oomkiller(MDB_env *env, MDB_oomkiller_func *oomkiller);
+
+	/** @brief Get the current oomkiller callback.
+	 *
+	 * The callback is called only when the environment runs out of pages,
+	 * to kill laggard readers so that freeDB pages can be reclaimed.
+	 *
+	 * @param[in] env An environment handle returned by #mdb_env_create().
+	 * @return A #MDB_oomkiller_func function or NULL if disabled.
+	 */
+MDB_oomkiller_func* mdb_env_get_oomkiller(MDB_env *env);
 /**	@} */
 
 #ifdef __cplusplus
diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c
index 6cc3433..e60d83d 100644
--- a/libraries/liblmdb/mdb.c
+++ b/libraries/liblmdb/mdb.c
@@ -1145,6 +1145,7 @@ struct MDB_env {
 #endif
 	void		*me_userctx;	 /**< User-settable context */
 	MDB_assert_func *me_assert_func; /**< Callback for assertion failures */
+	MDB_oomkiller_func *me_oomkiller; /**< Callback for killing laggard readers */
 };
 
 	/** Nested transaction */
@@ -1900,6 +1901,77 @@ mdb_find_oldest(MDB_txn *txn)
 	return oldest;
 }
 
+static txnid_t
+mdb_laggard_reader(MDB_env *env, int *laggard)
+{	/* Return the smallest mr_txnid among active reader slots, capped at mti_txnid - 1 */
+	txnid_t tail = 0;
+	if (laggard)
+		*laggard = -1;	/* -1: no slot is older than mti_txnid - 1 */
+	if (env->me_txns->mti_txnid > 1) {
+		int i;
+		MDB_reader *r = env->me_txns->mti_readers;
+		/* Start from the txn before mti_txnid and lower it to the smallest mr_txnid */
+		tail = env->me_txns->mti_txnid - 1;
+		for (i = env->me_txns->mti_numreaders; --i >= 0; ) {
+			if (r[i].mr_pid) {	/* skip slots whose mr_pid is zero */
+				txnid_t mr = r[i].mr_txnid;
+				if (tail > mr) {
+					tail = mr;
+					if (laggard)
+						*laggard = i;	/* slot index of the current minimum */
+				}
+			}
+		}
+	}
+	/* Still 0 here when mti_txnid <= 1, i.e. the reader table was not scanned */
+	return tail;
+}
+
+/** Try to unblock page reclaiming by getting rid of the laggard reader(s).
+ * Returns 1 if the oldest reader snapshot advanced (caller retries), else 0. */
+static int
+mdb_oomkill_laggard(MDB_env *env)
+{
+	int idx, rc;
+	txnid_t tail = mdb_laggard_reader(env, &idx);
+	if (idx < 0)
+		return 0;
+
+	for(;;) {
+		MDB_reader *r;
+		MDB_THR_T tid;
+		pid_t pid;
+
+		/* Reap the slots of dead processes first; give up if that fails. */
+		if (mdb_reader_check(env, NULL))
+			break;
+		/* Rescan on every pass: 'idx' goes stale once its slot is released,
+		 * and re-testing a stale slot would make this loop spin forever. */
+		if (tail < mdb_laggard_reader(env, &idx))
+			return 1;
+		if (idx < 0 || !env->me_oomkiller)
+			break;
+
+		r = &env->me_txns->mti_readers[ idx ];
+		pid = r->mr_pid;
+		tid = r->mr_tid;
+		if (r->mr_txnid != tail || pid <= 0)
+			continue;	/* slot changed since the scan above: rescan */
+		rc = env->me_oomkiller(env, pid, (void*) tid, tail);
+		if (rc < 0)
+			break;	/* callback could not kill the reader */
+		if (rc) {
+			r->mr_txnid = (txnid_t)-1;	/* reader no longer pins 'tail' */
+			if (rc > 1) {	/* surely killed: release the slot as well */
+				r->mr_tid = 0;
+				r->mr_pid = 0;
+			}
+		}
+	}
+
+	return tail < mdb_laggard_reader(env, NULL);
+}
+
 /** Add a page to the txn's dirty list */
 static void
 mdb_page_dirty(MDB_txn *txn, MDB_page *mp)
@@ -1978,6 +2050,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
 		goto fail;
 	}
 
+oomkill_retry:;
 	for (op = MDB_FIRST;; op = MDB_NEXT) {
 		MDB_val key, data;
 		MDB_node *leaf;
@@ -2073,9 +2146,11 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
 	i = 0;
 	pgno = txn->mt_next_pgno;
 	if (pgno + num >= env->me_maxpg) {
-			DPUTS("DB size maxed out");
-			rc = MDB_MAP_FULL;
-			goto fail;
+		DPUTS("DB size maxed out");
+		if (mdb_oomkill_laggard(env))
+			goto oomkill_retry;
+		rc = MDB_MAP_FULL;
+		goto fail;
 	}
 
 search_done:
@@ -9403,4 +9478,18 @@ mdb_reader_check(MDB_env *env, int *dead)
 		*dead = count;
 	return MDB_SUCCESS;
 }
+
+void
+mdb_env_set_oomkiller(MDB_env *env, MDB_oomkiller_func *oomkiller)
+{	/* Store the callback in the environment; NULL disables it. */
+	if (env)	/* a NULL env is silently ignored */
+		env->me_oomkiller = oomkiller;
+}
+
+MDB_oomkiller_func*
+mdb_env_get_oomkiller(MDB_env *env)
+{	/* Report the callback currently stored in the environment. */
+	return env ? env->me_oomkiller : NULL;	/* a NULL env yields NULL */
+}
+
 /** @} */
-- 
2.1.0


--------------000000040604020102000407
Content-Type: text/x-patch;
 name="0002-slapd-ITS-7974-oomkiller-feature.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
 filename="0002-slapd-ITS-7974-oomkiller-feature.patch"

>From caf076698e093077fa44e490797847c3187d485b Mon Sep 17 00:00:00 2001
From: Leo Yuriev <leo@yuriev.ru>
Date: Tue, 21 Oct 2014 19:49:05 +0400
Subject: [PATCH 2/2] slapd: ITS#7974 oomkiller feature.

---
 servers/slapd/back-mdb/back-mdb.h  |  3 +++
 servers/slapd/back-mdb/config.c    | 40 ++++++++++++++++++++++++++++++++++----
 servers/slapd/back-mdb/init.c      |  2 ++
 servers/slapd/back-mdb/proto-mdb.h |  2 ++
 4 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/servers/slapd/back-mdb/back-mdb.h b/servers/slapd/back-mdb/back-mdb.h
index 9d5d4b1..be82da8 100644
--- a/servers/slapd/back-mdb/back-mdb.h
+++ b/servers/slapd/back-mdb/back-mdb.h
@@ -81,6 +81,9 @@ struct mdb_info {
 	uint32_t	mi_txn_cp_kbyte;
 	struct re_s		*mi_txn_cp_task;
 	struct re_s		*mi_index_task;
+	uint32_t	mi_renew_lag;
+	uint32_t	mi_renew_percent;
+	int			mi_oomkill;
 
 	mdb_monitor_t	mi_monitor;
 
diff --git a/servers/slapd/back-mdb/config.c b/servers/slapd/back-mdb/config.c
index 5b402c5..b54da49 100644
--- a/servers/slapd/back-mdb/config.c
+++ b/servers/slapd/back-mdb/config.c
@@ -106,6 +106,8 @@ static slap_verbmasks mdb_envflags[] = {
 	{ BER_BVC("writemap"),	MDB_WRITEMAP },
 	{ BER_BVC("mapasync"),	MDB_MAPASYNC },
 	{ BER_BVC("nordahead"),	MDB_NORDAHEAD },
+#define MDB_OOMKILL (MDB_NOMEMINIT << 4)
+	{ BER_BVC("oomkill"),	MDB_OOMKILL },
 	{ BER_BVNULL, 0 }
 };
 
@@ -123,6 +125,23 @@ mdb_checkpoint( void *ctx, void *arg )
 	return NULL;
 }
 
+/* Kill a laggard reader: the MDB_oomkiller_func used by back-mdb (SIGKILLs other processes only) */
+int
+mdb_oomkiller(MDB_env *env, int pid, void* thread_id, size_t txn)
+{
+	if ( pid != getpid() ) {	/* a reader inside this very process is never signalled */
+		if ( kill( pid, SIGKILL ) == 0 ) {
+			Debug( LDAP_DEBUG_ANY, "oomkiller: SIGKILL to pid %i\n", pid, 0, 0 );
+			sched_yield();
+			return 2;	/* >1: reported to LMDB as surely killed */
+		}
+		if ( errno == ESRCH )
+			return 0;	/* no such process: reported as a race */
+		Debug( LDAP_DEBUG_ANY, "oomkiller: SIGKILL to pid %i: %s\n", pid, strerror(errno), 0 );
+	}
+	return -1;	/* reader was not killed */
+}
+
 /* reindex entries on the fly */
 static void *
 mdb_online_index( void *ctx, void *arg )
@@ -313,12 +332,15 @@ mdb_cf_gen( ConfigArgs *c )
 				c->value_int = 1;
 			break;
 
-		case MDB_ENVFLAGS:
-			if ( mdb->mi_dbenv_flags ) {
-				mask_to_verbs( mdb_envflags, mdb->mi_dbenv_flags, &c->rvalue_vals );
-			}
+		case MDB_ENVFLAGS: {
+			long flags = mdb->mi_dbenv_flags;
+			if ( mdb->mi_oomkill )
+				flags |= MDB_OOMKILL;
+			if ( flags )
+				mask_to_verbs( mdb_envflags, flags, &c->rvalue_vals );
 			if ( !c->rvalue_vals ) rc = 1;
 			break;
+		}
 
 		case MDB_INDEX:
 			mdb_attr_index_unparse( mdb, &c->rvalue_vals );
@@ -380,6 +402,8 @@ mdb_cf_gen( ConfigArgs *c )
 			break;
 
 		case MDB_ENVFLAGS:
+			mdb->mi_oomkill = 0;
+			mdb_env_set_oomkiller( mdb->mi_dbenv, NULL );
 			if ( c->valx == -1 ) {
 				int i;
 				for ( i=0; mdb_envflags[i].mask; i++) {
@@ -596,6 +620,14 @@ mdb_cf_gen( ConfigArgs *c )
 		for ( i=1; i<c->argc; i++ ) {
 			j = verb_to_mask( c->argv[i], mdb_envflags );
 			if ( mdb_envflags[j].mask ) {
+
+				if ( MDB_OOMKILL == mdb_envflags[j].mask ) {	/* slapd-side keyword, handled here instead of mdb_env_set_flags() */
+					mdb->mi_oomkill = 1;
+					if ( mdb->mi_flags & MDB_IS_OPEN )
+						mdb_env_set_oomkiller( mdb->mi_dbenv, mdb_oomkiller );
+					continue;	/* 'break' would leave the argv loop and drop the remaining keywords */
+				}
+
 				if ( mdb->mi_flags & MDB_IS_OPEN )
 					rc = mdb_env_set_flags( mdb->mi_dbenv, mdb_envflags[j].mask, 1 );
 				else
diff --git a/servers/slapd/back-mdb/init.c b/servers/slapd/back-mdb/init.c
index 1c5ab83..c7c09d4 100644
--- a/servers/slapd/back-mdb/init.c
+++ b/servers/slapd/back-mdb/init.c
@@ -150,6 +150,8 @@ mdb_db_open( BackendDB *be, ConfigReply *cr )
 		goto fail;
 	}
 
+	mdb_env_set_oomkiller( mdb->mi_dbenv, mdb->mi_oomkill ? mdb_oomkiller : NULL);
+
 #ifdef HAVE_EBCDIC
 	strcpy( path, mdb->mi_dbenv_home );
 	__atoe( path );
diff --git a/servers/slapd/back-mdb/proto-mdb.h b/servers/slapd/back-mdb/proto-mdb.h
index b6b8d7c..3ec986e 100644
--- a/servers/slapd/back-mdb/proto-mdb.h
+++ b/servers/slapd/back-mdb/proto-mdb.h
@@ -20,6 +20,8 @@ LDAP_BEGIN_DECL
 
 #define MDB_UCTYPE	"MDB"
 
+MDB_oomkiller_func mdb_oomkiller;	/* defined in config.c */
+
 /*
  * attr.c
  */
-- 
2.1.0


--------------000000040604020102000407--