[Date Prev][Date Next] [Chronological] [Thread] [Top]

Re: LMDB and text encoding



Here is a fixed version of the patch.

On Thu, Jan 29, 2015 at 10:29 AM, Timur Kristóf <timur.kristof@gmail.com> wrote:
>> mdb_dbi_open treats its name parameter as a C string. This means UTF-8 on
>> unixes and ANSI on Windows, which is problematic for cross-platform
>> applications. [...]
>
> Here is a patch that addresses this concern.
> If you like it, I'll move on to the other issue.
From 2e3a1ccaa7c36fb3a3d5db040067b21ed4d75962 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timur=20Krist=C3=B3f?= <venemo@msn.com>
Date: Thu, 29 Jan 2015 10:15:47 +0100
Subject: [PATCH] added an MDB_val variant of mdb_dbi_open

---
 libraries/liblmdb/lmdb.h     |  7 +++++++
 libraries/liblmdb/mdb.c      | 31 ++++++++++++++++++-------------
 libraries/liblmdb/mdb_stat.c |  4 ++--
 3 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h
index ebfbc5d..535ac8b 100644
--- a/libraries/liblmdb/lmdb.h
+++ b/libraries/liblmdb/lmdb.h
@@ -1099,6 +1099,13 @@ int  mdb_txn_renew(MDB_txn *txn);
 	 */
 int  mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi);
 
+	/** @brief Open a database in the environment.
+	 *
+	 * Same as #mdb_dbi_open(), but takes the name as an #MDB_val (data pointer plus length)
+	 * instead of a zero-terminated C string, so database names may use any encoding.
+	 */
+int mdb_dbi_open2(MDB_txn *txn, MDB_val *name, unsigned int flags, MDB_dbi *dbi);
+
 	/** @brief Retrieve statistics for a database.
 	 *
 	 * @param[in] txn A transaction handle returned by #mdb_txn_begin()
diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c
index c4dc269..97e3c06 100644
--- a/libraries/liblmdb/mdb.c
+++ b/libraries/liblmdb/mdb.c
@@ -9144,15 +9144,22 @@ mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi)
 		 : ((f & MDB_REVERSEDUP) ? mdb_cmp_memnr : mdb_cmp_memn));
 }
 
-int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi)
+int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi) {
+	MDB_val name_as_mdbval;
+	name_as_mdbval.mv_data = (void *)name;
+	name_as_mdbval.mv_size = name ? strlen(name) : 0;
+	
+	return mdb_dbi_open2(txn, &name_as_mdbval, flags, dbi);
+}
+
+int mdb_dbi_open2(MDB_txn *txn, MDB_val *name, unsigned int flags, MDB_dbi *dbi)
 {
-	MDB_val key, data;
+	MDB_val data;
 	MDB_dbi i;
 	MDB_cursor mc;
 	MDB_db dummy;
 	int rc, dbflag, exact;
 	unsigned int unused = 0, seq;
-	size_t len;
 
 	if (txn->mt_dbxs[FREE_DBI].md_cmp == NULL) {
 		mdb_default_cmp(txn, FREE_DBI);
@@ -9164,7 +9171,7 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
 		return MDB_BAD_TXN;
 
 	/* main DB? */
-	if (!name) {
+	if (!name || !name->mv_data) {
 		*dbi = MAIN_DBI;
 		if (flags & PERSISTENT_FLAGS) {
 			uint16_t f2 = flags & PERSISTENT_FLAGS;
@@ -9183,15 +9190,14 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
 	}
 
 	/* Is the DB already open? */
-	len = strlen(name);
 	for (i=2; i<txn->mt_numdbs; i++) {
 		if (!txn->mt_dbxs[i].md_name.mv_size) {
 			/* Remember this free slot */
 			if (!unused) unused = i;
 			continue;
 		}
-		if (len == txn->mt_dbxs[i].md_name.mv_size &&
-			!strncmp(name, txn->mt_dbxs[i].md_name.mv_data, len)) {
+		if (name->mv_size == txn->mt_dbxs[i].md_name.mv_size &&
+			!memcmp(name->mv_data, txn->mt_dbxs[i].md_name.mv_data, name->mv_size)) {
 			*dbi = i;
 			return MDB_SUCCESS;
 		}
@@ -9208,10 +9214,8 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
 	/* Find the DB info */
 	dbflag = DB_NEW|DB_VALID;
 	exact = 0;
-	key.mv_size = len;
-	key.mv_data = (void *)name;
 	mdb_cursor_init(&mc, txn, MAIN_DBI, NULL);
-	rc = mdb_cursor_set(&mc, &key, &data, MDB_SET, &exact);
+	rc = mdb_cursor_set(&mc, name, &data, MDB_SET, &exact);
 	if (rc == MDB_SUCCESS) {
 		/* make sure this is actually a DB */
 		MDB_node *node = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]);
@@ -9224,15 +9228,16 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
 		memset(&dummy, 0, sizeof(dummy));
 		dummy.md_root = P_INVALID;
 		dummy.md_flags = flags & PERSISTENT_FLAGS;
-		rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA);
+		rc = mdb_cursor_put(&mc, name, &data, F_SUBDATA);
 		dbflag |= DB_DIRTY;
 	}
 
 	/* OK, got info, add to table */
 	if (rc == MDB_SUCCESS) {
 		unsigned int slot = unused ? unused : txn->mt_numdbs;
-		txn->mt_dbxs[slot].md_name.mv_data = strdup(name);
-		txn->mt_dbxs[slot].md_name.mv_size = len;
+		txn->mt_dbxs[slot].md_name.mv_data = malloc(name->mv_size);
+		memcpy(txn->mt_dbxs[slot].md_name.mv_data, name->mv_data, name->mv_size);
+		txn->mt_dbxs[slot].md_name.mv_size = name->mv_size;
 		txn->mt_dbxs[slot].md_rel = NULL;
 		txn->mt_dbflags[slot] = dbflag;
 		/* txn-> and env-> are the same in read txns, use
diff --git a/libraries/liblmdb/mdb_stat.c b/libraries/liblmdb/mdb_stat.c
index 1e92292..01f5ccb 100644
--- a/libraries/liblmdb/mdb_stat.c
+++ b/libraries/liblmdb/mdb_stat.c
@@ -229,8 +229,8 @@ int main(int argc, char *argv[])
 		while ((rc = mdb_cursor_get(cursor, &key, NULL, MDB_NEXT_NODUP)) == 0) {
 			char *str;
 			MDB_dbi db2;
-			if (memchr(key.mv_data, '\0', key.mv_size))
-				continue;
+			/* A NUL byte no longer rules out a named DB; an mdb_is_named_database() function would be needed here to tell for sure. */
+			
 			str = malloc(key.mv_size+1);
 			memcpy(str, key.mv_data, key.mv_size);
 			str[key.mv_size] = '\0';
-- 
2.1.0