[Date Prev][Date Next] [Chronological] [Thread] [Top]

Re: Improved handling for large number of databases / Access newly opened database from another transaction



Here are the test results (run on an iMac with a 2.7 GHz Intel Core i5):

lmdb improved 

[1000000] iterations (begin, cursor_open, cursor_close, abort) with [10] databases in [0.41516] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [100] databases in [0.35304] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [1000] databases in [0.49425] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [10000] databases in [2.23236] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [100000] databases in [15.28527] seconds



lmdb original

[1000000] iterations (begin, cursor_open, cursor_close, abort) with [10] databases in [0.35039] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [100] databases in [0.65547] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [1000] databases in [5.48897] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [10000] databases in [67.13091] seconds
[1000000] iterations (begin, cursor_open, cursor_close, abort) with [100000] databases in [781.53778] seconds




As expected, with a small number of databases the original lmdb is slightly faster, but the improved handling quickly outperforms the original implementation as the number of databases grows.

Test code:

#include "lmdb.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include <sys/time.h>
#include <mach/clock.h>
#include <mach/mach.h>

/* Directory of the LMDB environment: passed to mdb_env_open() in setup() and
 * used by cleanup() to build the paths of the data and lock files. */
static char *env_name = "/Developer/tmp/testdb";

/* Map size handed to mdb_env_set_mapsize() in setup(): 100 MiB. */
#define MAX_MAP_SIZ     (1024 * 1024 * 100)

/* NOTE(review): the name is misspelled ("ITERATIPONS") and nothing in the code
 * shown references this macro; main() passes the literal 1000 * 1000 to test()
 * instead. */
#define NUM_ITERATIPONS (1000 * 1000)

/*
 * Error-checking helpers. All of them use a variable named `rc`, which must be
 * declared in the scope where the macro is expanded.
 *
 *   E(expr)        - evaluate expr, store the result in rc, and abort unless
 *                    it equals MDB_SUCCESS.
 *   RES(err, expr) - evaluate expr into rc; yields true when rc == err,
 *                    otherwise aborts if rc is non-zero and yields 0 when rc
 *                    is zero.
 *   CHECK(test, m) - if test is false, print "file:line: m: <mdb_strerror(rc)>"
 *                    to stderr and call abort().
 */
#define E(expr) CHECK((rc = (expr)) == MDB_SUCCESS, #expr)
#define RES(err, expr) ((rc = expr) == (err) || (CHECK(!rc, #expr), 0))
#define CHECK(test, msg) ((test) ? (void)0 : ((void)fprintf(stderr, \
"%s:%d: %s: %s\n", __FILE__, __LINE__, msg, mdb_strerror(rc)), abort()))

/* Environment handle created by setup() and closed by cleanup(). */
static MDB_env *env;
/* Handle of the unnamed database, opened in setup() with a NULL name. */
static MDB_dbi main_dbi;

/* Number of named databases opened by the last setup() call. */
static MDB_dbi numDbs = 0;
/* Heap array of numDbs named-database handles, filled in by setup(). */
static MDB_dbi *dbi;

/*
 * Create and open the LMDB environment at env_name, then open dbNum named
 * databases (creating them when missing) inside a single write transaction.
 *
 * Results are published through the file-scope globals: env holds the
 * environment, main_dbi the unnamed database, and dbi[0 .. numDbs-1] the
 * named database handles. Database names are the index formatted as "%03x".
 *
 * Any LMDB error aborts the process through the E() macro; a failed
 * allocation of the handle array aborts as well.
 */
void setup(unsigned int dbNum) {
    int rc;
    MDB_txn *txn;

    E(mdb_env_create(&env));
    E(mdb_env_set_maxreaders(env, 1));
    E(mdb_env_set_maxdbs(env, dbNum));
    E(mdb_env_set_mapsize(env, MAX_MAP_SIZ));
    E(mdb_env_open(env, env_name, 0, 0664));

    numDbs = dbNum;
    /* calloc() rejects a count * size product that would overflow size_t. */
    dbi = calloc(numDbs, sizeof *dbi);
    if (dbi == NULL && numDbs != 0) {
        fprintf(stderr, "%s:%d: out of memory allocating %u database handles\n",
                __FILE__, __LINE__, (unsigned int)numDbs);
        abort();
    }

    E(mdb_txn_begin(env, NULL, 0, &txn));
    E(mdb_dbi_open(txn, NULL, 0, &main_dbi));

    for (unsigned int i = 0; i < numDbs; i++) {
        char name[16];

        /* At most 8 hex digits for a 32-bit index, so this never truncates. */
        snprintf(name, sizeof name, "%03x", i);

        E(mdb_dbi_open(txn, name, MDB_CREATE, &dbi[i]));
    }

    E(mdb_txn_commit(txn));
}

void cleanup() {
    mdb_env_close(env);
    
    char name[1024];

    sprintf(name, "%s/data.mdb", env_name);
    unlink(name);
    sprintf(name, "%s/lock.mdb", env_name);
    unlink(name);
}

/*
 * Return the current reading of the Mach CALENDAR_CLOCK as a struct timespec.
 *
 * The clock service port is acquired and released on every call. If either
 * Mach call fails, the process aborts rather than returning an uninitialized
 * timestamp.
 */
struct timespec get_time(void) {
    struct timespec ts;
    clock_serv_t cclock;
    mach_timespec_t mts;
    kern_return_t kr;

    kr = host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock);
    if (kr != KERN_SUCCESS) {
        fprintf(stderr, "%s:%d: host_get_clock_service failed (%d)\n",
                __FILE__, __LINE__, (int)kr);
        abort();
    }

    kr = clock_get_time(cclock, &mts);
    /* Release the port before checking kr so it is not leaked on failure. */
    mach_port_deallocate(mach_task_self(), cclock);
    if (kr != KERN_SUCCESS) {
        fprintf(stderr, "%s:%d: clock_get_time failed (%d)\n",
                __FILE__, __LINE__, (int)kr);
        abort();
    }

    ts.tv_sec = mts.tv_sec;
    ts.tv_nsec = mts.tv_nsec;

    return ts;
}

/*
 * Benchmark loop: num_iterations times, begin a read-only transaction, open a
 * cursor on the first named database (dbi[0]), close the cursor and abort the
 * transaction. The elapsed time between two get_time() readings is printed
 * to stdout together with the iteration count and numDbs.
 *
 * Requires a prior setup() call with at least one database. LMDB errors abort
 * the process through the E() macro.
 */
void test(unsigned int num_iterations) {
   int rc;
   MDB_txn *txn;
   MDB_cursor *cursor;

   struct timespec ts = get_time();

   for (unsigned int i = 0; i < num_iterations; i++) {
      E(mdb_txn_begin(env, NULL, MDB_RDONLY, &txn));

      E(mdb_cursor_open(txn, dbi[0], &cursor));

      mdb_cursor_close(cursor);
      mdb_txn_abort(txn);
   }

   struct timespec te = get_time();

   /* Subtract component-wise first: converting each full timestamp to double
    * and subtracting afterwards discards low-order nanoseconds. */
   double elapsed = (double)(te.tv_sec - ts.tv_sec) +
                    1.0e-9 * (double)(te.tv_nsec - ts.tv_nsec);

   /* Both counters are unsigned, so they are printed with %u. */
   printf("[%u] iterations (begin, cursor_open, cursor_close, abort) with [%u] databases in [%.5f] seconds\n\n",
     num_iterations,
     (unsigned int)numDbs,
     elapsed);
}

/*
 * Run the benchmark once per database count, from 10 up to 100000 in powers
 * of ten. Each run builds a fresh environment, times one million iterations
 * and then tears the environment down again.
 */
int main(int argc,char * argv[]) {
    static const unsigned int db_counts[] = { 10, 100, 1000, 10000, 100000 };
    const size_t run_count = sizeof db_counts / sizeof db_counts[0];

    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    for (size_t run = 0; run < run_count; run++) {
        setup(db_counts[run]);
        test(1000 * 1000);
        cleanup();
    }

    return 0;
}










On 27/05/16 07:37, "Howard Chu" <hyc@symas.com> wrote:

>Jürg Bircher wrote:
>> Hello
>>
>> Improved handling for large number of databases
>> ===============================================
>
>> If interested let me know how to contribute.
>
>Looks interesting, yes. I assume you have profiled the code before and after 
>the suggested changes, please provide your profiling results.
>
>Please read the Developer Guidelines.
>  http://www.openldap.org/devel/contributing.html
>
> > Access newly opened database from another transaction
> > =======================================================
>
>Sounds like an oddball case. Applications should open all their DBIs from a 
>single thread and not start any other threads/transactions until all setup is 
>completed.
>
>> Hope it is useful!
>
>Thanks.
>
>-- 
>   -- Howard Chu
>   CTO, Symas Corp.           http://www.symas.com
>   Director, Highland Sun     http://highlandsun.com/hyc/
>   Chief Architect, OpenLDAP  http://www.openldap.org/project/