[Date Prev][Date Next] [Chronological] [Thread] [Top]

Re: (ITS#8875) [Patch] Performance problems in back-mdb with large DITs and many aliases



This is a multi-part message in MIME format.
--------------0F1DE22CD9273A65E1AE5118
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 7bit

Howard Chu wrote:
> Henrik Bohnenkamp wrote:
>> On Mon, Jul 15, 2019 at 02:26:59PM +0100, Howard Chu wrote:
>>>
>>> Fyi, on our problematic test database with 11M entries and 3.7M aliases, a search with -a always, starting from the
>>> DB suffix, took 4 minutes without this patch, and 1235 minutes with this patch.
>>>
>>> Needless to say, that's not looking good. Still checking other test cases.
>>
>> Interesting, so the behavior is reversed now :-). I assume you have
>> found an alternative approach to solve the problem. That's fine with
>> me, I want the problem solved, not my patch integrated.  I'm of course
>> interested in how you do it. Surely you did not get the 4 minutes with
>> a stock 2.4.48 slapd?
> 
> For this size of DB we needed the ITS#8977 patches to accommodate larger IDLs.
> (I used 24 bits for IDLs, 16.7M slots)
> Also at this size, the IDL processing itself is the main bottleneck now. We would
> need to switch to bitmaps or trees to avoid this bottleneck, but that's also a
> much larger change than we can consider for this release.
> 
I've set up a more modest test database along the lines of ITS#7657. It has 500,000 users,
30,000 aliases total, and 435 in ou=alias2 (all the rest under ou=alias1).

For unpatched back-mdb:

time ../clients/tools/ldapsearch -x -H ldap://:9012 -D cn=manager,dc=example,dc=com -w secret -b ou=alias1,dc=example,dc=com -a always
# search result
search: 2
result: 0 Success

# numResponses: 29567
# numEntries: 29566

real    0m42.504s
user    0m1.344s
sys     0m2.996s

time ../clients/tools/ldapsearch -x -H ldap://:9012 -D cn=manager,dc=example,dc=com -w secret -b ou=alias2,dc=example,dc=com -a always
# search result
search: 2
result: 0 Success

# numResponses: 437
# numEntries: 436

real    0m48.406s
user    0m0.040s
sys     0m0.076s

For back-mdb with e90e8c7d3c12d897bb0584ba04dc519d4f23acf9

time ../clients/tools/ldapsearch -x -H ldap://:9012 -D cn=manager,dc=example,dc=com -w secret -b ou=alias1,dc=example,dc=com -a always
# search result
search: 2
result: 0 Success

# numResponses: 29567
# numEntries: 29566

real    0m5.500s
user    0m1.516s
sys     0m2.944s

time ../clients/tools/ldapsearch -x -H ldap://:9012 -D cn=manager,dc=example,dc=com -w secret -b ou=alias2,dc=example,dc=com -a always
# search result
search: 2
result: 0 Success

# numResponses: 437
# numEntries: 436

real    0m0.399s
user    0m0.048s
sys     0m0.060s

For back-mdb with this ITS#8875 patch

time ../clients/tools/ldapsearch -x -H ldap://:9012 -D cn=manager,dc=example,dc=com -w secret -b ou=alias1,dc=example,dc=com -a always
# search result
search: 2
result: 0 Success

# numResponses: 29567
# numEntries: 29566

real    0m6.020s
user    0m1.640s
sys     0m3.372s

time ../clients/tools/ldapsearch -x -H ldap://:9012 -D cn=manager,dc=example,dc=com -w secret -b ou=alias2,dc=example,dc=com -a always
# search result
search: 2
result: 0 Success

# numResponses: 437
# numEntries: 436

real    0m0.203s
user    0m0.052s
sys     0m0.048s

It seems close enough in this case (I didn't do enough repeated runs to average out any measurement error) while
the committed patch performs better on the really ugly test case.

The tool to generate the test LDIF is attached. It reads an LDIF containing 500,000 users on stdin, and outputs the same LDIF,
with aliases interspersed, on stdout.

-- 
  -- Howard Chu
  CTO, Symas Corp.           http://www.symas.com
  Director, Highland Sun     http://highlandsun.com/hyc/
  Chief Architect, OpenLDAP  http://www.openldap.org/project/

--------------0F1DE22CD9273A65E1AE5118
Content-Type: text/x-csrc;
 name="mkalias.c"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
 filename="mkalias.c"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* Exclusive upper bound for the randomly chosen user indexes (rand() % NUMUSERS) */
#define NUMUSERS 500000
/* Total number of user indexes selected to receive an alias entry */
#define NUMALIASES	30000
/* How many of the selected aliases are created under ou=alias2 rather than ou=alias1 */
#define NUMSUBALIASES	435

/* LDIF text of the ou=alias1 organizationalUnit entry (ends with a newline) */
static const char ou1[] =
	"dn: ou=alias1,dc=example,dc=com\n"
	"objectclass: top\n"
	"objectclass: organizationalUnit\n"
	"ou: alias1\n";

/* LDIF text of the ou=alias2 organizationalUnit entry (ends with a newline) */
static const char ou2[] =
	"dn: ou=alias2,dc=example,dc=com\n"
	"objectclass: top\n"
	"objectclass: organizationalUnit\n"
	"ou: alias2\n";

/*
 * qsort() comparator ordering int elements ascending.
 *
 * a, b: pointers to the two int elements being compared.
 * Returns -1, 0 or 1 when *a is less than, equal to or greater than *b.
 *
 * Comparisons are used instead of a subtraction because the difference of
 * two ints can overflow, which is undefined and yields the wrong sign.
 */
int qcmp(const void *a, const void *b)
{
	const int *i = a, *j = b;
	return (*i > *j) - (*i < *j);
}

/*
 * Fill v[0..count) with distinct pseudo-random values in [0, range),
 * sorted ascending.
 *
 * Requires count <= range: otherwise no duplicate-free selection exists
 * and the loop cannot terminate.
 */
static void pick_unique_sorted(int *v, int count, int range)
{
	int i, changed;

	for (i = 0; i < count; i++)
		v[i] = rand() % range;
	do {
		changed = 0;
		qsort(v, count, sizeof(*v), qcmp);
		/* redraw every adjacent duplicate in this pass, then re-sort and
		 * re-check; a pass without redraws proves the values are unique */
		for (i = 1; i < count; i++) {
			if (v[i-1] == v[i]) {
				v[i] = rand() % range;
				changed = 1;
			}
		}
	} while (changed);
}

/*
 * Copy an LDIF from stdin to stdout, interspersing alias entries.
 *
 * Lines starting with '#' are dropped. Just before the first entry whose DN
 * starts with "uid=", the ou=alias1 and ou=alias2 container entries are
 * written. From that entry on, every "dn: " line counts as one user; for
 * each randomly selected user index an alias entry pointing at that DN is
 * written in front of the user entry, under ou=alias2 for the selected
 * sub-subset and under ou=alias1 otherwise.
 *
 * Returns 0.
 */
int main(void) {
	char line[1024];
	int aliases[NUMALIASES];
	int subaliases[NUMSUBALIASES];
	int i;
	int gotuser = 0, nusers = 0, naliases = 0, nsubaliases = 0;

	srand((unsigned)time(NULL));

	/* select a random subset of users to generate aliases */
	pick_unique_sorted(aliases, NUMALIASES, NUMUSERS);

	/* select a random subset of aliases for the target subtree: first as
	 * indexes into aliases[], then mapped to the user indexes themselves.
	 * Both arrays are ascending, so the mapped values stay ascending. */
	pick_unique_sorted(subaliases, NUMSUBALIASES, NUMALIASES);
	for (i = 0; i < NUMSUBALIASES; i++)
		subaliases[i] = aliases[subaliases[i]];

	while (fgets(line, sizeof(line), stdin) != NULL) {
		if (line[0] == '#')
			continue;
		if (!strncmp(line, "dn: ", 4)) {
			if (!gotuser && !strncmp(line+4, "uid=", 4)) {
				gotuser = 1;
				/* puts() adds the blank line that terminates each entry */
				puts(ou1);
				puts(ou2);
			}
			/* the first user is counted too, so that every index in
			 * [0, NUMUSERS) can correspond to a user of the input */
			if (gotuser) {
				/* the cursors are bounds-checked: once the last selected
				 * index has been consumed there is nothing left to match */
				int is_alias = naliases < NUMALIASES &&
					nusers == aliases[naliases];
				int is_sub = nsubaliases < NUMSUBALIASES &&
					nusers == subaliases[nsubaliases];

				if (is_alias || is_sub) {
					const char *ou = is_sub ? "alias2" : "alias1";
					int id;

					/* advance the cursors even if the DN turns out to be
					 * unusable, so later selections still line up */
					if (is_sub)
						nsubaliases++;
					if (is_alias)
						naliases++;

					if (sscanf(line, "dn: uid=user.%d,", &id) == 1) {
						printf("dn: uid=x.%d,ou=%s,dc=example,dc=com\n", id, ou);
						puts("objectclass: alias");
						puts("objectclass: extensibleObject");
						/* line+4 still carries its newline; the extra "\n"
						 * produces the blank line ending the alias entry */
						printf("aliasedObjectName: %s\n", line+4);
					} else {
						fprintf(stderr,
							"mkalias: no user id in DN, alias skipped: %s",
							line);
					}
				}
				nusers++;
			}
		}
		fputs(line, stdout);
	}
	return 0;
}

--------------0F1DE22CD9273A65E1AE5118--