[Date Prev][Date Next] [Chronological] [Thread] [Top]

Re: (ITS#5077) syncrepl.add_cmp() infinite loop on swapped values



--Apple-Mail-4--750948447
Content-Transfer-Encoding: 7bit
Content-Type: text/plain;
	charset=US-ASCII;
	delsp=yes;
	format=flowed


On Aug 23, 2007, at 2:56 PM, Howard Chu wrote:
[ ... re attribute value matching ... ]
>
> The only way to do this right, without dying on an O(n^2)  
> algorithm, is to sort both arrays first and then walk through them  
> in order as the original code did. If you want to submit such a  
> patch, please do. I don't have a lot of time to focus on this at  
> the moment.

I did look at this, and the attached patch is what I came up with,
relative to HEAD 1.356.  I have tested it, but unfortunately the
results are confounded by other problems - a crash (syncprov thread
queue entry so->s_qtask wasn't in slapd_rq, and some pointer values in
the former structure were 0x30303030), and in the end the replica was
missing a lot of attribute deletions.  Wouldn't think either was
related to attr_cmp(), but I am out of time to look at these problems
and keep trying to demonstrate a 100% successful syncrepl replication.

I didn't sort the arrays.  I tried that (quicksort algorithm), and it
was very expensive.  If there's reason to hope that the old and new
values will be mostly in the same order, then it is much cheaper in
this case to start with a simple pairwise walk through, as in the
older code, and then clean up with a full O(n^2) match on what's left.
The implementation is also simpler.

Actual performance depends on the data, but with the data I used for
tests of the algorithms, I found that this approach outperforms the
original 2.4.4 implementation, in terms of bvmatch counts.  That's
because the 2.4.4 needed to look ahead through the rest of the value
array in a couple of places in the loop.  Performance will be worst
where many values are changed, i.e., many adds and deletes - many
adds and few deletes, or vice versa, will be significantly faster.
I think this is good for most real world situations.

I restored variable names from 2.4.4 because they returned to their
original usage, likewise with the sml_values handling, and dedicated
a variable to the modtail logic at the end of attr_cmp().

I haven't looked into it, but happened to notice just this afternoon
a comment in dn_callback():  "We assume that attributes are saved in
the same order in the remote and local databases.  So ..."

	Donn Cave, donn@u.washington.edu


--Apple-Mail-4--750948447
Content-Transfer-Encoding: 7bit
Content-Type: application/octet-stream;
	x-unix-mode=0644;
	name=syncrepl.c
Content-Disposition: attachment;
	filename=syncrepl.c

--- servers/slapd/syncrepl.c.orig	2007-09-18 15:10:52.000000000 -0700
+++ servers/slapd/syncrepl.c	2007-09-18 15:57:33.000000000 -0700
@@ -2673,17 +2673,98 @@
 	return rc;
 }
 
+/*  Remove null elements. */
+static int
+nonulls( struct berval **a, int na )
+{
+	int i, n;
+	for ( i = n = 0; i < na; ++i ) {
+		if ( a[i] ) {
+			if ( i > n )
+				a[n] = a[i];
+			++n;
+		}
+	}
+	return n;
+}
+
+/*
+**  Secondary difference:  remove any remaining matching elements,
+**  by brute force M*N comparison.
+*/
+static void
+ixpurge( struct berval **iva, int niva, int *nivap,
+	 struct berval **ivb, int nivb, int *nivbp )
+{
+	int ia, ib;
+	for ( ia = 0; ia < niva; ++ia ) {
+		for ( ib = 0; ib < nivb; ++ib ) {
+ 			if ( ivb[ib] && bvmatch( iva[ia], ivb[ib] ) ) {
+				iva[ia] = 0;
+				ivb[ib] = 0;
+				break;
+			}
+		}
+	}
+	*nivap = nonulls( iva, niva );
+	*nivbp = nonulls( ivb, nivb );
+}
+
+/*
+**  Compare two lists of berval strings that are supposed to be in
+**  similar order, and return list of indices to old (delete) and
+**  new (add) elements of the respective lists.  If of significantly
+**  different sizes, the smaller should be first for optimal performance.
+*/
+static void
+diff1(
+	struct berval *bva,
+	int na,
+	struct berval *bvb,
+	int nb,
+	struct berval **dva,
+	int *ndvap,
+	struct berval **dvb,
+	int *ndvbp
+)
+{
+	int ia, ib, ib0, ndva, ndvb, ndvar, ndvbr;
+	ndva = ndvb = 0;
+	for ( ia = ib0 = 0; ia < na; ++ia ) {
+		/*  Find a/b match, starting from last b anchor. */
+		for ( ib = ib0; ib < nb && !bvmatch( &bva[ia], &bvb[ib] ); ++ib )
+				;
+		if ( ib < nb ) {
+			/*  Found.  Record non-matching b's, reset anchor. */
+			for ( ; ib0 < ib; ++ib0 )
+				dvb[ndvb++] = &bvb[ib0];
+			++ib0;
+		} else {
+			/*  Not found, record non-matching a. */
+			dva[ndva++] = &bva[ia];
+		}
+	}
+	/*  Record remaining unmatched b's. */
+	for ( ; ib0 < nb; ++ib0 )
+		dvb[ndvb++] = &bvb[ib0];
+	/*  Clean up spurious non-matches, in case order assumptions invalid. */
+	ixpurge( dva, ndva, &ndvar, dvb, ndvb, &ndvbr );
+	*ndvap = ndvar;
+	*ndvbp = ndvbr;
+}
+
 static void
 attr_cmp( Operation *op, Attribute *old, Attribute *new,
 	Modifications ***mret, Modifications ***mcur )
 {
-	int i, j;
+	int tails;
+	int i;
 	Modifications *mod, **modtail;
 
 	modtail = *mret;
 
 	if ( old ) {
-		int n, o, nn, no;
+		int n, o, a, d;
 		struct berval **adds, **dels;
 		/* count old and new */
 		for ( o=0; old->a_vals[o].bv_val; o++ ) ;
@@ -2692,105 +2773,83 @@
 		/* there MUST be both old and new values */
 		assert( o != 0 );
 		assert( n != 0 );
-		j = 0;
 
 		adds = op->o_tmpalloc( sizeof(struct berval *) * n, op->o_tmpmemctx );
 		dels = op->o_tmpalloc( sizeof(struct berval *) * o, op->o_tmpmemctx );
 
-		for ( i=0; i<o; i++ ) dels[i] = &old->a_vals[i];
-		for ( i=0; i<n; i++ ) adds[i] = &new->a_vals[i];
-
-		nn = n; no = o;
-
-		for ( i=0; i<o; i++ ) {
-			for ( j=0; j<n; j++ ) {
-				if ( !adds[j] )
-					continue;
-				if ( bvmatch( dels[i], adds[j] ) ) {
-					no--;
-					nn--;
-					adds[j] = NULL;
-					dels[i] = NULL;
-					break;
-				}
-			}
-		}
+		if ( o > n )
+			diff1( new->a_vals, n, old->a_vals, o, adds, &a, dels, &d );
+		else
+			diff1( old->a_vals, o, new->a_vals, n, dels, &d, adds, &a );
+		tails = 0;
 
-		i = j;
 		/* all old values were deleted, just use the replace op */
-		if ( no == o ) {
-			i = j-1;
-		} else if ( no ) {
+		if ( d == o ) {
+			tails = 1;
+		} else if ( d ) {
 		/* delete some values */
 			mod = ch_malloc( sizeof( Modifications ) );
 			mod->sml_op = LDAP_MOD_DELETE;
 			mod->sml_flags = 0;
 			mod->sml_desc = old->a_desc;
 			mod->sml_type = mod->sml_desc->ad_cname;
-			mod->sml_values = ch_malloc( ( no + 1 ) * sizeof(struct berval) );
+			mod->sml_values = ch_malloc( ( d + 1 ) * sizeof(struct berval) );
 			if ( old->a_vals != old->a_nvals ) {
-				mod->sml_nvalues = ch_malloc( ( no + 1 ) * sizeof(struct berval) );
+				mod->sml_nvalues = ch_malloc( ( d + 1 ) * sizeof(struct berval) );
 			} else {
 				mod->sml_nvalues = NULL;
 			}
-			j = 0;
-			for ( i = 0; i < o; i++ ) {
-				if ( !dels[i] ) continue;
-				ber_dupbv( &mod->sml_values[j], &old->a_vals[i] );
+			for ( i = 0; i < d; i++ ) {
+				ber_dupbv( &mod->sml_values[i], dels[i] );
 				if ( mod->sml_nvalues ) {
-					ber_dupbv( &mod->sml_nvalues[j], &old->a_nvals[i] );
+					ber_dupbv( &mod->sml_nvalues[i], dels[i] );
 				}
-				j++;
 			}
-			BER_BVZERO( &mod->sml_values[j] );
+			BER_BVZERO( &mod->sml_values[i] );
 			if ( mod->sml_nvalues ) {
-				BER_BVZERO( &mod->sml_nvalues[j] );
+				BER_BVZERO( &mod->sml_nvalues[i] );
 			}
 			*modtail = mod;
 			modtail = &mod->sml_next;
-			i = j;
+			tails = 0;
 		}
 		op->o_tmpfree( dels, op->o_tmpmemctx );
 		/* some values were added */
-		if ( nn && no < o ) {
+		if ( a && d < o ) {
 			mod = ch_malloc( sizeof( Modifications ) );
 			mod->sml_op = LDAP_MOD_ADD;
 			mod->sml_flags = 0;
 			mod->sml_desc = old->a_desc;
 			mod->sml_type = mod->sml_desc->ad_cname;
-			mod->sml_values = ch_malloc( ( nn + 1 ) * sizeof(struct berval) );
+			mod->sml_values = ch_malloc( ( a + 1 ) * sizeof(struct berval) );
 			if ( old->a_vals != old->a_nvals ) {
-				mod->sml_nvalues = ch_malloc( ( nn + 1 ) * sizeof(struct berval) );
+				mod->sml_nvalues = ch_malloc( ( a + 1 ) * sizeof(struct berval) );
 			} else {
 				mod->sml_nvalues = NULL;
 			}
-			j = 0;
-			for ( i = 0; i < n; i++ ) {
-				if ( !adds[i] ) continue;
-				ber_dupbv( &mod->sml_values[j], &new->a_vals[i] );
+			for ( i = 0; i < a; i++ ) {
+				ber_dupbv( &mod->sml_values[i], adds[i] );
 				if ( mod->sml_nvalues ) {
-					ber_dupbv( &mod->sml_nvalues[j], &new->a_nvals[i] );
+					ber_dupbv( &mod->sml_nvalues[i], adds[i] );
 				}
-				j++;
 			}
-			BER_BVZERO( &mod->sml_values[j] );
+			BER_BVZERO( &mod->sml_values[i] );
 			if ( mod->sml_nvalues ) {
-				BER_BVZERO( &mod->sml_nvalues[j] );
+				BER_BVZERO( &mod->sml_nvalues[i] );
 			}
 			*modtail = mod;
 			modtail = &mod->sml_next;
-			i = j;
+			tails = 0;
 		}
 		op->o_tmpfree( adds, op->o_tmpmemctx );
 	} else {
 		/* new attr, just use the new mod */
-		i = 0;
-		j = 1;
+		tails = 1;
 	}
 	/* advance to next element */
 	mod = **mcur;
 	if ( mod ) {
-		if ( i != j ) {
+		if ( tails ) {
 			**mcur = mod->sml_next;
 			*modtail = mod;
 			modtail = &mod->sml_next;

--Apple-Mail-4--750948447--