[Date Prev][Date Next]
[Chronological]
[Thread]
[Top]
Re: New Phonetic Design
Howard Chu <hyc@symas.com> writes:
> This looks interesting, but a contribution of this size needs an
> explicit Copyright / License statement. See the OpenLDAP contributing
> guidelines. http://www.openldap.org/devel/contributing.html
>
> Also, OpenLDAP 2.2 is frozen, no new features are going into it. Anyway,
> all new code must be developed using the CVS HEAD, not a release branch.
> All of these policies are explained on the contributing web page I
> referenced above. Nobody on the project can touch your contribution
> until you've followed the guidelines on that web page.
The attached modifications to OpenLDAP Software are subject to the
following notice:
Copyright 2004 Alexandre PAUZIES
Redistribution and use in source and binary forms, with or without
modification, are permitted only as authorized by the OpenLDAP Public
License.
--- openldap/configure.in 2004-09-11 16:15:53.000000000 +0200
+++ openldap-phonetic2/configure.in 2004-09-22 18:00:10.954206024 +0200
@@ -194,6 +194,7 @@
OL_ARG_ENABLE(slapi,[ --enable-slapi enable SLAPI support (experimental)], no)dnl
OL_ARG_ENABLE(slp,[ --enable-slp enable SLPv2 support], no)dnl
OL_ARG_ENABLE(wrappers,[ --enable-wrappers enable tcp wrapper support], no)dnl
+OL_ARG_ENABLE(phonetic2,[ --enable-phonetic2 enable new phonetic system for approx], no)dnl
dnl ----------------------------------------------------------------
dnl SLAPD Backend Options
@@ -2049,6 +2050,29 @@
dnl fi
dnl ----------------------------------------------------------------
+dnl PHONETIC2
+dnl The phonetic2 approx matcher calls powf(), so it needs <math.h> and
+dnl the math library.  All shell variables are quoted so that an empty or
+dnl unset value yields a clean comparison instead of a `test' syntax error.
+ol_link_math=no
+if test "$ol_enable_phonetic2" != no ; then
+	AC_CHECK_HEADERS(math.h)
+	if test "$ac_cv_header_math_h" != yes ; then
+		AC_MSG_ERROR([could not locate <math.h>])
+	fi
+
+	AC_CHECK_LIB(m,powf,[have_m=yes],[have_m=no])
+	if test "$have_m" = yes ; then
+		ol_link_math="yes"
+	fi
+
+	if test "$ol_link_math" != no ; then
+		dnl LIBS is extended for the rest of configure; the saved copy
+		dnl is not restored anywhere in this section.
+		ac_save_LIBS="$LIBS"
+		LIBS="$LIBS -lm"
+		AC_DEFINE(HAVE_LIBMATH,1,[define if you have math library -lm])
+	elif test "$ol_enable_phonetic2" != auto ; then
+		AC_MSG_ERROR([could not locate Math library])
+	fi
+fi
+
+dnl ----------------------------------------------------------------
dnl SQL
ol_link_sql=no
if test $ol_enable_sql != no ; then
@@ -2470,7 +2494,9 @@
if test "$ol_enable_aci" != no ; then
AC_DEFINE(SLAPD_ACI_ENABLED,1,[define to support per-object ACIs])
fi
-
+if test "$ol_enable_phonetic2" != no ; then
+ AC_DEFINE(SLAPD_PHONETIC_V2,1,[define to support new phonetic system])
+fi
if test "$ol_link_modules" != no ; then
AC_DEFINE(SLAPD_MODULES,1,[define to support modules])
BUILD_SLAPD=yes
--- openldap/include/portable.h.in 2004-09-12 05:35:05.000000000 +0200
+++ openldap-phonetic2/include/portable.h.in 2004-09-22 17:45:25.436825128 +0200
@@ -632,6 +632,9 @@
/* Define if you have the socket library (-lsocket). */
#undef HAVE_LIBSOCKET
+/* Define if you have the math library (-lm). */
+#undef HAVE_LIBMATH
+
/* Package */
#undef OPENLDAP_PACKAGE
@@ -1001,6 +1004,9 @@
/* define to support SHELL backend */
#undef SLAPD_SHELL
+/* define to support new Phonetic system */
+#undef SLAPD_PHONETIC_V2
+
/* define to support SQL backend */
#undef SLAPD_SQL
--- openldap/servers/slapd/config.c 2004-09-20 19:44:05.000000000 +0200
+++ openldap-phonetic2/servers/slapd/config.c 2004-09-22 16:15:41.000000000 +0200
@@ -72,6 +72,8 @@
char *strtok_quote_ptr;
+char *slapd_lang = NULL;
+
int use_reverse_lookup = 0;
#ifdef LDAP_SLAPI
@@ -459,6 +461,23 @@
} else if ( strcasecmp( cargv[0], "replica-argsfile" ) == 0 ) {
/* ignore */ ;
+ /* get default lang for approx */
+ } else if ( strcasecmp( cargv[0], "lang" ) == 0 ) {
+ if ( cargc < 2 ) {
+#ifdef NEW_LOGGING
+ LDAP_LOG( CONFIG, CRIT,
+ "%s: line %d missing lang name in \"lang <language>\" "
+ "line.\n", fname, lineno, 0 );
+#else
+ Debug( LDAP_DEBUG_ANY,
+ "%s: line %d: missing lang name in \"lang <language>\" line\n",
+ fname, lineno, 0 );
+#endif
+
+ return( 1 );
+ }
+
+ slapd_lang = ch_strdup( cargv[1] );
/* default password hash */
} else if ( strcasecmp( cargv[0], "password-hash" ) == 0 ) {
--- openldap/servers/slapd/proto-slap.h 2004-09-20 03:52:39.000000000 +0200
+++ openldap-phonetic2/servers/slapd/proto-slap.h 2004-09-22 16:15:12.000000000 +0200
@@ -974,6 +974,8 @@
* phonetic.c
*/
LDAP_SLAPD_F (char *) phonetic LDAP_P(( char *s ));
+LDAP_SLAPD_F (char *) phonetic_v2 LDAP_P(( char *s ));
+LDAP_SLAPD_F (char *) post_phonetic_v2 LDAP_P(( char *s ));
/*
* referral.c
@@ -1333,6 +1335,8 @@
LDAP_SLAPD_V (char *) slapd_args_file;
LDAP_SLAPD_V (time_t) starttime;
+LDAP_SLAPD_V (char *) slapd_lang;
+
/* use time(3) -- no mutex */
#define slap_get_time() time( NULL )
--- openldap/servers/slapd/schema_init.c 2004-09-09 03:56:58.000000000 +0200
+++ openldap-phonetic2/servers/slapd/schema_init.c 2004-09-22 16:11:12.196248328 +0200
@@ -1392,6 +1392,10 @@
# define SLAPD_APPROX_WORDLEN 1
#endif
+#if defined(SLAPD_PHONETIC_V2)
+# define SLAPD_PHONETIC_V2_PRECISION 7
+#endif
+
static int
approxMatch(
int *matchp,
@@ -1402,7 +1406,7 @@
void *assertedValue )
{
struct berval *nval, *assertv;
- char *val, **values, **words, *c;
+ char *val, **values, **words, *c, *tmp;
int i, count, len, nextchunk=0, nextavail=0;
/* Yes, this is necessary */
@@ -1434,7 +1438,16 @@
values = (char **)ch_malloc( count * sizeof(char *) );
for ( c = nval->bv_val, i = 0; i < count; i++, c += strlen(c) + 1 ) {
words[i] = c;
+#if defined(SLAPD_PHONETIC_V2)
+ tmp = phonetic_v2(c);
+ values[i] = post_phonetic_v2(tmp);
+ Debug( LDAP_DEBUG_TRACE,
+ ">>> word: <%s> phonetic_word: <%s> post_phonetic_word: <%s>\n",
+ c, tmp, values[i] );
+ ch_free(tmp);
+#else
values[i] = phonetic(c);
+#endif
}
/* Work through the asserted value's words, to see if at least some
@@ -1459,11 +1472,24 @@
else {
/* Isolate the next word in the asserted value and phonetic it */
assertv->bv_val[nextchunk+len] = '\0';
+#if defined(SLAPD_PHONETIC_V2)
+ tmp = phonetic_v2(assertv->bv_val + nextchunk);
+ val = post_phonetic_v2(tmp);
+ Debug( LDAP_DEBUG_TRACE,
+ ">>> word: <%s> phonetic_word: <%s> post_phonetic_word: <%s>\n",
+ assertv->bv_val+nextchunk, tmp, val );
+ ch_free(tmp);
+#else
val = phonetic( assertv->bv_val + nextchunk );
+#endif
/* See if this phonetic chunk is in the remaining words of *value */
for( i=nextavail; i<count; i++ ){
+#if defined(SLAPD_PHONETIC_V2)
+ if( !strncmp( val, values[i], SLAPD_PHONETIC_V2_PRECISION ) ){
+#else
if( !strcmp( val, values[i] ) ){
+#endif
nextavail = i+1;
break;
}
@@ -1516,6 +1542,7 @@
int i,j, len, wordcount, keycount=0;
struct berval *newkeys;
BerVarray keys=NULL;
+ char *tmp;
for( j=0; values[j].bv_val != NULL; j++ ) {
struct berval val = BER_BVNULL;
@@ -1543,7 +1570,13 @@
for( c = val.bv_val, i = 0; i < wordcount; c += len + 1 ) {
len = strlen( c );
if( len < SLAPD_APPROX_WORDLEN ) continue;
+#if defined (SLAPD_PHONETIC_V2)
+ tmp = phonetic_v2(c);
+ ber_str2bv( post_phonetic_v2( tmp ), 0, 0, &keys[keycount] );
+ ch_free(tmp);
+#else
ber_str2bv( phonetic( c ), 0, 0, &keys[keycount] );
+#endif
keycount++;
i++;
}
@@ -1571,6 +1604,7 @@
int i, count, len;
struct berval *val;
BerVarray keys;
+ char *tmp;
/* Yes, this is necessary */
val = UTF8bvnormalize( ((struct berval *)assertedValue),
@@ -1599,7 +1633,13 @@
for( c = val->bv_val, i = 0; i < count; c += len + 1 ) {
len = strlen(c);
if( len < SLAPD_APPROX_WORDLEN ) continue;
+#if defined (SLAPD_PHONETIC_V2)
+ tmp = phonetic_v2(c);
+ ber_str2bv( post_phonetic_v2( tmp ), 0, 0, &keys[i] );
+ ch_free(tmp);
+#else
ber_str2bv( phonetic( c ), 0, 0, &keys[i] );
+#endif
i++;
}
--- openldap/servers/slapd/phonetic.c 2004-01-01 19:15:31.000000000 +0100
+++ openldap-phonetic2/servers/slapd/phonetic.c 2004-09-22 18:13:59.774206232 +0200
@@ -33,9 +33,14 @@
#include <ac/socket.h>
#include <ac/time.h>
+#if defined HAVE_LIBMATH
+ #include <math.h>
+#endif
+
#include "slap.h"
+#include "phonetic.h"
-#if !defined(SLAPD_METAPHONE) && !defined(SLAPD_PHONETIC)
+#if !defined(SLAPD_METAPHONE) && !defined(SLAPD_PHONETIC) && !defined(SLAPD_PHONETIC_V2)
#define SLAPD_METAPHONE
#endif
@@ -180,6 +185,288 @@
return( ch_strdup( phoneme ) );
}
+
+#elif defined(SLAPD_PHONETIC_V2)
+
+
+/* Dispatch table: maps a rule's action code to the function that carries
+ * it out.  The SLAP_PH_NONE entry terminates the list. */
+static command_t commands[] =
+{
+ { SLAP_PH_FIND_REPLACE, find_replace },
+ { SLAP_PH_NONE, NULL },
+};
+
+/* Dispatch table: maps a condition code to the predicate that evaluates
+ * it.  The SLAP_PH_NONE entry terminates the list. */
+static check_t checks[] =
+{
+ { SLAP_PH_PRECEDED, is_preceded },
+ { SLAP_PH_FOLLOWED, is_followed },
+ { SLAP_PH_REPEATED, is_repeated },
+ { SLAP_PH_NONE, NULL },
+};
+
+
+
+/* French rewrite rules following the Phonex algorithm by Frederic BROUARD
+ * (http://sqlpro.developpez.com/Soundex/SQL_AZ_soundex.html).
+ *
+ * Each entry is { action, conditions }: the action rewrites params[0] into
+ * params[1], but only where every listed condition holds.  phonetic_v2()
+ * applies the rules to the whole word one after another, in table order,
+ * so later rules see the output of earlier ones.  The entry whose action
+ * is SLAP_PH_NONE terminates the table. */
+
+static rule_t phonetic_rules_fr_phonex[] =
+{
+ { {SLAP_PH_FIND_REPLACE, {"y", "i"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"h", ""}}, {{SLAP_PH_PRECEDED, "csp", SLAP_PH_NOT|SLAP_PH_OR}} },
+ { {SLAP_PH_FIND_REPLACE, {"ph", "f"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"gan", "kan"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"gam", "kam"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"gain", "kain"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"gaim", "kaim"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"ain", "yn"}}, {{SLAP_PH_FOLLOWED, "aeiou", SLAP_PH_OR}} },
+ { {SLAP_PH_FIND_REPLACE, {"ein", "yn"}}, {{SLAP_PH_FOLLOWED, "aeiou", SLAP_PH_OR}} },
+ { {SLAP_PH_FIND_REPLACE, {"aim", "yn"}}, {{SLAP_PH_FOLLOWED, "aeiou", SLAP_PH_OR}} },
+ { {SLAP_PH_FIND_REPLACE, {"eim", "yn"}}, {{SLAP_PH_FOLLOWED, "aeiou", SLAP_PH_OR}} },
+ { {SLAP_PH_FIND_REPLACE, {"eau", "o"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"oua", "2"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"ein", "4"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"ain", "4"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"eim", "4"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"aim", "4"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ /* The three accented-vowel rules below are disabled; the author's note */
+ /* says they cannot be used (APPROX flag to normalize()):               */
+ /* { "é", "y", {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} }, { "è", "y", ... }, { "ê", "y", ... } */
+ { {SLAP_PH_FIND_REPLACE, {"ai", "y"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"ei", "y"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"er", "yr"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"et", "yt"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"ess", "yss"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"an", "1"}}, {{SLAP_PH_FOLLOWED, "aeiou1234", SLAP_PH_OR|SLAP_PH_NOT}} },
+ { {SLAP_PH_FIND_REPLACE, {"am", "1"}}, {{SLAP_PH_FOLLOWED, "aeiou1234", SLAP_PH_NOT|SLAP_PH_OR}} },
+ { {SLAP_PH_FIND_REPLACE, {"en", "1"}}, {{SLAP_PH_FOLLOWED, "aeiou1234", SLAP_PH_NOT|SLAP_PH_OR}} },
+ { {SLAP_PH_FIND_REPLACE, {"em", "1"}}, {{SLAP_PH_FOLLOWED, "aeiou1234", SLAP_PH_NOT|SLAP_PH_OR}} },
+ { {SLAP_PH_FIND_REPLACE, {"in", "4"}}, {{SLAP_PH_FOLLOWED, "aeiou1234", SLAP_PH_NOT|SLAP_PH_OR}} },
+ /* "s" becomes "z" only between two vowel-like characters: both conditions must hold. */
+ { {SLAP_PH_FIND_REPLACE, {"s", "z"}}, {{SLAP_PH_FOLLOWED, "aeiou1234", SLAP_PH_OR},
+ {SLAP_PH_PRECEDED, "aeiou1234", SLAP_PH_OR}} },
+ { {SLAP_PH_FIND_REPLACE, {"oe", "e"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"eu", "e"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"au", "o"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"oi", "2"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"oy", "2"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"ou", "3"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"sch", "5"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"ch", "5"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"sh", "5"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"ss", "s"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"sc", "s"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"c", "s"}}, {{SLAP_PH_FOLLOWED, "ei", SLAP_PH_OR}} },
+ { {SLAP_PH_FIND_REPLACE, {"c", "k"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"q", "k"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ /* NOTE(review): the unconditional "q" rule just above has already rewritten */
+ /* every 'q', so this "qu" pattern can no longer match -- confirm the order. */
+ { {SLAP_PH_FIND_REPLACE, {"qu", "k"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"gu", "k"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"ga", "ka"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"go", "ko"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"gy", "ky"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"a", "o"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"d", "t"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"p", "t"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"j", "g"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"b", "f"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"v", "f"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_FIND_REPLACE, {"m", "n"}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+ /* "not followed by SLAP_PH_ALL" is true only at the end of the word, */
+ /* so these two rules drop a trailing "t" or "x".                      */
+ { {SLAP_PH_FIND_REPLACE, {"t", ""}}, {{SLAP_PH_FOLLOWED, SLAP_PH_ALL, SLAP_PH_AND|SLAP_PH_NOT}} },
+ { {SLAP_PH_FIND_REPLACE, {"x", ""}}, {{SLAP_PH_FOLLOWED, SLAP_PH_ALL, SLAP_PH_AND|SLAP_PH_NOT}} },
+ /* Collapse runs of an identical character down to a single one. */
+ { {SLAP_PH_FIND_REPLACE, {SLAP_PH_ALL, ""}}, {{SLAP_PH_REPEATED, NULL, SLAP_PH_NONE}} },
+ { {SLAP_PH_NONE, {NULL}}, {{SLAP_PH_NONE, NULL, SLAP_PH_NONE}} },
+};
+
+
+/* Alphabet of the characters that can remain after the French rules have
+ * run.  A character's index in this table is its digit value when
+ * post_phonetic_v2() turns a phonetic word into a number.
+ *
+ * The table is scanned until '\0', so it must be NUL-terminated: it is
+ * written as a string literal with an implicit size (22 symbols + NUL)
+ * rather than as a char[22] with 22 initializers, which has no terminator. */
+static char phonetic_post_rules_fr_phonex[] = "12345efghiklnorstuwxyz";
+
+
+/* Language table: associates a language tag with its rewrite rules and
+ * with the alphabet used by post_phonetic_v2().  The {NULL, NULL, NULL}
+ * entry marks the end of the table; "fr" is the only language provided. */
+static phonetic_t phonetic_lang[] =
+{
+ {"fr", phonetic_rules_fr_phonex, phonetic_post_rules_fr_phonex},
+ {NULL, NULL, NULL},
+};
+
+
+/* Condition predicate: inspects the text that comes right after *pos.
+ *
+ *  - SLAP_PH_OR : holds when the next character is one of condition->param.
+ *  - SLAP_PH_AND: holds when the text after pos begins with condition->param
+ *                 (an empty param matches whenever a next character exists).
+ *
+ * Nothing holds at the end of the string.  SLAP_PH_NOT inverts the result.
+ * `start' is not used; it is part of the common predicate signature.
+ * Returns 1 when the (possibly negated) condition is satisfied, else 0. */
+static int is_followed(char *start, char *pos, condition_t *condition)
+{
+	const int negate = (condition->flag & SLAP_PH_NOT) != 0;
+	const char *next = pos + 1;
+	int hit = 0;
+
+	if (*next != '\0') {
+		if (condition->flag & SLAP_PH_OR) {
+			hit = (strchr(condition->param, *next) != NULL);
+		} else if (condition->flag & SLAP_PH_AND) {
+			hit = (strncmp(next, condition->param,
+				strlen(condition->param)) == 0);
+		}
+	}
+
+	return hit ? !negate : negate;
+}
+
+
+/* Condition predicate: holds when the character at pos is immediately
+ * followed by an identical character.  SLAP_PH_NOT inverts the result.
+ * `start' and condition->param are not consulted.
+ * Returns 1 when the (possibly negated) condition is satisfied, else 0. */
+static int is_repeated(char *start, char *pos, condition_t *condition)
+{
+	const int negate = (condition->flag & SLAP_PH_NOT) != 0;
+	const int doubled = (pos[1] != '\0' && pos[0] == pos[1]);
+
+	return doubled ? !negate : negate;
+}
+
+/* Condition predicate: inspects the text that ends right before *pos.
+ *
+ *  - SLAP_PH_OR : holds when the previous character is one of
+ *                 condition->param.
+ *  - SLAP_PH_AND: holds when the text ending just before pos is exactly
+ *                 condition->param (and fits between start and pos).
+ *
+ * Nothing holds when pos is the first character of the word.
+ * SLAP_PH_NOT inverts the result.
+ * Returns 1 when the (possibly negated) condition is satisfied, else 0. */
+static int is_preceded(char *start, char *pos, condition_t *condition)
+{
+	const int negate = (condition->flag & SLAP_PH_NOT) != 0;
+	int hit = 0;
+
+	if (pos > start) {
+		if (condition->flag & SLAP_PH_OR) {
+			hit = (strchr(condition->param, pos[-1]) != NULL);
+		} else if (condition->flag & SLAP_PH_AND) {
+			size_t want = strlen(condition->param);
+			size_t have = (size_t)(pos - start);
+
+			hit = (want <= have &&
+				memcmp(pos - want, condition->param, want) == 0);
+		}
+	}
+
+	return hit ? !negate : negate;
+}
+
+
+/* Evaluate every condition attached to `rule' for a match found at `pos'.
+ *
+ * start: beginning of the word being rewritten.
+ * pos:   first character of the matched pattern (rule->action.params[0]).
+ * rule:  rule whose conditions[] are checked; the list ends at the first
+ *        entry whose name is SLAP_PH_NONE, or after SLAP_PH_MAX_CONDITIONS
+ *        entries when the array is completely filled.
+ *
+ * Returns 1 when all conditions hold (or there are none), 0 otherwise.
+ *
+ * A "followed" condition is about what comes after the whole pattern, so
+ * its predicate is handed the LAST character of the match.  Every other
+ * predicate is handed `pos'.  Each predicate runs exactly once per
+ * condition: the earlier switch had no `break' after the FOLLOWED case
+ * and fell through into a second evaluation at the unadjusted position. */
+static int check_conditions(char *start, char *pos, rule_t *rule)
+{
+	int i;
+	int j;
+
+	for (i = 0; i < SLAP_PH_MAX_CONDITIONS && rule->conditions[i].name; i++) {
+		condition_t *cond = &rule->conditions[i];
+		char *at = pos;
+
+		if (cond->name == SLAP_PH_FOLLOWED) {
+			size_t pat_len = strlen(rule->action.params[0]);
+
+			/* An empty pattern (SLAP_PH_ALL) stands for the single
+			 * character at pos, so there is nothing to skip. */
+			if (pat_len > 0)
+				at = pos + pat_len - 1;
+		}
+
+		for (j = 0; checks[j].name; j++) {
+			if (checks[j].name != cond->name)
+				continue;
+			if (!checks[j].try(start, at, cond))
+				return 0;
+		}
+	}
+	return 1;
+}
+
+
+/* Rewrite, in place, the pattern occurrence that starts at `pos'.
+ *
+ * start: beginning of the word buffer (unused, kept for the signature).
+ * pos:   first character of the occurrence of rule->action.params[0];
+ *        an empty pattern (SLAP_PH_ALL) stands for the one character at pos.
+ * rule:  supplies the pattern (params[0]) and its replacement (params[1]).
+ *
+ * Returns the position from which the caller should keep scanning: `pos'
+ * itself after a successful rewrite, or a position past the occurrence
+ * when nothing could be rewritten (so the caller always makes progress).
+ *
+ * The buffer is never reallocated: `pos' points into the middle of the
+ * caller's allocation, so it cannot be passed to ch_realloc(), and the
+ * caller would not learn about a moved buffer anyway.  A replacement
+ * longer than its pattern therefore cannot be applied and is skipped. */
+static char *replace(char *start, char *pos, rule_t *rule)
+{
+	size_t str_len = strlen(pos);
+	size_t look_for_len = strlen(rule->action.params[0]);
+	size_t change_to_len = strlen(rule->action.params[1]);
+
+	if (look_for_len == 0)
+		look_for_len = 1;
+
+	/* Defensive: the occurrence must lie entirely inside the string. */
+	if (look_for_len > str_len)
+		return pos + str_len;
+
+	/* Growing in place would write past the end of the buffer. */
+	if (change_to_len > look_for_len)
+		return pos + look_for_len;
+
+	/* Close the gap: move the tail that follows the occurrence, including
+	 * its terminating NUL, which is str_len - look_for_len + 1 bytes. */
+	if (change_to_len != look_for_len)
+		memmove(pos + change_to_len, pos + look_for_len,
+			str_len - look_for_len + 1);
+	if (change_to_len)
+		memcpy(pos, rule->action.params[1], change_to_len);
+
+	return pos;
+}
+
+
+/* Command handler for SLAP_PH_FIND_REPLACE: walks the word from `pos' to
+ * its end and rewrites each occurrence of rule->action.params[0] whose
+ * conditions hold.  With an empty pattern every position is a candidate.
+ *
+ * After a rewrite, scanning resumes at the position returned by replace();
+ * after a rejected occurrence it resumes one character further on.
+ * Always returns NULL. */
+static void *find_replace(char *start, char *pos, rule_t *rule)
+{
+	const char *pattern = rule->action.params[0];
+
+	while (*pos != '\0') {
+		if (*pattern != '\0') {
+			pos = strstr(pos, pattern);
+			if (pos == NULL)
+				break;
+		}
+
+		if (check_conditions(start, pos, rule))
+			pos = replace(start, pos, rule);
+		else
+			pos++;
+	}
+
+	return NULL;
+}
+
+
+/* Produce the phonetic form of `word' for the configured language.
+ *
+ * The language is looked up by slapd_lang; when slapd_lang is NULL the
+ * first entry of phonetic_lang[] is used.  Every rule of that language is
+ * applied in table order to a private copy of the word.
+ *
+ * Returns a ch_strdup()'ed string holding the result, or NULL when no
+ * rule set exists for the configured language. */
+char *phonetic_v2(char *word)
+{
+	phonetic_t *entry = phonetic_lang;
+	rule_t *rule;
+	command_t *cmd;
+	char *copy;
+
+	if (slapd_lang != NULL) {
+		while (entry->lang != NULL && strcmp(entry->lang, slapd_lang) != 0)
+			entry++;
+	}
+	if (entry->lang == NULL)
+		return NULL; /* Error, no phonetic rules found for this lang */
+
+	copy = ch_strdup(word);
+
+	for (rule = entry->rules; rule->action.name; rule++) {
+		for (cmd = commands; cmd->name; cmd++) {
+			if (cmd->name == rule->action.name)
+				cmd->run(copy, copy, rule);
+		}
+	}
+
+	return copy;
+}
+
+
+/* Turn a phonetic word into a decimal string that sorts and compares like
+ * the word itself.
+ *
+ * Each character is replaced by its index in the language's post_rules
+ * alphabet (which must be NUL-terminated) and the word is read as a
+ * fraction in base N, N being the alphabet size:
+ *     value = sum over i of digit[i] * N^-(i+1)
+ * The value is printed with "%4.20f".
+ *
+ * word: output of phonetic_v2(); may be NULL (phonetic_v2's error value).
+ *
+ * Returns a newly allocated string, or NULL when `word' is NULL or no
+ * alphabet exists for the configured language.
+ *
+ * Characters absent from the alphabet count as digit 0, so the result is
+ * always deterministic; previously their slot was read uninitialized. */
+char *post_phonetic_v2(char *word)
+{
+	int *tab;
+	int i;
+	int j;
+	int base;
+	size_t len;
+	double res;
+	char *res_str;
+	char *post_rules;
+
+	if (word == NULL)
+		return NULL;
+
+	for (i = 0; phonetic_lang[i].lang != NULL && slapd_lang != NULL &&
+		strcmp(phonetic_lang[i].lang, slapd_lang); i++);
+	if (phonetic_lang[i].lang == NULL)
+		return NULL; /* Error, no post phonetic rules found for this lang */
+
+	post_rules = phonetic_lang[i].post_rules;
+
+	/* Alphabet size, i.e. the numeric base. */
+	for (base = 0; post_rules[base]; base++);
+
+	len = strlen(word);
+	tab = ch_malloc(sizeof(int) * (len + 1));
+	for (i = 0; (size_t)i < len; i++) {
+		tab[i] = 0;
+		for (j = 0; j < base; j++) {
+			if (word[i] == post_rules[j]) {
+				tab[i] = j;
+				break;
+			}
+		}
+	}
+
+	for (res = 0.0, i = 0; (size_t)i < len; i++)
+		res += tab[i] * powf(base, 0 - i - 1);
+
+	ch_free(tab);
+
+	/* The value lies in [0, 1): "0." + 20 digits + NUL needs 23 bytes.
+	 * snprintf keeps the write bounded whatever the value turns out to be. */
+	res_str = ch_malloc(sizeof(char) * 26);
+	snprintf(res_str, 26, "%4.20f", res);
+
+	return res_str;
+}
+
+
#elif defined(SLAPD_METAPHONE)
/*
--- openldap/servers/slapd/phonetic.h 1970-01-01 01:00:00.000000000 +0100
+++ openldap-phonetic2/servers/slapd/phonetic.h 2004-09-22 17:01:42.284604880 +0200
@@ -0,0 +1,84 @@
+/* phonetic.h - routines to do phonetic matching */
+/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
+ *
+ * Copyright 1998-2004 The OpenLDAP Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ *
+ * A copy of this license is available in the file LICENSE in the
+ * top-level directory of the distribution or, alternatively, at
+ * <http://www.OpenLDAP.org/license.html>.
+ */
+/* Copyright 2004 Alexandre PAUZIES
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted only as authorized by the OpenLDAP
+ * Public License.
+ */
+
+#ifndef SLAPD_PHONETIC_H
+#define SLAPD_PHONETIC_H
+
+/* Terminator / "nothing" value shared by action, condition and flag fields. */
+#define SLAP_PH_NONE 0
+
+/* Condition codes (condition_t.name). */
+#define SLAP_PH_FOLLOWED 1
+#define SLAP_PH_REPEATED 2
+#define SLAP_PH_PRECEDED -1
+
+/* Condition flags (condition_t.flag), combined with `|'. */
+#define SLAP_PH_OR 1	/* param is a set: any one character matches */
+#define SLAP_PH_AND 2	/* param is a sequence: all of it must match */
+#define SLAP_PH_NOT 4	/* invert the result */
+#define SLAP_PH_ALL ""	/* empty pattern/param: stands for any character */
+
+/* Action codes (action_t.name). */
+#define SLAP_PH_FIND_REPLACE 1
+
+/* Capacity of the fixed-size arrays in rule_t / action_t. */
+#define SLAP_PH_MAX_CONDITIONS 4
+#define SLAP_PH_MAX_PARAMS 3
+
+
+/* One condition that must hold for a rule to fire. */
+typedef struct condition_s
+{
+	int name;	/* SLAP_PH_FOLLOWED, SLAP_PH_REPEATED, SLAP_PH_PRECEDED */
+	char *param;	/* characters the condition refers to */
+	int flag;	/* SLAP_PH_OR / SLAP_PH_AND, optionally | SLAP_PH_NOT */
+} condition_t;
+
+/* What a rule does; for find/replace params[0] is the pattern and
+ * params[1] its replacement. */
+typedef struct action_s
+{
+	int name;
+	char *params[SLAP_PH_MAX_PARAMS];
+} action_t;
+
+/* A rewrite rule: an action guarded by a SLAP_PH_NONE-terminated list
+ * of conditions. */
+typedef struct rule_s
+{
+	action_t action;
+	condition_t conditions[SLAP_PH_MAX_CONDITIONS];
+} rule_t;
+
+/* Entry of the condition dispatch table: code -> predicate. */
+typedef struct check_s
+{
+	int name;
+	int (*try)(char *start, char *pos, condition_t *condition);
+} check_t;
+
+/* Entry of the action dispatch table: code -> handler. */
+typedef struct command_s
+{
+	int name;
+	void *(*run)(char *start, char *pos, rule_t *rule);
+} command_t;
+
+/* Per-language data: rewrite rules plus the alphabet used to encode the
+ * rewritten word as a number. */
+typedef struct phonetic_s
+{
+	char *lang;
+	rule_t *rules;
+	char *post_rules;
+} phonetic_t;
+
+
+/* Helpers private to phonetic.c, declared here so that its dispatch
+ * tables can reference them before their definitions. */
+static void *find_replace(char *start, char *pos, rule_t *rule);
+static char *replace(char *start, char *pos, rule_t *rule);
+static int check_conditions(char *start, char *pos, rule_t *rule);
+static int is_followed(char *start, char *pos, condition_t *condition);
+static int is_preceded(char *start, char *pos, condition_t *condition);
+static int is_repeated(char *start, char *pos, condition_t *condition);
+
+#endif /* SLAPD_PHONETIC_H */
+
I hope this one will meet your requirements.
Best regards,
Alexandre.
--
Alexandre PAUZIES <apauzies@linagora.com>
LINAGORA - http://www.linagora.com/