Index: org/apache/lucene/queryParser/QueryParser.jj
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/queryParser/QueryParser.jj,v
retrieving revision 1.38
diff -u -r1.38 QueryParser.jj
--- org/apache/lucene/queryParser/QueryParser.jj	26 Nov 2003 11:00:58 -0000	1.38
+++ org/apache/lucene/queryParser/QueryParser.jj	10 Jan 2004 00:41:29 -0000
@@ -71,6 +71,7 @@
 import org.apache.lucene.analysis.*;
 import org.apache.lucene.document.*;
 import org.apache.lucene.search.*;
+import org.apache.lucene.search.phonetic.*;
 
 /**
  * This class is generated by JavaCC.  The only method that clients should need
@@ -448,6 +449,23 @@
     return new FuzzyQuery(t);
   }
 
+  /**
+   * Factory method for generating a query (similar to
+   * ({@link #getWildcardQuery}). Called when parser parses
+   * an input term token that has the phonetic suffix ($) appended.
+   *
+   * @param field Name of the field query will use.
+   * @param termStr Term token to use for building term for the query
+   *
+   * @return Resulting {@link Query} built for the term
+   * @exception ParseException throw in overridden method to disallow
+   */
+  protected Query getPhoneticQuery(String field, String termStr) throws ParseException
+  {
+    Term t = new Term(field, termStr);
+    return new PhoneticQuery(t);
+  }
+  
   public static void main(String[] args) throws Exception {
     QueryParser qp = new QueryParser("field",
                            new org.apache.lucene.analysis.SimpleAnalyzer());
@@ -465,9 +483,9 @@
 <*> TOKEN : {
   <#_NUM_CHAR:   ["0"-"9"] >
 | <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^",
-                          "[", "]", "\"", "{", "}", "~", "*", "?" ] >
+                          "[", "]", "\"", "{", "}", "~", "*", "?", "$" ] >
 | <#_TERM_START_CHAR: ( ~[ " ", "\t", "+", "-", "!", "(", ")", ":", "^",
-                           "[", "]", "\"", "{", "}", "~", "*", "?" ]
+                           "[", "]", "\"", "{", "}", "~", "*", "?", "$" ]
                        | <_ESCAPED_CHAR> ) >
 | <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> ) >
 | <#_WHITESPACE: ( " " | "\t" ) >
@@ -499,6 +517,7 @@
 | <QUOTED:     "\"" (~["\""])+ "\"">
 | <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)*  >
 | <FUZZY:     "~" >
+| <PHONETICTERM:  <_TERM_START_CHAR> (<_TERM_CHAR>)* "$" >
 | <SLOP:      "~" (<_NUM_CHAR>)+ >
 | <PREFIXTERM:  <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
 | <WILDTERM:  <_TERM_START_CHAR>
@@ -611,6 +630,7 @@
   boolean wildcard = false;
   boolean fuzzy = false;
   boolean rangein = false;
+  boolean phonetic = false;
   Query q;
 }
 {
@@ -619,6 +639,7 @@
        term=<TERM>
        | term=<PREFIXTERM> { prefix=true; }
        | term=<WILDTERM> { wildcard=true; }
+       | term=<PHONETICTERM> { phonetic=true; }
        | term=<NUMBER>
      )
      [ <FUZZY> { fuzzy=true; } ]
@@ -629,6 +650,8 @@
        } else if (prefix) {
          q = getPrefixQuery(field, term.image.substring
 			    (0, term.image.length()-1));
+       } else if (phonetic) {
+         q = getPhoneticQuery(field, term.image.substring(0,term.image.length()-1));
        } else if (fuzzy) {
          q = getFuzzyQuery(field, term.image);
        } else {
Index: org/apache/lucene/search/IndexSearcher.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/IndexSearcher.java,v
retrieving revision 1.11
diff -u -r1.11 IndexSearcher.java
--- org/apache/lucene/search/IndexSearcher.java	16 Sep 2003 20:06:32 -0000	1.11
+++ org/apache/lucene/search/IndexSearcher.java	10 Jan 2004 00:41:30 -0000
@@ -125,6 +125,10 @@
    */
   public TopDocs search(Query query, Filter filter, final int nDocs)
        throws IOException {
+            
+        // premptive query rewrite to avoid multiple rewrite during weight calculation
+        query = rewrite(query);
+        
     Scorer scorer = query.weight(this).scorer(reader);
     if (scorer == null)
       return new TopDocs(0, new ScoreDoc[0]);
@@ -163,8 +167,11 @@
    * @param filter if non-null, a bitset used to eliminate some documents
    * @param results to receive hits
    */
-  public void search(Query query, Filter filter,
-                     final HitCollector results) throws IOException {
+    public void search(Query query, Filter filter, final HitCollector results)
+        throws IOException {
+            
+        query = rewrite(query);
+            
     HitCollector collector = results;
     if (filter != null) {
       final BitSet bits = filter.bits(reader);
Index: org/apache/lucene/search/PhraseScorer.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/PhraseScorer.java,v
retrieving revision 1.8
diff -u -r1.8 PhraseScorer.java
--- org/apache/lucene/search/PhraseScorer.java	11 Sep 2003 01:25:47 -0000	1.8
+++ org/apache/lucene/search/PhraseScorer.java	10 Jan 2004 00:41:30 -0000
@@ -101,7 +101,9 @@
 
             if (freq > 0.0) {
                 float score = similarity.tf(freq) * value;  // compute score
-                score *= Similarity.decodeNorm(norms[first.doc]); // normalize
+                // check for 'null' norms
+                if(norms!=null)                
+                    score *= Similarity.decodeNorm(norms[first.doc]); // normalize
                 results.collect(first.doc, score);	  // add to results
             }
             last.next();				  // resume scanning
Index: org/apache/lucene/search/TermScorer.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/search/TermScorer.java,v
retrieving revision 1.6
diff -u -r1.6 TermScorer.java
--- org/apache/lucene/search/TermScorer.java	29 Jan 2003 17:18:55 -0000	1.6
+++ org/apache/lucene/search/TermScorer.java	10 Jan 2004 00:41:31 -0000
@@ -104,7 +104,9 @@
 	 ? scoreCache[f]			  // cache hit
 	 : similarity.tf(f)*weightValue;          // cache miss
 
-      score *= Similarity.decodeNorm(norms[d]);	  // normalize for field
+        // check for 'null' norms
+        if(norms!=null)
+            score *= Similarity.decodeNorm(norms[d]);	  // normalize for field
 
       c.collect(d, score);			  // collect score
 
Index: org/apache/lucene/search/phonetic/DoubleMetaphone.java
===================================================================
RCS file: org/apache/lucene/search/phonetic/DoubleMetaphone.java
diff -N org/apache/lucene/search/phonetic/DoubleMetaphone.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ org/apache/lucene/search/phonetic/DoubleMetaphone.java	10 Jan 2004 00:41:32 -0000
@@ -0,0 +1,965 @@
+
+/*
+    Phonetix: a class-library of phonetic algorithms.
+    Copyright (C) 2001-2003  Claus Engel
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+package org.apache.lucene.search.phonetic;
+
+/**
+ * Encoder implementing the phonetic algorithm "DoubleMetaphone".
+ * DoubleMetaphone was developed by Lawrence Philips
+ * and published in the June 2000 issue of the C/C++ Users Journal.
+ * DoubleMetaphone is an improvement
+ * of Philips' original Metaphone algorithm.
+ * <br><br>
+ * DoubleMetaphone sometimes generates two encodings of a word,
+ * instead of only one like Soundex and Metaphone.
+ * The first encoding is usually based on the most commonly heard
+ * pronounciation of the word in the U.S.A.
+ * For more information on DoubleMetaphone, please see Philips' article
+ * <a href="http://www.cuj.com/articles/2000/0006/0006d/0006d.htm">article</a>.
+ * <br><br>
+ * This implementation is based on the
+ * <a href="ftp://ftp.cuj.com/pub/2000/1806/philips.zip">original Visual C++ implementation</a>
+ * by Lawrence Philips with
+ * <a href="http://aspell.sourceforge.net/metaphone/dmetaph.cpp">slight modifications and bug fixes</a>
+ * by Kevin Atkinson.
+ *
+ * @see Metaphone
+ * @see Soundex
+ * @author Claus Engel
+ */
+public final class DoubleMetaphone extends MetaphoneEncoder
+{
+    private final static String[] GN_KN_PN_WR_PS                    = new String[] { "GN", "KN", "PN", "WR", "PS" };
+    private final static String[] ACH                               = new String[] { "ACH" };
+    private final static String[] BACHER_MACHER                     = new String[] { "BACHER", "MACHER" };
+    private final static String[] CAESAR                            = new String[] { "CAESAR" };
+    private final static String[] CHIA                              = new String[] { "CHIA" };
+    private final static String[] CH                                = new String[] { "CH" };
+    private final static String[] CHAE                              = new String[] { "CHAE" };
+    private final static String[] HARAC_HARIS_HOR_HYM_HIA_HEM       = new String[] { "HARAC", "HARIS", "HOR", "HYM", "HIA", "HEM" };
+    private final static String[] CHORE                             = new String[] { "CHORE" };
+    private final static String[] SCH                               = new String[] { "SCH" };
+    private final static String[] VAN__VON__SCH                     = new String[] { "VAN ", "VON ", "SCH" };
+    private final static String[] ORCHES_ARCHIT_ORCHID              = new String[] { "ORCHES", "ARCHIT", "ORCHID" };
+    private final static String[] T_S                               = new String[] { "T", "S" };
+    private final static String[] A_O                               = new String[] { "A", "O" };
+    private final static String[] A_O_U_E                           = new String[] { "A", "O", "U", "E" };
+    private final static String[] L_R_N_M_B_H_F_V_W__               = new String[] { "L", "R", "N", "M", "B", "H", "F", "V", "W", " " };
+    private final static String[] MC                                = new String[] { "MC" };
+    private final static String[] CZ                                = new String[] { "CZ" };
+    private final static String[] WICZ                              = new String[] { "WICZ" };
+    private final static String[] CIA                               = new String[] { "CIA" };
+    private final static String[] CC                                = new String[] { "CC" };
+    private final static String[] I_E_H                             = new String[] { "I", "E", "H" };
+    private final static String[] HU                                = new String[] { "HU" };
+    private final static String[] UCCEE_UCCES                       = new String[] { "UCCEE", "UCCES" };
+    private final static String[] CK_CG_CQ                          = new String[] { "CK", "CG", "CQ" };
+    private final static String[] CI_CE_CY                          = new String[] { "CI", "CE", "CY" };
+    private final static String[] CIO_CIE_CIA                       = new String[] { "CIO", "CIE", "CIA" };
+    private final static String[] _C__Q__G                          = new String[] { " C", " Q", " G" };
+    private final static String[] C_K_Q                             = new String[] { "C", "K", "Q" };
+    private final static String[] CE_CI                             = new String[] { "CE", "CI" };
+    private final static String[] DG                                = new String[] { "DG" };
+    private final static String[] I_E_Y                             = new String[] { "I", "E", "Y" };
+    private final static String[] DT_DD                             = new String[] { "DT", "DD" };
+    private final static String[] B_H_D                             = new String[] { "B", "H", "D" };
+    private final static String[] B_H                               = new String[] { "B", "H" };
+    private final static String[] C_G_L_R_T                         = new String[] { "C", "G", "L", "R", "T" };
+    private final static String[] EY                                = new String[] { "EY" };
+    private final static String[] LI                                = new String[] { "LI" };
+    private final static String[] Y_ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER= new String[] { "Y", "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" };
+    private final static String[] Y_ER                              = new String[] { "Y", "ER" };
+    private final static String[] DANGER_RANGER_MANGER              = new String[] { "DANGER", "RANGER", "MANGER" };
+    private final static String[] E_I                               = new String[] { "E", "I" };
+    private final static String[] RGY_OGY                           = new String[] { "RGY", "OGY" };
+    private final static String[] E_I_Y                             = new String[] { "E", "I", "Y" };
+    private final static String[] AGGI_OGGI                         = new String[] { "AGGI", "OGGI" };
+    private final static String[] ET                                = new String[] { "ET" };
+    private final static String[] IER_                              = new String[] { "IER " };
+    private final static String[] JOSE                              = new String[] { "JOSE" };
+    private final static String[] SAN_                              = new String[] { "SAN " };
+    private final static String[] L_T_K_S_N_M_B_Z                   = new String[] { "L", "T", "K", "S", "N", "M", "B", "Z" };
+    private final static String[] S_K_L                             = new String[] { "S", "K", "L" };
+    private final static String[] ILLO_ILLA_ALLE                    = new String[] { "ILLO", "ILLA", "ALLE" };
+    private final static String[] AS_OS                             = new String[] { "AS", "OS" };
+    private final static String[] ALLE                              = new String[] { "ALLE" };
+    private final static String[] UMB                               = new String[] { "UMB" };
+    private final static String[] P_B                               = new String[] { "P", "B" };
+    private final static String[] IE                                = new String[] { "IE" };
+    private final static String[] IER                               = new String[] { "IER" };
+    private final static String[] ER                                = new String[] { "ER" };
+    private final static String[] ME_MA                             = new String[] { "ME", "MA" };
+    private final static String[] ISL_YSL                           = new String[] { "ISL", "YSL" };
+    private final static String[] SUGAR                             = new String[] { "SUGAR" };
+    private final static String[] SH                                = new String[] { "SH" };
+    private final static String[] HEIM_HOEK_HOLM_HOLZ               = new String[] { "HEIM", "HOEK", "HOLM", "HOLZ" };
+    private final static String[] SIO_SIA                           = new String[] { "SIO", "SIA" };
+    private final static String[] SIAN                              = new String[] { "SIAN" };
+    private final static String[] M_N_L_W                           = new String[] { "M", "N", "L", "W" };
+    private final static String[] Z                                 = new String[] { "Z" };
+    private final static String[] SC                                = new String[] { "SC" };
+    private final static String[] OO_ER_EN_UY_ED_EM                 = new String[] { "OO", "ER", "EN", "UY", "ED", "EM" };
+    private final static String[] ER_EN                             = new String[] { "ER", "EN" };
+    private final static String[] AI_OI                             = new String[] { "AI", "OI" };
+    private final static String[] S_Z                               = new String[] { "S", "Z" };
+    private final static String[] TION                              = new String[] { "TION" };
+    private final static String[] TIA_TCH                           = new String[] { "TIA", "TCH" };
+    private final static String[] TH_TTH                            = new String[] { "TH", "TTH" };
+    private final static String[] OM_AM                             = new String[] { "OM", "AM" };
+    private final static String[] T_D                               = new String[] { "T", "D" };
+    private final static String[] WR                                = new String[] { "WR" };
+    private final static String[] WH                                = new String[] { "WH" };
+    private final static String[] EWSKI_EWSKY_OWSKI_OWSKY           = new String[] { "EWSKI", "EWSKY", "OWSKI", "OWSKY" };
+    private final static String[] WICZ_WITZ                         = new String[] { "WICZ", "WITZ" };
+    private final static String[] IAU_EAU                           = new String[] { "IAU", "EAU" };
+    private final static String[] AU_OU                             = new String[] { "AU", "OU" };
+    private final static String[] C_X                               = new String[] { "C", "X" };
+    private final static String[] ZO_ZI_ZA                          = new String[] { "ZO", "ZI", "ZA" };
+
+    private StringBuffer primaryBuffer;
+    private StringBuffer secondaryBuffer;
+    private boolean      hasAlternate;
+
+    /**
+     * Constructs a DoubleMetaphone encoder which generates keys of given
+     * maximal length.
+     * @param maxLength the maximal length of the generated keys. If negative,
+     *                  the lengths of the keys returned are only limited
+     *                  by the lengths of the words to encode.
+     */
+    public DoubleMetaphone (int maxLength)
+    {
+        super (maxLength);
+    }
+
+    /**
+     * Constructs a DoubleMetaphone encoder which generates keys with
+     * maximal length 4.
+     */
+    public DoubleMetaphone ()
+    {
+        super (4);
+    }
+
+    /**
+     * Returns a <tt>String</tt> identifying the algorithm.
+     */
+    public String toString()
+    {
+        return "DoubleMetaphone_"+maxLength;
+    }
+
+    private void add (final String main)
+    {
+        primaryBuffer.append(main);
+        secondaryBuffer.append(main);
+    }
+
+    private void add (final String main, final String alternate)
+    {
+        primaryBuffer.append(main);
+        if (alternate.length() > 0)
+        {
+            hasAlternate = true;
+            if (!alternate.equals(" "))
+                secondaryBuffer.append(alternate);
+        }
+        else
+        {
+            if (main.length() > 0 && !main.equals(" "))
+                secondaryBuffer.append(main);
+        }
+    }
+
+    private static boolean isSlavoGermanic (final String string)
+    {
+        return (string.indexOf('W')    >= 0) ||
+               (string.indexOf('K')    >= 0) ||
+               (string.indexOf("CZ")   >= 0) ||
+               (string.indexOf("WITZ") >= 0);
+    }
+
+    /**
+     * Returns an encoding of the given word, that is based on the most
+     * commonly heard pronounciation of the word in the U.S.A.
+     * @param word the word to encode.
+     * @return the encoding of the word. This is never <code>null</code>.
+     */
+    public String generateKey (final String word)
+    {
+        final String[] result = generateKeys(word);
+        return (result.length > 0) ? result[0] : "";
+    }
+
+    /**
+     * Returns the encodings of the given word. The first is based on the most
+     * commonly heard pronounciation of the word in the U.S.A.
+     * @param word the word to encode.
+     * @return an array of the encodings of the word.
+     *         This is never <code>null</code>.
+     */
+    public synchronized String[] generateKeys (String word)
+    {
+        if (word == null || word.length() == 0)
+            return EMPTY_KEYS;
+
+        primaryBuffer   = new StringBuffer(word.length());
+        secondaryBuffer = new StringBuffer(word.length());
+        hasAlternate    = false;
+
+        word = word.toUpperCase();
+
+        final int length = word.length();
+        final int last   = length -1;
+
+        final boolean isSlavoGermanic = isSlavoGermanic(word);
+
+        int n = 0;
+
+        // skip these when at start of word
+        if (match(word,0,GN_KN_PN_WR_PS))
+            n += 1;
+
+        // initial 'X' is pronounced 'Z' e.g. 'Xavier'
+        if (match(word,0,'X'))
+        {
+            add("S"); //'Z' maps to 'S'
+            n += 1;
+        }
+
+        while (n < length && (maxLength < 0 || (primaryBuffer.length() < maxLength && secondaryBuffer.length() < maxLength)))
+        {
+            switch (word.charAt(n))
+            {
+            case 'A':
+            case 'E':
+            case 'I':
+            case 'O':
+            case 'U':
+            case 'Y':
+                if (n == 0)
+                    //all init vowels now map to 'A'
+                    add("A");
+                n += 1;
+                break;
+
+            case 'B':
+                //"-mb", e.g", "dumb", already skipped over...
+                add("P");
+                n += match(word,n+1,'B') ? 2 : 1;
+                break;
+
+            case '�':
+                add("S");
+                n += 1;
+                break;
+
+            case 'C':
+                // various germanic
+                if ((n > 1) &&
+                    !isVowel(word,n-2) &&
+                    match(word,n-1,ACH) &&
+                    !match(word,n+2,'I') &&
+                    (!match(word,n+2,'E') || match(word,n-2,BACHER_MACHER)))
+                {
+                    add("K");
+                    n += 2;
+                    break;
+                }
+
+                // special case 'caesar'
+                if ((n == 0) && match(word,n,CAESAR))
+                {
+                    add("S");
+                    n += 2;
+                    break;
+                }
+
+                // italian 'chianti'
+                if (match(word,n,CHIA))
+                {
+                    add("K");
+                    n += 2;
+                    break;
+                }
+
+                if (match(word,n,CH))
+                {
+                    // find 'michael'
+                    if ((n > 0) && match(word,n,CHAE))
+                    {
+                        add("K", "X");
+                        n += 2;
+                        break;
+                    }
+
+                    // greek roots e.g. 'chemistry', 'chorus'
+                    if ((n == 0) &&
+                        match(word,n+1,HARAC_HARIS_HOR_HYM_HIA_HEM) &&
+                        !match(word,0,CHORE))
+                    {
+                        add("K");
+                        n += 2;
+                        break;
+                    }
+
+                    // germanic, greek, or otherwise 'ch' for 'kh' sound
+                    if (match(word,0,VAN__VON__SCH)
+                        // 'architect' but not 'arch', 'orchestra', 'orchid'
+                        || match(word,n-2,ORCHES_ARCHIT_ORCHID)
+                        || match(word,n+2,T_S)
+                        || (  ((n==0) || match(word,n-1,A_O_U_E)) &&
+                              //e.g., 'wachtler', 'wechsler', but not 'tichner'
+                              match(word,n+2,L_R_N_M_B_H_F_V_W__)  ))
+                    {
+                        add("K");
+                    }
+                    else
+                    {
+                        if (n > 0)
+                        {
+                            if (match(word,0,MC))
+                                // e.g., "McHugh"
+                                add("K");
+                            else
+                                add("X", "K");
+                        }
+                        else
+                            add("X");
+                    }
+                    n += 2;
+                    break;
+                }
+
+                // e.g., 'czerny'
+                if (match(word,n,CZ) && !match(word,n-2,WICZ))
+                {
+                    add("S", "X");
+                    n += 2;
+                    break;
+                }
+
+                // e.g., 'focaccia'
+                if (match(word,n+1,CIA))
+                {
+                    add("X");
+                    n += 3;
+                    break;
+                }
+
+                // double 'C', but not if e.g. 'McClellan'
+                if (match(word,n,CC) && !((n == 1) && match(word,0,'M')))
+                {
+                    // 'bellocchio' but not 'bacchus'
+                    if (match(word,n+2,I_E_H) && !match(word,n+2,HU))
+                    {
+                        // 'accident', 'accede', 'succeed'
+                        if (((n == 1) && match(word,n-1,'A'))
+                            || match(word,n-1,UCCEE_UCCES))
+                            add("KS");
+                        // 'bacci', 'bertucci', other italian
+                        else
+                            add("X");
+                        n += 3;
+                        break;
+                    }
+                    else
+                    {
+                        // Pierce's rule
+                        add("K");
+                        n += 2;
+                        break;
+                    }
+                }
+
+                if (match(word,n,CK_CG_CQ))
+                {
+                    add("K");
+                    n += 2;
+                    break;
+                }
+
+                if (match(word,n,CI_CE_CY))
+                {
+                    // italian vs. english
+                    if (match(word,n,CIO_CIE_CIA))
+                        add("S", "X");
+                    else
+                        add("S");
+                    n += 2;
+                    break;
+                }
+
+                // else
+                add("K");
+
+                // name sent in 'mac caffrey', 'mac gregor'
+                if (match(word,n+1,_C__Q__G))
+                    n += 3;
+                else
+                    n += (match(word,n+1,C_K_Q) && !match(word,n+1,CE_CI)) ? 2 : 1;
+                break;
+
+            case 'D':
+                if (match(word,n,DG))
+                {
+                    if (match(word,n+2,I_E_Y))
+                    {
+                        // e.g. 'edge'
+                        add("J");
+                        n += 3;
+                        break;
+                    }
+                    else
+                    {
+                        // e.g. 'edgar'
+                        add("TK");
+                        n += 2;
+                        break;
+                    }
+                }
+
+                if (match(word,n,DT_DD))
+                {
+                    add("T");
+                    n += 2;
+                    break;
+                }
+
+                // else
+                add("T");
+                n += 1;
+                break;
+
+            case 'F':
+                n += match(word,n+1,'F') ? 2 : 1;
+                add("F");
+                break;
+
+            case 'G':
+                if (match(word,n+1,'H'))
+                {
+                    if ((n > 0) && !isVowel(word,n-1))
+                    {
+                        add("K");
+                        n += 2;
+                        break;
+                    }
+
+                    if (n < 3)
+                    {
+                        // 'ghislane', 'ghiradelli'
+                        if (n == 0)
+                        {
+                            if (match(word,n+2,'I'))
+                                add("J");
+                            else
+                                add("K");
+                            n += 2;
+                            break;
+                        }
+                    }
+
+                    // Parker's rule (with some further refinements) - e.g., 'hugh'
+                    if(((n > 1) && match(word,n-2,B_H_D))
+                        // e.g., 'bough'
+                        || ((n > 2) && match(word,n-3,B_H_D))
+                        // e.g., 'broughton'
+                        || ((n > 3) && match(word,n-4,B_H)) )
+                    {
+                        n += 2;
+                        break;
+                    }
+                    else
+                    {
+                        // e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
+                        if ((n > 2) && match(word,n-1,'U') && match(word,n-3,C_G_L_R_T))
+                        {
+                            add("F");
+                        }
+                        else if ((n > 0) && !match(word,n-1,'I'))
+                        {
+                            add("K");
+                        }
+
+                        n += 2;
+                        break;
+                    }
+                }
+
+                if (match(word,n+1,'N'))
+                {
+                    if ((n == 1) && isVowel(word,0) && !isSlavoGermanic)
+                    {
+                        add("KN", "N");
+                    }
+                    else
+                    {
+                        // not e.g. 'cagney'
+                        if (!match(word,n+2,EY) && !match(word,n+1,'Y') && !isSlavoGermanic)
+                        {
+                            add("N", "KN");
+                        }
+                        else
+                        {
+                            add("KN");
+                        }
+                    }
+                    n += 2;
+                    break;
+                }
+
+                // 'tagliaro'
+                if (match(word,n+1,LI) && !isSlavoGermanic)
+                {
+                    add("KL", "L");
+                    n += 2;
+                    break;
+                }
+
+                // -ges-,-gep-,-gel-, -gie- at beginning
+                if ((n == 0) && match(word,n+1,Y_ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))
+                {
+                    add("K", "J");
+                    n += 2;
+                    break;
+                }
+
+                // -ger-,  -gy-
+                if (match(word,n+1,Y_ER) &&
+                    !match(word,0,DANGER_RANGER_MANGER) &&
+                    !match(word,n-1,E_I) &&
+                    !match(word,n-1,RGY_OGY))
+                {
+                    add("K", "J");
+                    n += 2;
+                    break;
+                }
+
+                // italian e.g, 'biaggi'
+                if (match(word,n+1,E_I_Y) || match(word,n-1,AGGI_OGGI))
+                {
+                    // obvious germanic
+                    if (match(word,0,VAN__VON__SCH) || match(word,n+1,ET))
+                    {
+                        add("K");
+                    }
+                    else
+                    {
+                        // always soft if french ending
+                        if (match(word,n+1,IER))
+                            add("J");
+                        else
+                            add("J", "K");
+                    }
+                    n += 2;
+                    break;
+                }
+
+                add("K");
+                n += match(word,n+1,'G') ? 2 : 1;
+                break;
+
+            case 'H':
+                // only keep if first & before vowel or btw. 2 vowels
+                if (((n == 0) || isVowel(word,n-1)) && isVowel(word,n+1))
+                {
+                    add("H");
+                    n += 2;
+                }
+                else
+                {
+                    //also takes care of 'HH'
+                    n += 1;
+                }
+                break;
+
+            case 'J':
+                // obvious spanish, 'jose', 'san jacinto'
+                if (match(word,n,JOSE) || match(word,0,SAN_))
+                {
+                    if (((n == 0) && match(word,n+4,' ')) || match(word,0,SAN_))
+                    {
+                        add("H");
+                    }
+                    else
+                    {
+                        add("J", "H");
+                    }
+                    n += 1;
+                    break;
+                }
+
+                if ((n == 0) && !match(word,n,JOSE))
+                {
+                    add("J", "A");//Yankelovich/Jankelowicz
+                }
+                else
+                {
+                    // spanish pron. of e.g. 'bajador'
+                    if (isVowel(word,n-1) && !isSlavoGermanic && match(word,n+1,A_O))
+                    {
+                        add("J", "H");
+                    }
+                    else
+                    {
+                        if (n == last)
+                        {
+                            add("J", " ");
+                        }
+                        else
+                        {
+                            if (!match(word,n+1,L_T_K_S_N_M_B_Z) &&
+                                !match(word,n-1,S_K_L))
+                                add("J");
+                        }
+                    }
+                }
+
+                n += match(word,n+1,'J') ? 2 : 1;
+                break;
+
+            case 'K':
+                n += match(word,n+1,'K') ? 2 : 1;
+                add("K");
+                break;
+
+            case 'L':
+                if (match(word,n+1,'L'))
+                {
+                    // spanish e.g. 'cabrillo', 'gallegos'
+                    if (((n == length-3) && match(word,n-1,ILLO_ILLA_ALLE))
+                        || ( (match(word,last-1,AS_OS) || match(word,last,A_O))
+                             && match(word,n-1,ALLE) ))
+                    {
+                        add("L", " ");
+                        n += 2;
+                        break;
+                    }
+                    n += 2;
+                }
+                else
+                {
+                    n += 1;
+                }
+                add("L");
+                break;
+
+            case 'M':
+                if ((match(word,n-1,UMB) && ((n+1 == last) || match(word,n+2,ER)))
+                    //'dumb','thumb'
+                    || match(word,n+1,'M'))
+                {
+                    n += 2;
+                }
+                else
+                {
+                    n += 1;
+                }
+                add("M");
+                break;
+
+            case 'N':
+                n += match(word,n+1,'N') ? 2 : 1;
+                add("N");
+                break;
+
+            case '�':
+                n += 1;
+                add("N");
+                break;
+
+            case 'P':
+                if (match(word,n+1,'H'))
+                {
+                    add("F");
+                    n += 2;
+                    break;
+                }
+
+                // also account for "campbell", "raspberry"
+                n += match(word,n+1,P_B) ? 2 : 1;
+                add("P");
+                break;
+
+            case 'Q':
+                n += match(word,n+1,'Q') ? 2 : 1;
+                add("K");
+                break;
+
+            case 'R':
+                // french e.g. 'rogier', but exclude 'hochmeier'
+                if ((n == last) &&
+                    !isSlavoGermanic &&
+                    match(word,n-2,IE) &&
+                    !match(word,n-4,ME_MA))
+                {
+                    add("", "R");
+                }
+                else
+                {
+                    add("R");
+                }
+
+                n += match(word,n+1,'R') ? 2 : 1;
+                break;
+
+            case 'S':
+                // special cases 'island', 'isle', 'carlisle', 'carlysle'
+                if (match(word,n-1,ISL_YSL))
+                {
+                    n += 1;
+                    break;
+                }
+
+                // special case 'sugar-'
+                if ((n == 0) && match(word,n,SUGAR))
+                {
+                    add("X", "S");
+                    n += 1;
+                    break;
+                }
+
+                if (match(word,n,SH))
+                {
+                    // germanic
+                    if (match(word,n+1,HEIM_HOEK_HOLM_HOLZ))
+                        add("S");
+                    else
+                        add("X");
+                    n += 2;
+                    break;
+                }
+
+                // italian & armenian
+                if (match(word,n,SIO_SIA) || match(word,n,SIAN))
+                {
+                    if (!isSlavoGermanic)
+                        add("S", "X");
+                    else
+                        add("S");
+                    n += 3;
+                    break;
+                }
+
+                // german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
+                // also, -sz- in slavic language altho in hungarian it is pronounced 's'
+                if (((n == 0) && match(word,n+1,M_N_L_W)) || match(word,n+1,'Z'))
+                {
+                    add("S", "X");
+                    n += match(word,n+1,'Z') ? 2 : 1;
+                    break;
+                }
+
+                if (match(word,n,SC))
+                {
+                    // Schlesinger's rule
+                    if (match(word,n+2,'H'))
+                    {
+                        // dutch origin, e.g. 'school', 'schooner'
+                        if (match(word,n+3,OO_ER_EN_UY_ED_EM))
+                        {
+                            // 'schermerhorn', 'schenker'
+                            if (match(word,n+3,ER_EN))
+                                add("X", "SK");
+                            else
+                                add("SK");
+                            n += 3;
+                            break;
+                        }
+                        else
+                        {
+                            if((n == 0) && !isVowel(word,3) && !match(word,3,'W'))
+                                add("X", "S");
+                            else
+                                add("X");
+                            n += 3;
+                            break;
+                        }
+                    }
+
+                    if (match(word,n+2,I_E_Y))
+                        add("S");
+                    else
+                        add("SK");
+                    n += 3;
+                    break;
+                }
+
+                // french e.g. 'resnais', 'artois'
+                if ((n == last) && match(word,n-2,AI_OI))
+                    add("", "S");
+                else
+                    add("S");
+
+                n += match(word,n+1,S_Z) ? 2 : 1;
+                break;
+
+            case 'T':
+                if (match(word,n,TION))
+                {
+                    add("X");
+                    n += 3;
+                    break;
+                }
+
+                if (match(word,n,TIA_TCH))
+                {
+                    add("X");
+                    n += 3;
+                    break;
+                }
+
+                if (match(word,n,TH_TTH))
+                {
+                    // special case 'thomas', 'thames' or germanic
+                    if (match(word,n+2,OM_AM) || match(word,0,VAN__VON__SCH))
+                        add("T");
+                    else
+                        add("0", "T");
+                    n += 2;
+                    break;
+                }
+
+                n += match(word,n+1,T_D) ? 2 : 1;
+                add("T");
+                break;
+
+            case 'V':
+                n += match(word,n+1,'V') ? 2 : 1;
+                add("F");
+                break;
+
+            case 'W':
+                // can also be in middle of word
+                if (match(word,n,WR))
+                {
+                    add("R");
+                    n += 2;
+                    break;
+                }
+
+                if ((n == 0) && (isVowel(word,n+1) || match(word,n,WH)))
+                {
+                    // Wasserman should match Vasserman
+                    if (isVowel(word,n+1))
+                        add("A", "F");
+                    else
+                        //need Uomo to match Womo
+                        add("A");
+                }
+
+                // Arnow should match Arnoff
+                if (((n == last) && isVowel(word,n-1))
+                    || match(word,n-1,EWSKI_EWSKY_OWSKI_OWSKY)
+                    || match(word,0,SCH))
+                {
+                    add("", "F");
+                    n += 1;
+                    break;
+                }
+
+                // polish e.g. 'filipowicz'
+                if (match(word,n,WICZ_WITZ))
+                {
+                    add("TS", "FX");
+                    n += 4;
+                    break;
+                }
+
+                // else skip it
+                n += 1;
+                break;
+
+            case 'X':
+                // french e.g. breaux
+                if (!((n == last) && (match(word,n-3,IAU_EAU) || match(word,n-2,AU_OU))))
+                    add("KS");
+
+                n += match(word,n+1,C_X) ? 2 : 1;
+                break;
+
+            case 'Z':
+                // chinese pinyin e.g. 'zhao'
+                if (match(word,n+1,'H'))
+                {
+                    add("J");
+                    n += 2;
+                    break;
+                }
+                else
+                {
+                    if (match(word,n+1,ZO_ZI_ZA)
+                        || (isSlavoGermanic && (n > 0) && !match(word,n-1,'T')))
+                    {
+                        add("S", "TS");
+                    }
+                    else
+                        add("S");
+                }
+
+                n += match(word,n+1,'Z') ? 2 : 1;
+                break;
+
+            default:
+                n += 1;
+            }
+        }
+
+        if (maxLength < 0)
+        {
+            // result with unlimited length
+            if (hasAlternate)
+                return new String[] { primaryBuffer.toString(), secondaryBuffer.toString() };
+            else
+                return new String[] { primaryBuffer.toString() };
+        }
+        else
+        {
+            // limit the length of the resulting strings
+            final int primaryLength = Math.min(maxLength, primaryBuffer.length());
+            if (hasAlternate)
+            {
+                final int secondaryLength = Math.min(maxLength, secondaryBuffer.length());
+                return new String[] { primaryBuffer.toString().substring(0,primaryLength),
+                                      secondaryBuffer.toString().substring(0,secondaryLength) };
+            }
+            else
+                return new String[] { primaryBuffer.toString().substring(0,primaryLength) };
+        }
+    }
+
+    /**
+     * Test of algorithm with default constructed encoder.
+     * The encoded arguments are printed to <tt>System.out</tt>.
+     */
+    public static void main (String[] argv)
+    {
+        final DoubleMetaphone generator = new DoubleMetaphone();
+
+        for (int n = 0; n < argv.length; n++)
+        {
+            String[] keys = generator.generateKeys(argv[n]);
+            for (int m=0; m < keys.length; m++)
+                System.out.print(keys[m] + " ");
+            System.out.println();
+        }
+    }
+}
+
Index: org/apache/lucene/search/phonetic/Metaphone.java
===================================================================
RCS file: org/apache/lucene/search/phonetic/Metaphone.java
diff -N org/apache/lucene/search/phonetic/Metaphone.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ org/apache/lucene/search/phonetic/Metaphone.java	10 Jan 2004 00:41:32 -0000
@@ -0,0 +1,300 @@
+
+/*
+    Phonetix: a class-library of phonetic algorithms.
+    Copyright (C) 2001-2003  Claus Engel
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+package org.apache.lucene.search.phonetic;
+
+/**
+ * Encoder implementing the phonetic algorithm "Metaphone".
+ * Metaphone was originally developed by Lawrence Philips.
+ * This implementation is based on his original BASIC implementation.
+ *
+ * @see DoubleMetaphone
+ * @see Soundex
+ * @author Claus Engel
+ */
+public final class Metaphone extends MetaphoneEncoder
+{
+    private final static String[] GN_KN_PN_WR_AE    = new String[] { "GN", "KN", "PN", "WR", "AE" };
+    private final static String[] WH                = new String[] { "WH" };
+    private final static String[] IA                = new String[] { "IA" };
+    private final static String[] SCE_SCI_SCY       = new String[] { "SCE", "SCI", "SCY" };
+    private final static String[] E_I_Y             = new String[] { "E", "I", "Y" };
+    private final static String[] SCH               = new String[] { "SCH" };
+    private final static String[] GE_GI_GY          = new String[] { "GE", "GI", "GY" };
+    private final static String[] NED               = new String[] { "NED" };
+    private final static String[] DGE_DGI_DGY       = new String[] { "DGE", "DGI", "DGY" };
+    private final static String[] C_S_P_T_G         = new String[] { "C", "S", "P", "T", "G" };
+    private final static String[] IO_IA             = new String[] { "IO", "IA" };
+    private final static String[] CH                = new String[] { "CH" };
+
+    /**
+     * Constructs a Metaphone encoder which generates keys of given
+     * maximal length.
+     * @param maxLength the maximal length of the generated keys. If negative,
+     *                  the lengths of the keys returned are only limited
+     *                  by the lengths of the words to encode.
+     */
+    public Metaphone (int maxLength)
+    {
+        super (maxLength);
+    }
+
+    /**
+     * Constructs a Metaphone encoder which generates keys with
+     * maximal length 4.
+     */
+    public Metaphone ()
+    {
+        super (4);
+    }
+
+    /**
+     * Returns a <tt>String</tt> identifying the algorithm.
+     */
+    public String toString()
+    {
+        return "Metaphone_"+maxLength;
+    }
+
+    /**
+     * Returns the encoding of the given word.
+     * @param word the word to encode.
+     * @return an array with the encoding of the word.
+     *         This is never <code>null</code>.
+     */
+    public String[] generateKeys (final String word)
+    {
+        return (word!=null && word.length()>0) ? new String[] {generateKey(word)} : EMPTY_KEYS;
+    }
+
+    /**
+     * Returns the encoding of the given word.
+     * @param word the word to encode.
+     * @return the encoding of the word. This is never <code>null</code>.
+     */
+    public String generateKey (String word)
+    {
+        if (word == null || word.length() == 0)
+            return "";
+
+        final StringBuffer buffer = new StringBuffer(word.length());
+
+        word = word.toUpperCase();
+
+        if (match(word,0,GN_KN_PN_WR_AE))
+            word = word.substring(1);
+
+        if (match(word,0,'X'))
+            word = "S" + word.substring(1);
+
+        if (match(word,0,WH))
+            word = "W" + word.substring(2);
+
+        final int length = word.length();
+        final int last   = length -1;
+
+        for (int n=0; n < length && (maxLength < 0 || (buffer.length() < maxLength)); n++)
+        {
+            char c = word.charAt(n);
+            if (c != 'C' && n > 0 && match(word,n-1,c))
+                continue;
+
+            switch (c)
+            {
+            case 'A':
+                if (n == 0)
+                    buffer.append('A');
+                break;
+
+            case 'B':
+                if ((n != last) || !match(word,n-1,'M'))
+                    buffer.append('B');
+                break;
+
+            case 'C':
+                if (!match(word,n-1,SCE_SCI_SCY))
+                {
+                    if (match(word,n+1,IA))
+                        buffer.append('X');
+                    else if (match(word,n+1,E_I_Y))
+                        buffer.append('S');
+                    else if (match(word,n-1,SCH))
+                        buffer.append('K');
+                    else if (match(word,n+1,'H'))
+                        buffer.append( (n==0 && !isVowel(word,n+2)) ? 'K' : 'X' );
+                    else
+                        buffer.append('K');
+                }
+                break;
+
+            case 'D':
+                buffer.append( match(word,n+1,GE_GI_GY) ? 'J' : 'T' );
+                break;
+
+            case 'E':
+                if (n == 0)
+                    buffer.append('E');
+                break;
+
+            case 'F':
+                buffer.append('F');
+                break;
+
+            case 'G':
+                boolean silent = match(word,n+1,'H') && !isVowel(word,n+2);
+
+                if (n > 0)
+                {
+                    if ( (n+1 == last && match(word,n+1,'N')) ||  // -GN
+                         (n+3 == last && match(word,n+1,NED)) )   // -GNED
+                        silent = true;
+
+                    if (match(word,n-1,DGE_DGI_DGY)) // -DGE- -DGI- -DGY-
+                        silent = true;
+                }
+
+                if (!silent)
+                    buffer.append( (match(word,n+1,E_I_Y) && !match(word,n-1,'G')) ? 'J' : 'K' );
+                break;
+
+            case 'H':
+                if (n < last && !match(word,n-1,C_S_P_T_G) && isVowel(word,n+1))
+                    buffer.append('H');
+                break;
+
+            case 'I':
+                if (n == 0)
+                    buffer.append('I');
+                break;
+
+            case 'J':
+                buffer.append('J');
+                break;
+
+            case 'K':
+                if ((n==0) || !match(word,n-1,'C'))
+                    buffer.append('K');
+                break;
+
+            case 'L':
+                buffer.append('L');
+                break;
+
+            case 'M':
+                buffer.append('M');
+                break;
+
+            case 'N':
+                buffer.append('N');
+                break;
+
+            case 'O':
+                if (n == 0)
+                    buffer.append('O');
+                break;
+
+            case 'P':
+                buffer.append( match(word,n+1,'H') ? 'F' : 'P' );
+                break;
+
+            case 'Q':
+                buffer.append('K');
+                break;
+
+            case 'R':
+                buffer.append('R');
+                break;
+
+            case 'S':
+                if (match(word,n+1,IO_IA))
+                    buffer.append('X');
+                else
+                    buffer.append( match(word,n+1,'H') ? 'X' : 'S' );
+                break;
+
+            case 'T':
+                if (match(word,n+1,IO_IA))
+                    buffer.append('X');
+                else if (match(word,n+1,'H'))
+                {
+                    if (!match(word,n-1,'T'))
+                        buffer.append('0');
+                }
+                else
+                {
+                    if (!match(word,n+1,CH))
+                        buffer.append('T');
+                }
+                break;
+
+            case 'U':
+                if (n == 0)
+                    buffer.append('U');
+                break;
+
+            case 'V':
+                buffer.append('F');
+                break;
+
+            case 'W':
+                if (isVowel(word,n+1))
+                    buffer.append('W');
+                break;
+
+            case 'X':
+                buffer.append("KS");
+                break;
+
+            case 'Y':
+                if (isVowel(word,n+1))
+                    buffer.append('Y');
+                break;
+
+            case 'Z':
+                buffer.append('S');
+                break;
+            }
+        }
+
+        if (maxLength < 0)
+        {
+            // result with unlimited length
+            return buffer.toString();
+        }
+        else
+        {
+            // limit the length of the resulting strings
+            final int bufferLength = Math.min(maxLength, buffer.length());
+            return buffer.toString().substring(0,bufferLength);
+        }
+    }
+
+    /**
+     * Test of algorithm with default constructed encoder.
+     * The encoded arguments are printed to <tt>System.out</tt>.
+     */
+    public static void main (String[] argv)
+    {
+        final Metaphone generator = new Metaphone();
+
+        for (int n = 0; n < argv.length; n++)
+            System.out.println(generator.generateKeys(argv[n])[0]);
+    }
+}
+
Index: org/apache/lucene/search/phonetic/MetaphoneEncoder.java
===================================================================
RCS file: org/apache/lucene/search/phonetic/MetaphoneEncoder.java
diff -N org/apache/lucene/search/phonetic/MetaphoneEncoder.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ org/apache/lucene/search/phonetic/MetaphoneEncoder.java	10 Jan 2004 00:41:32 -0000
@@ -0,0 +1,70 @@
+
+/*
+    Phonetix: a class-library of phonetic algorithms.
+    Copyright (C) 2001-2003  Claus Engel
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+package org.apache.lucene.search.phonetic;
+
+/**
+ * Abstract base class of "metaphone"-encoders.
+ *
+ * @author Claus Engel
+ */
+public abstract class MetaphoneEncoder extends PhoneticEncoder
+{
+    protected final int maxLength;
+
+    /**
+     * Constructs a phonetic encoder of type "metaphone".
+     * @param maxLength the maximal length of the keys to generate by
+     *                  this <code>MetaphoneEncoder</code>. If the given
+     *                  length is lower than zero, the lengths of the generated
+     *                  keys are only limited by the size of the words to encode.
+     */
+    public MetaphoneEncoder (int maxLength)
+    {
+        this.maxLength = maxLength;
+    }
+
+    protected static boolean isVowel (final String string, int pos)
+    {
+        if (pos < 0 || string.length() <= pos)
+            return false;
+
+        final char c = string.charAt(pos);
+        return c=='A' || c=='E' || c=='I' || c=='O' || c=='U';
+    }
+
+    protected static boolean match (final String string, int pos, final String[] strings)
+    {
+        if (0 <= pos && pos < string.length())
+        {
+            for (int n=strings.length-1; n >= 0; n--)
+            {
+                if (string.regionMatches(pos, strings[n], 0, strings[n].length()))
+                    return true;
+            }
+        }
+        return false;
+    }
+
+    protected static boolean match (final String string, int pos, char c)
+    {
+        return (0 <= pos && pos < string.length()) ? string.charAt(pos) == c : false;
+    }
+}
+
Index: org/apache/lucene/search/phonetic/PhoneticEncoder.java
===================================================================
RCS file: org/apache/lucene/search/phonetic/PhoneticEncoder.java
diff -N org/apache/lucene/search/phonetic/PhoneticEncoder.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ org/apache/lucene/search/phonetic/PhoneticEncoder.java	10 Jan 2004 00:41:33 -0000
@@ -0,0 +1,55 @@
+
+/*
+    Phonetix: a class-library of phonetic algorithms.
+    Copyright (C) 2001-2003  Claus Engel
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+package org.apache.lucene.search.phonetic;
+
+/**
+ * <code>PhoneticEncoder</code>s generate one or more keys for a given word
+ * using a phonetic algorithm. The goal of each phonetic algorithm is to
+ * generate the same keys for words which have a similar pronunciation.
+ *
+ * @author Claus Engel
+ */
+public abstract class PhoneticEncoder
+{
+    /**
+     * Generates an array of keys.
+     *
+     * @param word the word for which the keys have to be generated.
+     * @return an array of keys. The keys of more importance are found
+     *         at the smaller indices, i.e. the most important key is found
+     *         at index zero. The array is never <code>null</code>, but of length
+     *         zero, if the given word is <code>null</code> or the empty-string.
+     */
+    public abstract String[] generateKeys (String word);
+
+    /**
+     * Generates a key. If the underlying algorithm creates more
+     * than one key, the default key, i.e. the most important key, is returned.
+     *
+     * @param word the word for which the key has to be generated.
+     * @return a key. The result is never <code>null</code>, i.e. if
+     *         the given word is <code>null</code> or the empty-string,
+     *         then the empty-string is returned.
+     */
+    public abstract String generateKey (String word);
+
+    public static final String[] EMPTY_KEYS = new String[0];
+}
+
Index: org/apache/lucene/search/phonetic/PhoneticQuery.java
===================================================================
RCS file: org/apache/lucene/search/phonetic/PhoneticQuery.java
diff -N org/apache/lucene/search/phonetic/PhoneticQuery.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ org/apache/lucene/search/phonetic/PhoneticQuery.java	10 Jan 2004 00:41:33 -0000
@@ -0,0 +1,76 @@
+package org.apache.lucene.search.phonetic;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.FilteredTermEnum;
+import org.apache.lucene.search.MultiTermQuery;
+
+import java.io.IOException;
+
+/** Implements the phonetic search query */
+
+public class PhoneticQuery extends MultiTermQuery {
+  public PhoneticQuery(Term term) {
+    super(term);
+  }
+  protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
+    return new PhoneticTermEnum(reader, getTerm());
+  }
+  public String toString(String field) {
+    return super.toString(field) + '$';
+  }
+}
Index: org/apache/lucene/search/phonetic/PhoneticTermEnum.java
===================================================================
RCS file: org/apache/lucene/search/phonetic/PhoneticTermEnum.java
diff -N org/apache/lucene/search/phonetic/PhoneticTermEnum.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ org/apache/lucene/search/phonetic/PhoneticTermEnum.java	10 Jan 2004 00:41:34 -0000
@@ -0,0 +1,199 @@
+package org.apache.lucene.search.phonetic;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.FilteredTermEnum;
+
+/**
+ * Subclass of FilteredTermEnum for enumerating all terms that match the
+ * specified phonetic filter term.
+ * <p>
+ * Term enumerations are always ordered by Term.compareTo().  Each term in
+ * the enumeration is greater than all that precede it.
+ */
+
+public class PhoneticTermEnum extends FilteredTermEnum {
+
+    public static final char PHONETIC_CHAR = '$';
+
+    private boolean searchAll = false;
+    private boolean endTerm = false;
+    private String encoding;
+    private Term startTerm;
+    private float distance;
+
+    // create a default Metaphone encoder with unlimited key length
+    // since this is static all readers will use the same encoder, unless the reader
+    // implements PhoneticTermProvider
+    
+    public static PhoneticEncoder defaultEncoder = new Metaphone(-1);
+    public static float minimumDistance = 1.0f;
+
+    PhoneticEncoder encoder = null;
+    
+    /**
+     * Creates a new <code>PhoneticTermEnum</code>.  Passing in a
+     * {@link org.apache.lucene.index.Term Term} that does not end with
+     * <code>SOUNDEX_CHAR</code> will cause an exception to be thrown.
+     */
+    public PhoneticTermEnum(IndexReader reader, Term term) throws IOException {
+        super(reader, term);
+        
+        startTerm = term;
+        
+        if (reader instanceof PhoneticTermProvider) {
+            encoder = ((PhoneticTermProvider)reader).phoneticEncoder();
+            setEnum(((PhoneticTermProvider) reader).phoneticTerms(term));
+        } else {
+            encoder = defaultEncoder;
+            // if no provider, search all terms
+            searchAll = true;
+            encoding = encoder.generateKey(term.text());
+            setEnum(reader.terms(new Term(term.field(), "")));
+        }
+    }
+    
+    /**
+     * only match encoding keys that start with the encoding of the search term
+     */
+    protected final boolean termCompare(Term term) {
+        if(!startTerm.field().equals(term.field())){
+            endTerm = true;
+            return false;
+        }
+        
+        String key = encoder.generateKey(term.text());
+        
+        if(key.equals(encoding))
+            distance = 1.5f;
+        else if(encoding.startsWith(key))
+            distance = 1;
+        else if(key.startsWith(encoding))
+            distance = 1;
+        else
+            return false;
+            
+        // boost prefixes
+        if(term.text().startsWith(startTerm.text()))
+            distance += .5;
+            
+        int editdist = editDistance(term.text(),startTerm.text());
+        int termlen = term.text().length();
+        int startlen = startTerm.text().length();
+        
+        distance = distance - ((float)editdist / (float)Math.min(termlen,startlen));
+
+//        System.err.println("term = "+term+", key =  "+key+", distance = "+distance);
+        
+        if(distance<minimumDistance)
+            return false;            
+        
+        
+        return true;
+    }
+    
+    public final float difference() {
+        return distance;
+    }
+    public final boolean endEnum() {
+        if (!searchAll)
+            // always return false since the enum will return false
+            return false;
+        return endTerm;
+    }
+    
+    /*
+     * code copied from FuzzyTermEnum, but removed unneccessary optimizations for clarity
+     */
+    private final int editDistance(String s, String t) {
+        int n = s.length();
+        int m = t.length();
+        
+        int d[][] = new int[n+1][m+1];
+        
+        int i; // iterates through s
+        int j; // iterates through t
+        char s_i; // ith character of s
+        
+        if (n == 0) return m;
+        if (m == 0) return n;
+        
+        // init matrix d
+        for (i = 0; i <= n; i++) d[i][0] = i;
+        for (j = 0; j <= m; j++) d[0][j] = j;
+        
+        // start computing edit distance
+        for (i = 1; i <= n; i++) {
+            s_i = s.charAt(i - 1);
+            for (j = 1; j <= m; j++) {
+                if (s_i != t.charAt(j-1))
+                    d[i][j] = min(d[i-1][j], d[i][j-1], d[i-1][j-1])+1;
+                else d[i][j] = min(d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1]);
+            }
+        }
+        
+        // we got the result!
+        return d[n][m];
+    }
+    
+    private final int min(int a, int b, int c) {
+        int t = (a < b) ? a : b;
+        return (t < c) ? t : c;
+    }
+}
Index: org/apache/lucene/search/phonetic/PhoneticTermProvider.java
===================================================================
RCS file: org/apache/lucene/search/phonetic/PhoneticTermProvider.java
diff -N org/apache/lucene/search/phonetic/PhoneticTermProvider.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ org/apache/lucene/search/phonetic/PhoneticTermProvider.java	10 Jan 2004 00:41:34 -0000
@@ -0,0 +1,78 @@
+package org.apache.lucene.search.phonetic;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+
+/**
+ * IndexReader optimized phonetic search support, for those readers that can support it
+ */
+public interface PhoneticTermProvider {
+    /**
+     * return an enum for all terms that phonetically match a term
+     *  
+     * @param t the starting term
+     * @return the TermEnum of matching Terms
+     */
+    TermEnum phoneticTerms(Term t);
+    
+    /**
+     * return the phonetic encoder used by the reader, so differences can be calculated
+     * 
+     * @return the phonetic encoder
+     */
+    PhoneticEncoder phoneticEncoder();
+}
Index: org/apache/lucene/search/phonetic/Soundex.java
===================================================================
RCS file: org/apache/lucene/search/phonetic/Soundex.java
diff -N org/apache/lucene/search/phonetic/Soundex.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ org/apache/lucene/search/phonetic/Soundex.java	10 Jan 2004 00:41:34 -0000
@@ -0,0 +1,188 @@
+
+/*
+    Phonetix: a class-library of phonetic algorithms.
+    Copyright (C) 2001-2003  Claus Engel
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+package org.apache.lucene.search.phonetic;
+
+/**
+ * Encoder implementing the phonetic algorithm "Soundex".
+ * Soundex is described in Donald Knuth's
+ * <i>The Art of Computer Programming</i>, Vol.3.
+ *
+ * @see DoubleMetaphone
+ * @see Metaphone
+ * @author Claus Engel
+ */
+public final class Soundex extends PhoneticEncoder
+{
+    private final boolean full;
+    private final int     length;
+
+    /**
+     * Constructs a Soundex encoder which generates keys of given length.
+     * @param full a flag which specifies, whether the first character has to
+     *             to be encoded, or not. If <code>false</code>, then this encoder works
+     *             like the original soundex described by Knuth, i.e. 'Knuth'
+     *             will become 'K530'. If <code>true</code>, then this encoder will encode
+     *             the first character too, i.e. 'Knuth' will become '2530'.
+     * @param length the length of the keys to generate.
+     */
+    public Soundex (boolean full, int length)
+    {
+        this.full   = full;
+        this.length = length;
+    }
+
+    /**
+     * Constructs a Soundex encoder which generates keys of length 4.
+     * @param full a flag which specifies, whether the first character has to
+     *             to be encoded, or not. If <code>false</code>, this encoder works
+     *             like the original soundex described by Knuth, i.e. 'Knuth'
+     *             will become 'K530'. If <code>true</code>, this encoder will encode
+     *             the first character too, i.e. 'Knuth' will become '2530'.
+     */
+    public Soundex (boolean full)
+    {
+        this (full, 4);
+    }
+
+    /**
+     * Constructs an original Soundex encoder which generates keys of given length.
+     * @param length the length of the keys to generate.
+     */
+    public Soundex (int length)
+    {
+        this (false, length);
+    }
+
+    /**
+     * Constructs an original Soundex encoder which generates keys of length 4.
+     */
+    public Soundex ()
+    {
+        this(false, 4);
+    }
+
+    /**
+     * Returns a <tt>String</tt> identifying the algorithm.
+     */
+    public String toString()
+    {
+        return "Soundex_"+full+"_"+length;
+    }
+
+    private static char getCode (final char c)
+    {
+        switch (Character.toLowerCase(c))
+        {
+        case 'b':
+        case 'p':
+        case 'f':
+        case 'v': return '1';
+        case 'c':
+        case 's':
+        case 'k':
+        case 'g':
+        case 'j':
+        case 'q':
+        case 'x':
+        case 'z': return '2';
+        case 'd':
+        case 't': return '3';
+        case 'l': return '4';
+        case 'm':
+        case 'n': return '5';
+        case 'r': return '6';
+        }
+        return '*';
+    }
+
+    /**
+     * Returns the encoding of the given word.
+     * @param word the word to encode.
+     * @return an array with the encoding of the word.
+     *         This is never <code>null</code>.
+     */
+    public String[] generateKeys (final String word)
+    {
+        return (word!=null && word.length()>0) ? new String[] {generateKey(word)} : EMPTY_KEYS;
+    }
+
+    /**
+     * Returns the encoding of the given word.
+     * @param word the word to encode.
+     * @return the encoding of the word. This is never <code>null</code>.
+     */
+    public String generateKey (final String word)
+    {
+        if (length <= 0)
+            return "";
+
+        if (word == null || word.length() == 0)
+            return "";
+
+        final char[] chars = word.toCharArray();
+        final StringBuffer result = new StringBuffer(length);
+
+        int inIdx, outIdx;
+        char prevDigit;
+        if (full)
+        {
+            inIdx = outIdx = 0;
+            prevDigit = '*';
+        }
+        else
+        {
+            inIdx = outIdx = 1;
+            result.append(Character.toUpperCase(chars[0]));
+            prevDigit = getCode(chars[0]);
+        }
+
+        while (inIdx < chars.length && outIdx < length)
+        {
+            char c = getCode(chars[inIdx]);
+
+            if (c!='*' && c!=prevDigit)
+            {
+                result.append(c);
+                outIdx++;
+            }
+
+            prevDigit = c;
+            inIdx++;
+        }
+
+        for (; outIdx < length; outIdx++)
+            result.append('0');
+
+        return result.toString();
+    }
+
+    /**
+     * Test of algorithm with default constructed encoder.
+     * The encoded arguments are printed to <tt>System.out</tt>.
+     */
+    public static void main (String[] argv)
+    {
+        final Soundex generator = new Soundex();
+
+        for (int n = 0; n < argv.length; n++)
+            System.out.println(generator.generateKey(argv[n]));
+    }
+}
+