Soundex routine(s) from Ross Patterson (see EMail
Addresses)
Captured from a message in a public Internet news group (see Internet
- Newsgroups)
converted to OS/2 REXX by Bernd Schemmer
/* Message from the author: */
/* */
/* Someone asked for a copy of the SOUNDEX algorithm in REXX. I */
/* recently dug the following up from my archives, in response to a */
/* request for the NYSIIS algorithm. They're both there, so you get */
/* two for the price of one! */
/* */
/* The SOUNDEX implementation is mine, and while true to the */
/* definition of SOUNDEX, it is NOT the exact algorithm published so */
/* many years ago. That algorithm was heavily loop-based and didn't */
/* perform well. This algorithm produces the same results much more */
/* quickly, relying on some REXX-isms like Space(). The NYSIIS */
/* implementation is a literal translation of Jeff Kell's */
/* implementation (written in an old language called SPL) into REXX */
/* by yours truly. */
/* */
/* Please don't criticise the coding style, I was quite a bit younger */
/* when I wrote these ;-) */
/* */
/* Enjoy, */
/* Ross Patterson */
/* Sterling Software, Inc. */
/* VM Software Division */
/* */
/* */
/**********************************************************************/
/* NAMEHASH: Phonetic Name Indexing Routine Jeff Kell */
/* */
/* This routine produces two different phonetic name keys from */
/* a given stored name in the form (Last,First) with a comma */
/* as a delimiter to the last name. The first algorithm used */
/* is the Soundex algorithm, variant 1: */
/* (1) Convert characters to numerics using table: */
/* 0 = A,E,H,I,O,U,W,Y */
/* 1 = B,F,P,V */
/* 2 = C,G,J,K,Q,S,X,Z */
/* 3 = D,T */
/* 4 = L */
/* 5 = M,N */
/* 6 = R */
/* (2) Make multiple digits single. */
/* (3) Remove zeroes after first position. */
/* (4) Fill on right with zeroes to make 6 characters. */
/* (5) Replace first digit with first character of name. */
/* */
/* Stated reliability of Soundex is 95.99% with selectivity */
/* factor of .213% for a name inquiry. */
/* */
/* The second algorithm is the New York State Identification */
/* and Intelligence System, or NYSIIS algorithm. This routine */
/* is more reliable and selective than Soundex, especially for */
/* grouped phonetic sounds. It does not perform well with 'Y' */
/* groups as 'Y' is not translated. NYSIIS yields an alpha */
/* key which is filled or rounded to 10 characters: */
/* (1) Translate first characters of name: */
/* MAC => MCC KN => NN K => C */
/* PH => FF PF => FF SCH => SSS */
/* (2) Translate last characters of name: */
/* EE => Y IE => Y */
/* DT,RT,RD,NT,ND => D */
/* (3) First character of key = first character of name. */
/* (4) Translate remaining characters by following rules, */
/* incrementing by one character each time: */
/* a. EV => AF else A,E,I,O,U => A */
/* b. Q => G Z => S M => N */
/* c. KN => N else K => C */
/* d. SCH => SSS PH => FF */
/* e. H => If previous or next is nonvowel, previous */
/* f. W => If previous is vowel, previous */
/* Add current to key if current <> last key character */
/* (5) If last character is S, remove it */
/* (6) If last characters are AY, replace with Y */
/* (7) If last character is A, remove it */
/* */
/* Stated reliability of NYSIIS is 98.72% with a selectivity */
/* factor of .164% for a name inquiry. */
/* */
/* Both algorithms are taken from Robert L. Taft, "Name Search */
/* Techniques", New York State Identification and Intelligence */
/* System. */
/* */
/* SPL version by Jeff Kell, U. Tennasee at Chatanooga */
/* Translated to REXX by Ross Patterson, Rutgers University */
/* on 05/05/88 */
/* */
/**********************************************************************/
do forever
call LineOut , "Enter the input for the algorithms (RETURN to end): "
thisName = lineIN()
if thisName = "" then
leave
Parse value NameHash( thisName ) with RC Soundex NYIIS .
say "RC = " || rc || ", soundex = " || soundex || ", NYIIS = " || nyiis
end /* do forever */
exit 0
/* ------------------------------------------------------------------ */
/* function: calculate the SOUNDEX and NYSIIS values for a string */
/* */
/* call: NameHash name */
/* */
/* where: name */
/* */
/* returns: rc soundex nyiis */
/* */
NameHash: Procedure
Name = Space( Translate( Arg( 1 ) ),0 )
Parse var Name Name "," .
If Name = "" then
Return 1
Return 0 Soundex( Name,6 ) NYSIIS( Name )
/* ------------------------------------------------------------------ */
/* function: SOUNDEX translation from source1 to target1 */
/* */
/* call: Soundex name {,length) */
/* */
/* where: name */
/* length */
/* */
/* returns: soundex value */
/* */
Soundex: Procedure
Source = Arg( 1 )
Length = Arg( 2 )
If Length = "" then
Length = 6
Result = Left( Source,1 )
SoundexNum = "01230120022455012623010202"
/* ABCDEFGHIJKLMNOPQRSTUVWXYZ */
Source = Translate( Source,SoundexNum,"ABCDEFGHIJKLMNOPQRSTUVWXYZ","0" )
Do I = 1 to Length( Source )
Do J = I+1 to Length( Source ) ,
While Substr( Source,I,1 ) = Substr( Source,J,1 )
Source = Left( Source,J-1 )'0'Substr( Source,J+1 )
End
End
Result = Result || Substr( Space( Translate( Source, " ","0" ),0 ),2 )
Return Left( Result,Length,"0" )
/* ------------------------------------------------------------------ */
/* function: NYSIIS PHONETIC CODE TRANSLATION FROM SOURCE TO TARGET */
/* */
/* call: NYSIIS name */
/* */
/* where: name */
/* */
/* returns: NYSIIS value */
/* */
NYSIIS: Procedure
Source = Arg( 1 )
Select
When Left( Source,3 ) = "MAC" then
Source = "MCC"Substr( Source,4 )
When Left( Source,3 ) = "SCH" then
Source = "SSS"Substr( Source,4 )
When Left( Source,2 ) = "KN" then
Source = "NN"Substr( Source,3 )
When Left( Source,2 ) = "PH" | ,
Left( Source,2 ) = "PF" then
Source = "FF"Substr( Source,3 )
When Left( Source,1 ) = "K" then
Source = "C"Substr( Source,2 )
Otherwise
Nop
End /* select */
Ending = Right( Source,2 )
If Ending = "EE" | Ending = "IE" then
Source = Left( Source,1,Length( Source )-2 )"Y"
If Ending = "DT" | Ending = "RT" | ,
Ending = "RD" | Ending = "NT" | ,
Ending = "ND" then
Source = Left( Source,1,Length( Source )-2 )"D"
Result = Left( Source,1 )
Do Cursor = 2 to Length( Source )
Char = ScanNYSIIS( )
/* original code: */
/* If Char ^= Right( Result,1 ) then */
/* replaced with: */
If Char <> Right( Result,1 ) then
Result = Result || Char
End
If Right( Result,1 ) = "S" then
Target1 = Left( Target1,Length( Target1 )-1 )
If Right( Result,2 ) = "AY" then
Target1 = Left( Target1,Length( Target1 )-2 )"Y"
If Right( Result,1 ) = "A" then
Target1 = Left( Target1,Length( Target1 )-1 )
Return Left( Result,10 )
/* ------------------------------------------------------------------ */
/* sub routine of NYSIIS */
/* */
ScanNYSIIS: Procedure expose Source Cursor
Vowels = "AEIOU"
Chars = Substr( Source,Cursor,3 )
Char = Left( Chars,1 )
Select
When Left( Chars,2 ) = "EV" then
Result = "AF"
/* original code: */
/* When Find( Char,Vowels ) > 0 then */
/* replaced with: */
When pos( Char,Vowels ) > 0 then
Result = "A"
When Char = "Q" then
Result = "G"
When Char = "Z" then
Result = "S"
When Char = "M" then
Result = "N"
When Left( Chars,2 ) = "KN" then
Result = "N"
When Char = "K" then
Result = "C"
When Left( Chars,3 ) = "SCH" then
Result = "SSS"
When Left( Chars,2 ) = "PH" then
Result = "FF"
When Char = "H" then
Do
If Find( Substr( Source,Cursor-1,1 ),Vowels ) = 0 then
Result = Substr( Source,Cursor-1,1 )
Else If Find( Substr( Chars,2,1 ),Vowels ) = 0 then
Result = Substr( Chars,2,1 )
End
When Cursor = "W" then ,
If Find( Substr( Source,Cursor-1,1 ),Vowels ) > 0 then
Result = Substr( Source,Cursor-1,1 )
otherwise
nop
End /* select */
Source = Left( Source,Cursor-1 ) || Result || Substr( Source,Cursor+1 )
Return Left( Result,1 )