| 
									
										
										
										
											1995-06-14 22:31:38 +00:00
										 |  |  | /*
 | 
					
						
							| 
									
										
										
										
											1995-06-14 22:49:20 +00:00
										 |  |  |   [Header: soundexmodule.c,v 1.2 95/05/02 15:40:45 dwwillia Exp ] | 
					
						
							| 
									
										
										
										
											1995-06-14 22:31:38 +00:00
										 |  |  |    | 
					
						
							|  |  |  |   Perform soundex comparisons on strings. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   Soundex is an algorithm that hashes English strings into a numerical value. | 
					
						
							|  |  |  |   Strings that sound the same are hashed to the same value.  This allows  | 
					
						
							|  |  |  |   for non-literal string matching. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   From: David Wayne Williams <dwwillia@iucf.indiana.edu> | 
					
						
							| 
									
										
										
										
											1996-05-23 22:54:17 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |   Apr 29 1996 - added get_soundex method that returns the soundex of a | 
					
						
							|  |  |  |                 string (chrish@qnx.com) | 
					
						
							|  |  |  |   May 2 1996  - added doc strings (chrish@qnx.com) | 
					
						
							| 
									
										
										
										
											1995-06-14 22:31:38 +00:00
										 |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <string.h>
 | 
					
						
							|  |  |  | #include <ctype.h>
 | 
					
						
							|  |  |  | #include "Python.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1996-05-23 22:54:17 +00:00
										 |  |  | static char soundex_module__doc__[] = | 
					
						
							|  |  |  | "Perform Soundex comparisons on strings, allowing non-literal matching."; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1996-05-24 20:50:57 +00:00
										 |  |  | static void soundex_hash(char *str, char *result) | 
					
						
							| 
									
										
										
										
											1995-06-14 22:31:38 +00:00
										 |  |  | { | 
					
						
							|  |  |  |     char *sptr = str;           /* pointer into str */ | 
					
						
							|  |  |  |     char *rptr = result;        /* pointer into result */ | 
					
						
							|  |  |  |      | 
					
						
							| 
									
										
										
										
											1996-12-05 23:41:26 +00:00
										 |  |  |     if(*str == '\0') | 
					
						
							| 
									
										
										
										
											1995-06-14 22:31:38 +00:00
										 |  |  |     { | 
					
						
							|  |  |  |         strcpy(result,"000000"); | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |              | 
					
						
							|  |  |  |     /*  Preserve the first character of the input string.
 | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     *(rptr++) = toupper(*(sptr++)); | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     /* Translate the rest of the input string into result.  The following
 | 
					
						
							|  |  |  |        transformations are used: | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1996-12-05 23:41:26 +00:00
										 |  |  |        1) All vowels, W, and H, are skipped. | 
					
						
							| 
									
										
										
										
											1995-06-14 22:31:38 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |        2) BFPV = 1 | 
					
						
							|  |  |  |           CGJKQSXZ = 2 | 
					
						
							|  |  |  |           DT = 3 | 
					
						
							|  |  |  |           L = 4 | 
					
						
							|  |  |  |           MN = 5 | 
					
						
							| 
									
										
										
										
											1996-12-05 23:41:26 +00:00
										 |  |  |           R = 6 | 
					
						
							| 
									
										
										
										
											1995-06-14 22:31:38 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |        3) Only translate the first of adjacent equal translations.  I.E. | 
					
						
							|  |  |  |           remove duplicate digits. | 
					
						
							|  |  |  |     */ | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1996-12-05 23:41:26 +00:00
										 |  |  |     for(;(rptr - result) < 6 &&  *sptr != '\0';sptr++) | 
					
						
							| 
									
										
										
										
											1995-06-14 22:31:38 +00:00
										 |  |  |     { | 
					
						
							|  |  |  |         switch (toupper(*sptr)) | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |         case 'W': | 
					
						
							|  |  |  |         case 'H': | 
					
						
							|  |  |  |         case 'A': | 
					
						
							|  |  |  |         case 'I': | 
					
						
							|  |  |  |         case 'O': | 
					
						
							|  |  |  |         case 'U': | 
					
						
							|  |  |  |         case 'Y': | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         case 'B': | 
					
						
							|  |  |  |         case 'F': | 
					
						
							|  |  |  |         case 'P': | 
					
						
							|  |  |  |         case 'V': | 
					
						
							|  |  |  |             if(*(rptr - 1) != '1') | 
					
						
							|  |  |  |                 *(rptr++) = '1'; | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         case 'C': | 
					
						
							|  |  |  |         case 'G': | 
					
						
							|  |  |  |         case 'J': | 
					
						
							|  |  |  |         case 'K': | 
					
						
							|  |  |  |         case 'Q': | 
					
						
							|  |  |  |         case 'S': | 
					
						
							|  |  |  |         case 'X': | 
					
						
							|  |  |  |         case 'Z': | 
					
						
							|  |  |  |             if(*(rptr - 1) != '2') | 
					
						
							|  |  |  |                 *(rptr++) = '2'; | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         case 'D': | 
					
						
							|  |  |  |         case 'T': | 
					
						
							|  |  |  |             if(*(rptr - 1) != '3') | 
					
						
							|  |  |  |                 *(rptr++) = '3'; | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         case 'L': | 
					
						
							|  |  |  |             if(*(rptr - 1) != '4') | 
					
						
							|  |  |  |                 *(rptr++) = '4'; | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         case 'M': | 
					
						
							|  |  |  |         case 'N': | 
					
						
							|  |  |  |             if(*(rptr - 1) != '5') | 
					
						
							|  |  |  |                 *(rptr++) = '5'; | 
					
						
							|  |  |  |             break; | 
					
						
							| 
									
										
										
										
											1996-12-05 23:41:26 +00:00
										 |  |  |         case 'R': | 
					
						
							|  |  |  |             if(*(rptr -1) != '6') | 
					
						
							|  |  |  |                 *(rptr++) = '6'; | 
					
						
							| 
									
										
										
										
											1995-06-14 22:31:38 +00:00
										 |  |  |         default: | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Pad 0's on right side of string out to 6 characters.
 | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     for(; rptr < result + 6; rptr++) | 
					
						
							|  |  |  |         *rptr = '0'; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Terminate the result string.
 | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											1996-12-05 23:41:26 +00:00
										 |  |  |     *(result + 6) = '\0'; | 
					
						
							| 
									
										
										
										
											1995-06-14 22:31:38 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1996-05-23 22:54:17 +00:00
										 |  |  | /* Return the actual soundex value.         */ | 
					
						
							|  |  |  | /* Added by Chris Herborth (chrish@qnx.com) */ | 
					
						
							|  |  |  | static char soundex_get_soundex__doc__[] = | 
					
						
							|  |  |  | 	"Return the (English) Soundex hash value for a string."; | 
					
						
							|  |  |  | static PyObject * | 
					
						
							|  |  |  | get_soundex(PyObject *self, PyObject *args) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 	char *str; | 
					
						
							|  |  |  | 	char sdx[7]; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if(!PyArg_ParseTuple( args, "s", &str)) | 
					
						
							|  |  |  | 	  return NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	soundex_hash(str, sdx); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return PyString_FromString(sdx); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static char soundex_sound_similar__doc__[] = | 
					
						
							|  |  |  | 	"Compare two strings to see if they sound similar (English)."; | 
					
						
							| 
									
										
										
										
											1995-06-14 22:31:38 +00:00
										 |  |  | static PyObject * | 
					
						
							|  |  |  | sound_similar(PyObject *self, PyObject *args) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     char *str1, *str2; | 
					
						
							|  |  |  |     char res1[7], res2[7]; | 
					
						
							|  |  |  |      | 
					
						
							|  |  |  |     if(!PyArg_ParseTuple(args, "ss", &str1, &str2)) | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     soundex_hash(str1, res1); | 
					
						
							|  |  |  |     soundex_hash(str2, res2); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if(!strcmp(res1,res2)) | 
					
						
							|  |  |  |         return Py_BuildValue("i",1); | 
					
						
							|  |  |  |     else | 
					
						
							|  |  |  |         return Py_BuildValue("i",0); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Python Method Table.
 | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static PyMethodDef SoundexMethods[] = | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											1996-05-23 22:54:17 +00:00
										 |  |  | 	{"sound_similar", sound_similar, 1, soundex_sound_similar__doc__}, | 
					
						
							|  |  |  | 	{"get_soundex", get_soundex, 1, soundex_get_soundex__doc__}, | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											1995-06-14 22:31:38 +00:00
										 |  |  |     {NULL, NULL }               /* sentinel */ | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Register the method table.
 | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | void | 
					
						
							|  |  |  | initsoundex() | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											1996-05-23 22:54:17 +00:00
										 |  |  |     (void) Py_InitModule4("soundex", | 
					
						
							|  |  |  | 			  SoundexMethods, | 
					
						
							|  |  |  | 			  soundex_module__doc__, | 
					
						
							|  |  |  | 			  (PyObject *)NULL, | 
					
						
							|  |  |  | 			  PYTHON_API_VERSION); | 
					
						
							| 
									
										
										
										
											1995-06-14 22:31:38 +00:00
										 |  |  | } |