mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			177 lines
		
	
	
	
		
			4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			177 lines
		
	
	
	
		
			4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|   [Header: soundexmodule.c,v 1.2 95/05/02 15:40:45 dwwillia Exp ]
 | |
|   
 | |
|   Perform soundex comparisons on strings.
 | |
| 
 | |
|   Soundex is an algorithm that hashes English strings into a numerical value.
 | |
|   Strings that sound the same are hashed to the same value.  This allows 
 | |
|   for non-literal string matching.
 | |
| 
 | |
|   From: David Wayne Williams <dwwillia@iucf.indiana.edu>
 | |
| 
 | |
|   Apr 29 1996 - added get_soundex method that returns the soundex of a
 | |
|                 string (chrish@qnx.com)
 | |
|   May 2 1996  - added doc strings (chrish@qnx.com)
 | |
| */
 | |
| 
 | |
| #include <string.h>
 | |
| #include <ctype.h>
 | |
| #include "Python.h"
 | |
| 
 | |
/* Module-level doc string; handed to Py_InitModule4() by initsoundex(). */
static char soundex_module__doc__[] =
    "Perform Soundex comparisons on strings, allowing non-literal matching.";
 | |
| 
 | |
/* Compute the Soundex-style hash of a NUL-terminated string.
 *
 *   str     input string; it is only read, never modified.
 *   result  output buffer with room for at least 7 bytes; on return it
 *           holds exactly 6 hash characters followed by a NUL.
 *
 * An empty input hashes to "000000".  Otherwise:
 *
 *   1) The first character of the input is copied (upper-cased) to the
 *      result unchanged.
 *
 *   2) Each following character is translated, case-insensitively:
 *        BFPV     = 1
 *        CGJKQSXZ = 2
 *        DT       = 3
 *        L        = 4
 *        MN       = 5
 *        R        = 6
 *      Every other character (vowels, W, H, Y and anything that is not
 *      one of the letters above) is skipped.
 *
 *   3) A digit is only emitted when it differs from the character that
 *      was written to the result immediately before it, i.e. runs of the
 *      same digit collapse into one.  Skipped characters do not break
 *      such a run.  Note that the comparison is made against the stored
 *      result, so a preserved first character that happens to be the
 *      digit '1'..'6' also suppresses an identical first code.
 *
 *   4) Translation stops once 6 characters have been produced; shorter
 *      results are padded on the right with '0'.
 */
static void soundex_hash(const char *str, char *result)
{
    enum { SOUNDEX_LEN = 6 };   /* hash characters, excluding the NUL */

    const char *sptr = str;     /* read position in str */
    char *rptr = result;        /* write position in result */

    if (*str == '\0') {
        strcpy(result, "000000");
        return;
    }

    /* Preserve the first character.  The cast to unsigned char is required:
       passing a negative char value to toupper() is undefined behaviour. */
    *rptr++ = (char)toupper((unsigned char)*sptr++);

    for (; (rptr - result) < SOUNDEX_LEN && *sptr != '\0'; sptr++) {
        char code;

        switch (toupper((unsigned char)*sptr)) {
        case 'B': case 'F': case 'P': case 'V':
            code = '1';
            break;
        case 'C': case 'G': case 'J': case 'K':
        case 'Q': case 'S': case 'X': case 'Z':
            code = '2';
            break;
        case 'D': case 'T':
            code = '3';
            break;
        case 'L':
            code = '4';
            break;
        case 'M': case 'N':
            code = '5';
            break;
        case 'R':
            code = '6';
            break;
        default:
            /* Vowels, W, H, Y and all non-coded characters are skipped. */
            continue;
        }

        /* Emit the digit unless it repeats the previously stored character. */
        if (rptr[-1] != code)
            *rptr++ = code;
    }

    /* Pad with '0' on the right up to the fixed hash length. */
    while (rptr < result + SOUNDEX_LEN)
        *rptr++ = '0';

    /* Terminate the result string. */
    result[SOUNDEX_LEN] = '\0';
}
 | |
| 
 | |
| 
 | |
| /* Return the actual soundex value.         */
 | |
| /* Added by Chris Herborth (chrish@qnx.com) */
 | |
| static char soundex_get_soundex__doc__[] =
 | |
| 	"Return the (English) Soundex hash value for a string.";
 | |
| static PyObject *
 | |
| get_soundex(PyObject *self, PyObject *args)
 | |
| {
 | |
| 	char *str;
 | |
| 	char sdx[7];
 | |
| 
 | |
| 	if(!PyArg_ParseTuple( args, "s", &str))
 | |
| 	  return NULL;
 | |
| 
 | |
| 	soundex_hash(str, sdx);
 | |
| 
 | |
| 	return PyString_FromString(sdx);
 | |
| }
 | |
| 
 | |
| static char soundex_sound_similar__doc__[] =
 | |
| 	"Compare two strings to see if they sound similar (English).";
 | |
| static PyObject *
 | |
| sound_similar(PyObject *self, PyObject *args)
 | |
| {
 | |
|     char *str1, *str2;
 | |
|     char res1[7], res2[7];
 | |
|     
 | |
|     if(!PyArg_ParseTuple(args, "ss", &str1, &str2))
 | |
|         return NULL;
 | |
| 
 | |
|     soundex_hash(str1, res1);
 | |
|     soundex_hash(str2, res2);
 | |
| 
 | |
|     if(!strcmp(res1,res2))
 | |
|         return Py_BuildValue("i",1);
 | |
|     else
 | |
|         return Py_BuildValue("i",0);
 | |
| }
 | |
| 
 | |
| /* Python Method Table.
 | |
|  */
 | |
| static PyMethodDef SoundexMethods[] =
 | |
| {
 | |
| 	{"sound_similar", sound_similar, 1, soundex_sound_similar__doc__},
 | |
| 	{"get_soundex", get_soundex, 1, soundex_get_soundex__doc__},
 | |
| 
 | |
|     {NULL, NULL }               /* sentinel */
 | |
| };
 | |
| 
 | |
| 
 | |
| /* Register the method table.
 | |
|  */
 | |
| DL_EXPORT(void)
 | |
| initsoundex()
 | |
| {
 | |
|     (void) Py_InitModule4("soundex",
 | |
| 			  SoundexMethods,
 | |
| 			  soundex_module__doc__,
 | |
| 			  (PyObject *)NULL,
 | |
| 			  PYTHON_API_VERSION);
 | |
| }
 | 
