| 
									
										
										
										
											2021-04-14 02:36:07 +01:00
										 |  |  | #include "Python.h"
 | 
					
						
							| 
									
										
										
										
											2023-09-06 15:56:08 +02:00
										 |  |  | #include "pycore_code.h"          // _PyCode_GetVarnames()
 | 
					
						
							| 
									
										
										
										
											2021-05-21 10:57:35 +01:00
										 |  |  | #include "pycore_frame.h"
 | 
					
						
							| 
									
										
										
										
											2023-09-06 15:56:08 +02:00
										 |  |  | #include "pycore_pyerrors.h"      // export _Py_UTF8_Edit_Cost()
 | 
					
						
							|  |  |  | #include "pycore_runtime.h"       // _Py_ID()
 | 
					
						
							| 
									
										
										
										
											2021-04-14 02:36:07 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-03 11:47:27 -04:00
										 // Suggestion search is skipped when the candidate list holds this many
// entries or more.
#define MAX_CANDIDATE_ITEMS 750
// Longest string (in bytes, after common affixes are trimmed) the distance
// routine will compare; also the length of the scratch row callers allocate.
#define MAX_STRING_SIZE 40

// Price of an insertion, a deletion, or a substitution between unrelated bytes.
#define MOVE_COST 2
// Price of a substitution that only flips the case of an ASCII letter.
#define CASE_COST 1

// Low five bits of a byte.  An ASCII letter and its other-case twin differ
// only in bit 5, so they always agree on these bits.
#define LEAST_FIVE_BITS(n) ((n) & 0x1F)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline int | 
					
						
							|  |  |  | substitution_cost(char a, char b) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (LEAST_FIVE_BITS(a) != LEAST_FIVE_BITS(b)) { | 
					
						
							|  |  |  |         // Not the same, not a case flip.
 | 
					
						
							|  |  |  |         return MOVE_COST; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (a == b) { | 
					
						
							| 
									
										
										
										
											2021-04-14 02:36:07 +01:00
										 |  |  |         return 0; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2021-05-03 11:47:27 -04:00
										 |  |  |     if ('A' <= a && a <= 'Z') { | 
					
						
							|  |  |  |         a += ('a' - 'A'); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if ('A' <= b && b <= 'Z') { | 
					
						
							|  |  |  |         b += ('a' - 'A'); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (a == b) { | 
					
						
							|  |  |  |         return CASE_COST; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return MOVE_COST; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Calculate the Levenshtein distance between string1 and string2 */ | 
					
						
							|  |  |  | static Py_ssize_t | 
					
						
							|  |  |  | levenshtein_distance(const char *a, size_t a_size, | 
					
						
							|  |  |  |                      const char *b, size_t b_size, | 
					
						
							| 
									
										
										
										
											2022-11-30 16:55:16 +05:30
										 |  |  |                      size_t max_cost, size_t *buffer) | 
					
						
							| 
									
										
										
										
											2021-05-03 11:47:27 -04:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2021-04-14 02:36:07 +01:00
										 |  |  |     // Both strings are the same (by identity)
 | 
					
						
							|  |  |  |     if (a == b) { | 
					
						
							|  |  |  |         return 0; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-03 11:47:27 -04:00
										 |  |  |     // Trim away common affixes.
 | 
					
						
							|  |  |  |     while (a_size && b_size && a[0] == b[0]) { | 
					
						
							|  |  |  |         a++; a_size--; | 
					
						
							|  |  |  |         b++; b_size--; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     while (a_size && b_size && a[a_size-1] == b[b_size-1]) { | 
					
						
							|  |  |  |         a_size--; | 
					
						
							|  |  |  |         b_size--; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (a_size == 0 || b_size == 0) { | 
					
						
							|  |  |  |         return (a_size + b_size) * MOVE_COST; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (a_size > MAX_STRING_SIZE || b_size > MAX_STRING_SIZE) { | 
					
						
							|  |  |  |         return max_cost + 1; | 
					
						
							| 
									
										
										
										
											2021-04-14 02:36:07 +01:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-03 11:47:27 -04:00
										 |  |  |     // Prefer shorter buffer
 | 
					
						
							|  |  |  |     if (b_size < a_size) { | 
					
						
							|  |  |  |         const char *t = a; a = b; b = t; | 
					
						
							|  |  |  |         size_t t_size = a_size; a_size = b_size; b_size = t_size; | 
					
						
							| 
									
										
										
										
											2021-04-14 02:36:07 +01:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-03 11:47:27 -04:00
										 |  |  |     // quick fail when a match is impossible.
 | 
					
						
							|  |  |  |     if ((b_size - a_size) * MOVE_COST > max_cost) { | 
					
						
							|  |  |  |         return max_cost + 1; | 
					
						
							| 
									
										
										
										
											2021-04-14 02:36:07 +01:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-03 11:47:27 -04:00
										 |  |  |     // Instead of producing the whole traditional len(a)-by-len(b)
 | 
					
						
							|  |  |  |     // matrix, we can update just one row in place.
 | 
					
						
							| 
									
										
										
										
											2021-04-14 02:36:07 +01:00
										 |  |  |     // Initialize the buffer row
 | 
					
						
							| 
									
										
										
										
											2022-05-02 19:09:35 +02:00
										 |  |  |     size_t tmp = MOVE_COST; | 
					
						
							| 
									
										
										
										
											2021-05-03 11:47:27 -04:00
										 |  |  |     for (size_t i = 0; i < a_size; i++) { | 
					
						
							|  |  |  |         // cost from b[:0] to a[:i+1]
 | 
					
						
							| 
									
										
										
										
											2022-05-02 19:09:35 +02:00
										 |  |  |         buffer[i] = tmp; | 
					
						
							|  |  |  |         tmp += MOVE_COST; | 
					
						
							| 
									
										
										
										
											2021-04-14 02:36:07 +01:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     size_t result = 0; | 
					
						
							| 
									
										
										
										
											2021-05-03 11:47:27 -04:00
										 |  |  |     for (size_t b_index = 0; b_index < b_size; b_index++) { | 
					
						
							| 
									
										
										
										
											2021-04-14 02:36:07 +01:00
										 |  |  |         char code = b[b_index]; | 
					
						
							| 
									
										
										
										
											2021-05-03 11:47:27 -04:00
										 |  |  |         // cost(b[:b_index], a[:0]) == b_index * MOVE_COST
 | 
					
						
							|  |  |  |         size_t distance = result = b_index * MOVE_COST; | 
					
						
							|  |  |  |         size_t minimum = SIZE_MAX; | 
					
						
							|  |  |  |         for (size_t index = 0; index < a_size; index++) { | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             // cost(b[:b_index+1], a[:index+1]) = min(
 | 
					
						
							|  |  |  |             //     // 1) substitute
 | 
					
						
							|  |  |  |             //     cost(b[:b_index], a[:index])
 | 
					
						
							|  |  |  |             //         + substitution_cost(b[b_index], a[index]),
 | 
					
						
							|  |  |  |             //     // 2) delete from b
 | 
					
						
							|  |  |  |             //     cost(b[:b_index], a[:index+1]) + MOVE_COST,
 | 
					
						
							|  |  |  |             //     // 3) delete from a
 | 
					
						
							|  |  |  |             //     cost(b[:b_index+1], a[index]) + MOVE_COST
 | 
					
						
							|  |  |  |             // )
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             // 1) Previous distance in this row is cost(b[:b_index], a[:index])
 | 
					
						
							|  |  |  |             size_t substitute = distance + substitution_cost(code, a[index]); | 
					
						
							|  |  |  |             // 2) cost(b[:b_index], a[:index+1]) from previous row
 | 
					
						
							| 
									
										
										
										
											2021-04-14 02:36:07 +01:00
										 |  |  |             distance = buffer[index]; | 
					
						
							| 
									
										
										
										
											2021-05-03 11:47:27 -04:00
										 |  |  |             // 3) existing result is cost(b[:b_index+1], a[index])
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             size_t insert_delete = Py_MIN(result, distance) + MOVE_COST; | 
					
						
							|  |  |  |             result = Py_MIN(insert_delete, substitute); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             // cost(b[:b_index+1], a[:index+1])
 | 
					
						
							| 
									
										
										
										
											2021-04-14 02:36:07 +01:00
										 |  |  |             buffer[index] = result; | 
					
						
							| 
									
										
										
										
											2021-05-03 11:47:27 -04:00
										 |  |  |             if (result < minimum) { | 
					
						
							|  |  |  |                 minimum = result; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         if (minimum > max_cost) { | 
					
						
							|  |  |  |             // Everything in this row is too big, so bail early.
 | 
					
						
							|  |  |  |             return max_cost + 1; | 
					
						
							| 
									
										
										
										
											2021-04-14 02:36:07 +01:00
										 |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return result; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-17 19:39:42 +01:00
										 |  |  | PyObject * | 
					
						
							|  |  |  | _Py_CalculateSuggestions(PyObject *dir, | 
					
						
							| 
									
										
										
										
											2021-05-03 11:47:27 -04:00
										 |  |  |                       PyObject *name) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2021-04-14 02:36:07 +01:00
										 |  |  |     assert(!PyErr_Occurred()); | 
					
						
							|  |  |  |     assert(PyList_CheckExact(dir)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Py_ssize_t dir_size = PyList_GET_SIZE(dir); | 
					
						
							|  |  |  |     if (dir_size >= MAX_CANDIDATE_ITEMS) { | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-03 11:47:27 -04:00
										 |  |  |     Py_ssize_t suggestion_distance = PY_SSIZE_T_MAX; | 
					
						
							| 
									
										
										
										
											2021-04-14 02:36:07 +01:00
										 |  |  |     PyObject *suggestion = NULL; | 
					
						
							| 
									
										
										
										
											2021-04-15 00:03:43 +01:00
										 |  |  |     Py_ssize_t name_size; | 
					
						
							|  |  |  |     const char *name_str = PyUnicode_AsUTF8AndSize(name, &name_size); | 
					
						
							| 
									
										
										
										
											2021-04-14 15:10:33 +01:00
										 |  |  |     if (name_str == NULL) { | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2022-11-30 16:55:16 +05:30
										 |  |  |     size_t *buffer = PyMem_New(size_t, MAX_STRING_SIZE); | 
					
						
							|  |  |  |     if (buffer == NULL) { | 
					
						
							|  |  |  |         return PyErr_NoMemory(); | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2021-04-14 02:36:07 +01:00
										 |  |  |     for (int i = 0; i < dir_size; ++i) { | 
					
						
							|  |  |  |         PyObject *item = PyList_GET_ITEM(dir, i); | 
					
						
							| 
									
										
										
										
											2023-06-23 19:53:27 +03:00
										 |  |  |         if (_PyUnicode_Equal(name, item)) { | 
					
						
							|  |  |  |             continue; | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2021-04-15 00:03:43 +01:00
										 |  |  |         Py_ssize_t item_size; | 
					
						
							|  |  |  |         const char *item_str = PyUnicode_AsUTF8AndSize(item, &item_size); | 
					
						
							| 
									
										
										
										
											2021-04-14 15:10:33 +01:00
										 |  |  |         if (item_str == NULL) { | 
					
						
							| 
									
										
										
										
											2022-11-30 16:55:16 +05:30
										 |  |  |             PyMem_Free(buffer); | 
					
						
							| 
									
										
										
										
											2021-04-14 15:10:33 +01:00
										 |  |  |             return NULL; | 
					
						
							| 
									
										
										
										
											2021-04-14 02:36:07 +01:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2021-05-03 11:47:27 -04:00
										 |  |  |         // No more than 1/3 of the involved characters should need changed.
 | 
					
						
							|  |  |  |         Py_ssize_t max_distance = (name_size + item_size + 3) * MOVE_COST / 6; | 
					
						
							|  |  |  |         // Don't take matches we've already beaten.
 | 
					
						
							|  |  |  |         max_distance = Py_MIN(max_distance, suggestion_distance - 1); | 
					
						
							|  |  |  |         Py_ssize_t current_distance = | 
					
						
							| 
									
										
										
										
											2022-11-30 16:55:16 +05:30
										 |  |  |             levenshtein_distance(name_str, name_size, item_str, | 
					
						
							|  |  |  |                                  item_size, max_distance, buffer); | 
					
						
							| 
									
										
										
										
											2021-05-03 11:47:27 -04:00
										 |  |  |         if (current_distance > max_distance) { | 
					
						
							| 
									
										
										
										
											2021-04-14 02:36:07 +01:00
										 |  |  |             continue; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         if (!suggestion || current_distance < suggestion_distance) { | 
					
						
							|  |  |  |             suggestion = item; | 
					
						
							|  |  |  |             suggestion_distance = current_distance; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2022-11-30 16:55:16 +05:30
										 |  |  |     PyMem_Free(buffer); | 
					
						
							| 
									
										
										
										
											2022-11-10 09:03:39 +01:00
										 |  |  |     return Py_XNewRef(suggestion); | 
					
						
							| 
									
										
										
										
											2021-04-14 02:36:07 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-03 11:47:27 -04:00
										 |  |  | Py_ssize_t | 
					
						
							|  |  |  | _Py_UTF8_Edit_Cost(PyObject *a, PyObject *b, Py_ssize_t max_cost) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     assert(PyUnicode_Check(a) && PyUnicode_Check(b)); | 
					
						
							|  |  |  |     Py_ssize_t size_a, size_b; | 
					
						
							|  |  |  |     const char *utf8_a = PyUnicode_AsUTF8AndSize(a, &size_a); | 
					
						
							|  |  |  |     if (utf8_a == NULL) { | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     const char *utf8_b = PyUnicode_AsUTF8AndSize(b, &size_b); | 
					
						
							|  |  |  |     if (utf8_b == NULL) { | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (max_cost == -1) { | 
					
						
							|  |  |  |         max_cost = MOVE_COST * Py_MAX(size_a, size_b); | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2022-11-30 16:55:16 +05:30
										 |  |  |     size_t *buffer = PyMem_New(size_t, MAX_STRING_SIZE); | 
					
						
							|  |  |  |     if (buffer == NULL) { | 
					
						
							|  |  |  |         PyErr_NoMemory(); | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     Py_ssize_t res = levenshtein_distance(utf8_a, size_a, | 
					
						
							|  |  |  |                                     utf8_b, size_b, max_cost, buffer); | 
					
						
							|  |  |  |     PyMem_Free(buffer); | 
					
						
							|  |  |  |     return res; | 
					
						
							| 
									
										
										
										
											2021-05-03 11:47:27 -04:00
										 |  |  | } | 
					
						
							|  |  |  | 
 |