mirror of
				https://github.com/python/cpython.git
				synced 2025-10-26 03:04:41 +00:00 
			
		
		
		
	Take Tim's advice and have random.sample() support only sequences and sets.
This commit is contained in:
		
							parent
							
								
									28de64fd0f
								
							
						
					
					
						commit
						1acde190b2
					
				
					 4 changed files with 22 additions and 49 deletions
				
			
		|  | @ -111,8 +111,8 @@ Functions for sequences: | |||
| 
 | ||||
| .. function:: sample(population, k) | ||||
| 
 | ||||
|    Return a *k* length list of unique elements chosen from the population sequence. | ||||
|    Used for random sampling without replacement. | ||||
|    Return a *k* length list of unique elements chosen from the population sequence | ||||
|    or set. Used for random sampling without replacement. | ||||
| 
 | ||||
|    Returns a new list containing elements from the population while leaving the | ||||
|    original population unchanged.  The resulting list is in selection order so that | ||||
|  |  | |||
|  | @ -267,7 +267,7 @@ def shuffle(self, x, random=None, int=int): | |||
|             x[i], x[j] = x[j], x[i] | ||||
| 
 | ||||
|     def sample(self, population, k): | ||||
|         """Chooses k unique random elements from a population sequence. | ||||
|         """Chooses k unique random elements from a population sequence or set. | ||||
| 
 | ||||
|         Returns a new list containing elements from the population while | ||||
|         leaving the original population unchanged.  The resulting list is | ||||
|  | @ -284,15 +284,6 @@ def sample(self, population, k): | |||
|         large population:   sample(range(10000000), 60) | ||||
|         """ | ||||
| 
 | ||||
|         # XXX Although the documentation says `population` is "a sequence", | ||||
|         # XXX attempts are made to cater to any iterable with a __len__ | ||||
|         # XXX method.  This has had mixed success.  Examples from both | ||||
|         # XXX sides:  sets work fine, and should become officially supported; | ||||
|         # XXX dicts are much harder, and have failed in various subtle | ||||
|         # XXX ways across attempts.  Support for mapping types should probably | ||||
|         # XXX be dropped (and users should pass mapping.keys() or .values() | ||||
|         # XXX explicitly). | ||||
| 
 | ||||
|         # Sampling without replacement entails tracking either potential | ||||
|         # selections (the pool) in a list or previous selections in a set. | ||||
| 
 | ||||
|  | @ -303,25 +294,27 @@ def sample(self, population, k): | |||
|         # preferred since the list takes less space than the | ||||
|         # set and it doesn't suffer from frequent reselections. | ||||
| 
 | ||||
|         if isinstance(population, (set, frozenset)): | ||||
|             population = tuple(population) | ||||
|         if not hasattr(population, '__getitem__') or hasattr(population, 'keys'): | ||||
|             raise TypeError("Population must be a sequence or set.  For dicts, use dict.keys().") | ||||
|         random = self.random | ||||
|         n = len(population) | ||||
|         if not 0 <= k <= n: | ||||
|             raise ValueError("sample larger than population") | ||||
|         random = self.random | ||||
|             raise ValueError("Sample larger than population") | ||||
|         _int = int | ||||
|         result = [None] * k | ||||
|         setsize = 21        # size of a small set minus size of an empty list | ||||
|         if k > 5: | ||||
|             setsize += 4 ** _ceil(_log(k * 3, 4)) # table size for big sets | ||||
|         if n <= setsize or hasattr(population, "keys"): | ||||
|             # An n-length list is smaller than a k-length set, or this is a | ||||
|             # mapping type so the other algorithm wouldn't work. | ||||
|         if n <= setsize: | ||||
|             # An n-length list is smaller than a k-length set | ||||
|             pool = list(population) | ||||
|             for i in range(k):         # invariant:  non-selected at [0,n-i) | ||||
|                 j = _int(random() * (n-i)) | ||||
|                 result[i] = pool[j] | ||||
|                 pool[j] = pool[n-i-1]   # move non-selected item into vacancy | ||||
|         else: | ||||
|             try: | ||||
|             selected = set() | ||||
|             selected_add = selected.add | ||||
|             for i in range(k): | ||||
|  | @ -330,10 +323,6 @@ def sample(self, population, k): | |||
|                     j = _int(random() * n) | ||||
|                 selected_add(j) | ||||
|                 result[i] = population[j] | ||||
|             except (TypeError, KeyError):   # handle (at least) sets | ||||
|                 if isinstance(population, list): | ||||
|                     raise | ||||
|                 return self.sample(tuple(population), k) | ||||
|         return result | ||||
| 
 | ||||
| ## -------------------- real-valued distributions  ------------------- | ||||
|  |  | |||
|  | @ -84,26 +84,7 @@ def test_sample_inputs(self): | |||
|         self.gen.sample(tuple('abcdefghijklmnopqrst'), 2) | ||||
| 
 | ||||
|     def test_sample_on_dicts(self): | ||||
|         self.gen.sample(dict.fromkeys('abcdefghijklmnopqrst'), 2) | ||||
| 
 | ||||
|         # SF bug #1460340 -- random.sample can raise KeyError | ||||
|         a = dict.fromkeys(list(range(10)) + | ||||
|                           list(range(10,100,2)) + | ||||
|                           list(range(100,110))) | ||||
|         self.gen.sample(a, 3) | ||||
| 
 | ||||
|         # A followup to bug #1460340:  sampling from a dict could return | ||||
|         # a subset of its keys or of its values, depending on the size of | ||||
|         # the subset requested. | ||||
|         N = 30 | ||||
|         d = dict((i, complex(i, i)) for i in range(N)) | ||||
|         for k in range(N+1): | ||||
|             samp = self.gen.sample(d, k) | ||||
|             # Verify that we got ints back (keys); the values are complex. | ||||
|             for x in samp: | ||||
|                 self.assert_(type(x) is int) | ||||
|         samp.sort() | ||||
|         self.assertEqual(samp, list(range(N))) | ||||
|         self.assertRaises(TypeError, self.gen.sample, dict.fromkeys('abcdef'), 2) | ||||
| 
 | ||||
|     def test_gauss(self): | ||||
|         # Ensure that the seed() method initializes all the hidden state.  In | ||||
|  |  | |||
|  | @ -355,6 +355,9 @@ Library | |||
| - Removed defunct parts of the random module (the Wichmann-Hill generator | ||||
|   and the jumpahead() method). | ||||
| 
 | ||||
| - random.sample() now officially supports all sequences and sets; passing | ||||
|   a mapping raises TypeError. | ||||
| 
 | ||||
| - Patch #467924: add ZipFile.extract() and ZipFile.extractall() in the | ||||
|   zipfile module. | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Raymond Hettinger
						Raymond Hettinger