mirror of https://github.com/python/cpython.git (synced 2025-10-30 21:21:22 +00:00)
			
		
		
		
	Another crack at bug #1460340: make random.sample(dict) work, this time by ugly brute force.
This commit is contained in:
		
							parent
							
								
									2a9a6b0e86
								
							
						
					
					
						commit
						c17976e983
					
				
					 3 changed files with 37 additions and 5 deletions
				
			
		|  | @ -285,6 +285,15 @@ def sample(self, population, k): | ||||||
|         large population:   sample(xrange(10000000), 60) |         large population:   sample(xrange(10000000), 60) | ||||||
|         """ |         """ | ||||||
| 
 | 
 | ||||||
|  |         # XXX Although the documentation says `population` is "a sequence", | ||||||
|  |         # XXX attempts are made to cater to any iterable with a __len__ | ||||||
|  |         # XXX method.  This has had mixed success.  Examples from both | ||||||
|  |         # XXX sides:  sets work fine, and should become officially supported; | ||||||
|  |         # XXX dicts are much harder, and have failed in various subtle | ||||||
|  |         # XXX ways across attempts.  Support for mapping types should probably | ||||||
|  |         # XXX be dropped (and users should pass mapping.keys() or .values() | ||||||
|  |         # XXX explicitly). | ||||||
|  | 
 | ||||||
|         # Sampling without replacement entails tracking either potential |         # Sampling without replacement entails tracking either potential | ||||||
|         # selections (the pool) in a list or previous selections in a set. |         # selections (the pool) in a list or previous selections in a set. | ||||||
| 
 | 
 | ||||||
|  | @ -304,7 +313,9 @@ def sample(self, population, k): | ||||||
|         setsize = 21        # size of a small set minus size of an empty list |         setsize = 21        # size of a small set minus size of an empty list | ||||||
|         if k > 5: |         if k > 5: | ||||||
|             setsize += 4 ** _ceil(_log(k * 3, 4)) # table size for big sets |             setsize += 4 ** _ceil(_log(k * 3, 4)) # table size for big sets | ||||||
|         if n <= setsize:    # is an n-length list smaller than a k-length set |         if n <= setsize or hasattr(population, "keys"): | ||||||
|  |             # An n-length list is smaller than a k-length set, or this is a | ||||||
|  |             # mapping type so the other algorithm wouldn't work. | ||||||
|             pool = list(population) |             pool = list(population) | ||||||
|             for i in xrange(k):         # invariant:  non-selected at [0,n-i) |             for i in xrange(k):         # invariant:  non-selected at [0,n-i) | ||||||
|                 j = _int(random() * (n-i)) |                 j = _int(random() * (n-i)) | ||||||
|  | @ -320,10 +331,10 @@ def sample(self, population, k): | ||||||
|                         j = _int(random() * n) |                         j = _int(random() * n) | ||||||
|                     selected_add(j) |                     selected_add(j) | ||||||
|                     result[i] = population[j] |                     result[i] = population[j] | ||||||
|             except (TypeError, KeyError):   # handle sets and dictionaries |             except (TypeError, KeyError):   # handle (at least) sets | ||||||
|                 if isinstance(population, list): |                 if isinstance(population, list): | ||||||
|                     raise |                     raise | ||||||
|                 return self.sample(list(population), k) |                 return self.sample(tuple(population), k) | ||||||
|         return result |         return result | ||||||
| 
 | 
 | ||||||
| ## -------------------- real-valued distributions  ------------------- | ## -------------------- real-valued distributions  ------------------- | ||||||
|  |  | ||||||
|  | @ -93,12 +93,28 @@ def test_sample_inputs(self): | ||||||
|         self.gen.sample(set(range(20)), 2) |         self.gen.sample(set(range(20)), 2) | ||||||
|         self.gen.sample(range(20), 2) |         self.gen.sample(range(20), 2) | ||||||
|         self.gen.sample(xrange(20), 2) |         self.gen.sample(xrange(20), 2) | ||||||
|         self.gen.sample(dict.fromkeys('abcdefghijklmnopqrst'), 2) |  | ||||||
|         self.gen.sample(str('abcdefghijklmnopqrst'), 2) |         self.gen.sample(str('abcdefghijklmnopqrst'), 2) | ||||||
|         self.gen.sample(tuple('abcdefghijklmnopqrst'), 2) |         self.gen.sample(tuple('abcdefghijklmnopqrst'), 2) | ||||||
|  | 
 | ||||||
|  |     def test_sample_on_dicts(self): | ||||||
|  |         self.gen.sample(dict.fromkeys('abcdefghijklmnopqrst'), 2) | ||||||
|  | 
 | ||||||
|         # SF bug #1460340 -- random.sample can raise KeyError |         # SF bug #1460340 -- random.sample can raise KeyError | ||||||
|         a = dict.fromkeys(range(10)+range(10,100,2)+range(100,110)) |         a = dict.fromkeys(range(10)+range(10,100,2)+range(100,110)) | ||||||
|         self.gen.sample(a,3) |         self.gen.sample(a, 3) | ||||||
|  | 
 | ||||||
|  |         # A followup to bug #1460340:  sampling from a dict could return | ||||||
|  |         # a subset of its keys or of its values, depending on the size of | ||||||
|  |         # the subset requested. | ||||||
|  |         N = 30 | ||||||
|  |         d = dict((i, complex(i, i)) for i in xrange(N)) | ||||||
|  |         for k in xrange(N+1): | ||||||
|  |             samp = self.gen.sample(d, k) | ||||||
|  |             # Verify that we got ints back (keys); the values are complex. | ||||||
|  |             for x in samp: | ||||||
|  |                 self.assert_(type(x) is int) | ||||||
|  |         samp.sort() | ||||||
|  |         self.assertEqual(samp, range(N)) | ||||||
| 
 | 
 | ||||||
|     def test_gauss(self): |     def test_gauss(self): | ||||||
|         # Ensure that the seed() method initializes all the hidden state.  In |         # Ensure that the seed() method initializes all the hidden state.  In | ||||||
|  |  | ||||||
|  | @ -489,6 +489,11 @@ Extension Modules | ||||||
| Library | Library | ||||||
| ------- | ------- | ||||||
| 
 | 
 | ||||||
|  | - Bug #1460340: ``random.sample(dict)`` failed in various ways.  Dicts | ||||||
|  |   aren't officially supported here, and trying to use them will probably | ||||||
|  |   raise an exception some day.  But dicts have been allowed, and "mostly | ||||||
|  |   worked", so support for them won't go away without warning. | ||||||
|  | 
 | ||||||
| - Bug #1445068: getpass.getpass() can now be given an explicit stream | - Bug #1445068: getpass.getpass() can now be given an explicit stream | ||||||
|   argument to specify where to write the prompt. |   argument to specify where to write the prompt. | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Tim Peters
						Tim Peters