mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 23:21:29 +00:00 
			
		
		
		
	Fix inconsistent return type for statistics median_grouped() gh-92531 (#92533)
This commit is contained in:
		
							parent
							
								
									5bc2390229
								
							
						
					
					
						commit
						e01eeb7b4b
					
				
					 3 changed files with 22 additions and 14 deletions
				
			
		| 
						 | 
				
			
			@ -611,7 +611,7 @@ def median_high(data):
 | 
			
		|||
    return data[n // 2]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def median_grouped(data, interval=1):
 | 
			
		||||
def median_grouped(data, interval=1.0):
 | 
			
		||||
    """Estimates the median for numeric data binned around the midpoints
 | 
			
		||||
    of consecutive, fixed-width intervals.
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -650,35 +650,34 @@ def median_grouped(data, interval=1):
 | 
			
		|||
    by exact multiples of *interval*.  This is essential for getting a
 | 
			
		||||
    correct result.  The function does not check this precondition.
 | 
			
		||||
 | 
			
		||||
    Inputs may be any numeric type that can be coerced to a float during
 | 
			
		||||
    the interpolation step.
 | 
			
		||||
 | 
			
		||||
    """
 | 
			
		||||
    data = sorted(data)
 | 
			
		||||
    n = len(data)
 | 
			
		||||
    if n == 0:
 | 
			
		||||
    if not n:
 | 
			
		||||
        raise StatisticsError("no median for empty data")
 | 
			
		||||
    elif n == 1:
 | 
			
		||||
        return data[0]
 | 
			
		||||
 | 
			
		||||
    # Find the value at the midpoint. Remember this corresponds to the
 | 
			
		||||
    # midpoint of the class interval.
 | 
			
		||||
    x = data[n // 2]
 | 
			
		||||
 | 
			
		||||
    # Generate a clear error message for non-numeric data
 | 
			
		||||
    for obj in (x, interval):
 | 
			
		||||
        if isinstance(obj, (str, bytes)):
 | 
			
		||||
            raise TypeError(f'expected a number but got {obj!r}')
 | 
			
		||||
 | 
			
		||||
    # Using O(log n) bisection, find where all the x values occur in the data.
 | 
			
		||||
    # All x will lie within data[i:j].
 | 
			
		||||
    i = bisect_left(data, x)
 | 
			
		||||
    j = bisect_right(data, x, lo=i)
 | 
			
		||||
 | 
			
		||||
    # Coerce to floats, raising a TypeError if not possible
 | 
			
		||||
    try:
 | 
			
		||||
        interval = float(interval)
 | 
			
		||||
        x = float(x)
 | 
			
		||||
    except ValueError:
 | 
			
		||||
        raise TypeError(f'Value cannot be converted to a float')
 | 
			
		||||
 | 
			
		||||
    # Interpolate the median using the formula found at:
 | 
			
		||||
    # https://www.cuemath.com/data/median-of-grouped-data/
 | 
			
		||||
    try:
 | 
			
		||||
        L = x - interval / 2  # The lower limit of the median interval.
 | 
			
		||||
    except TypeError:
 | 
			
		||||
        # Coerce mixed types to float.
 | 
			
		||||
        L = float(x) - float(interval) / 2
 | 
			
		||||
    L = x - interval / 2.0    # Lower limit of the median interval
 | 
			
		||||
    cf = i                    # Cumulative frequency of the preceding interval
 | 
			
		||||
    f = j - i                 # Number of elements in the median interval
 | 
			
		||||
    return L + interval * (n / 2 - cf) / f
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1742,6 +1742,12 @@ def test_repeated_single_value(self):
 | 
			
		|||
                data = [x]*count
 | 
			
		||||
                self.assertEqual(self.func(data), float(x))
 | 
			
		||||
 | 
			
		||||
    def test_single_value(self):
 | 
			
		||||
        # Override method from AverageMixin.
 | 
			
		||||
        # Average of a single value is the value as a float.
 | 
			
		||||
        for x in (23, 42.5, 1.3e15, Fraction(15, 19), Decimal('0.28')):
 | 
			
		||||
            self.assertEqual(self.func([x]), float(x))
 | 
			
		||||
 | 
			
		||||
    def test_odd_fractions(self):
 | 
			
		||||
        # Test median_grouped works with an odd number of Fractions.
 | 
			
		||||
        F = Fraction
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1,3 @@
 | 
			
		|||
The statistics.median_grouped() function now always returns a float.
 | 
			
		||||
Formerly, it did not convert the input type for sequences of length
 | 
			
		||||
one.
 | 
			
		||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue