diff --git a/src/testing/benchmark.go b/src/testing/benchmark.go
index 78e1b2de6d1..8b84444f382 100644
--- a/src/testing/benchmark.go
+++ b/src/testing/benchmark.go
@@ -78,7 +78,7 @@ type InternalBenchmark struct {
 }
 
 // B is a type passed to [Benchmark] functions to manage benchmark
-// timing and to specify the number of iterations to run.
+// timing and control the number of iterations.
 //
 // A benchmark ends when its Benchmark function returns or calls any of the methods
 // FailNow, Fatal, Fatalf, SkipNow, Skip, or Skipf. Those methods must be called
@@ -133,8 +133,7 @@ func (b *B) StartTimer() {
 }
 
 // StopTimer stops timing a test. This can be used to pause the timer
-// while performing complex initialization that you don't
-// want to measure.
+// while performing steps that you don't want to measure.
 func (b *B) StopTimer() {
 	if b.timerOn {
 		b.duration += highPrecisionTimeSince(b.start)
@@ -387,7 +386,7 @@ func (b *B) loopSlowPath() bool {
 		b.ResetTimer()
 		return true
 	}
-	// Handles fixed time case
+	// Handles fixed iterations case
 	if b.benchTime.n > 0 {
 		if b.N < b.benchTime.n {
 			b.N = b.benchTime.n
@@ -396,31 +395,42 @@ func (b *B) loopSlowPath() bool {
 		}
 		return false
 	}
-	// Handles fixed iteration count case
+	// Handles fixed time case
 	return b.stopOrScaleBLoop()
 }
 
-// Loop returns true until b.N calls has been made to it.
+// Loop returns true as long as the benchmark should continue running.
 //
-// A benchmark should either use Loop or contain an explicit loop from 0 to b.N, but not both.
-// After the benchmark finishes, b.N will contain the total number of calls to op, so the benchmark
-// may use b.N to compute other average metrics.
+// A typical benchmark is structured like:
 //
-// The parameters and results of function calls inside the body of "for b.Loop() {...}" are guaranteed
-// not to be optimized away.
-// Also, the local loop scaling for b.Loop ensures the benchmark function containing the loop will only
-// be executed once, i.e. for such construct:
-//
-//	testing.Benchmark(func(b *testing.B) {
-//			...(setup)
-//			for b.Loop() {
-//				...(benchmark logic)
-//			}
-//			...(clean-up)
+//	func Benchmark(b *testing.B) {
+//		... setup ...
+//		for b.Loop() {
+//			... code to measure ...
+//		}
+//		... cleanup ...
 //	}
 //
-// The ...(setup) and ...(clean-up) logic will only be executed once.
-// Also benchtime=Nx (N>1) will result in exactly N executions instead of N+1 for b.N style loops.
+// Loop resets the benchmark timer the first time it is called in a benchmark,
+// so any setup performed prior to starting the benchmark loop does not count
+// toward the benchmark measurement.
+//
+// The compiler never optimizes away calls to functions within the body of a
+// "for b.Loop() { ... }" loop. This prevents surprises that can otherwise occur
+// if the compiler determines that the result of a benchmarked function is
+// unused. The loop must be written in exactly this form, and this only applies
+// to calls syntactically between the curly braces of the loop. Optimizations
+// are performed as usual in any functions called by the loop.
+//
+// After Loop returns false, b.N contains the total number of iterations that
+// ran, so the benchmark may use b.N to compute other average metrics.
+//
+// Prior to the introduction of Loop, benchmarks were expected to contain an
+// explicit loop from 0 to b.N. Benchmarks should either use Loop or contain a
+// loop to b.N, but not both. Loop offers more automatic management of the
+// benchmark timer, and runs each benchmark function only once per measurement,
+// whereas b.N-based benchmarks must run the benchmark function (and any
+// associated setup and cleanup) several times.
 func (b *B) Loop() bool {
 	if b.loopN != 0 && b.loopN < b.N {
 		b.loopN++
diff --git a/src/testing/benchmark_test.go b/src/testing/benchmark_test.go
index 259b70ed4c9..b3089b31199 100644
--- a/src/testing/benchmark_test.go
+++ b/src/testing/benchmark_test.go
@@ -155,7 +155,7 @@ func ExampleB_Loop() {
 	}
 	n := 0
 	testing.Benchmark(func(b *testing.B) {
-		// Unlike "for i := range N {...}" style loops, this
+		// Unlike "for i := range b.N {...}" style loops, this
 		// setup logic will only be executed once, so simpleFunc
 		// will always get argument 1.
 		n++
@@ -219,7 +219,7 @@ func ExampleB_ReportMetric() {
 	// specific algorithm (in this case, sorting).
 	testing.Benchmark(func(b *testing.B) {
 		var compares int64
-		for i := 0; i < b.N; i++ {
+		for b.Loop() {
 			s := []int{5, 4, 3, 2, 1}
 			slices.SortFunc(s, func(a, b int) int {
 				compares++
diff --git a/src/testing/testing.go b/src/testing/testing.go
index e353ceb7411..8b4bdfbc398 100644
--- a/src/testing/testing.go
+++ b/src/testing/testing.go
@@ -72,27 +72,24 @@
 // A sample benchmark function looks like this:
 //
 //	func BenchmarkRandInt(b *testing.B) {
-//	    for range b.N {
+//	    for b.Loop() {
 //	        rand.Int()
 //	    }
 //	}
 //
-// The benchmark function must run the target code b.N times.
-// It is called multiple times with b.N adjusted until the
-// benchmark function lasts long enough to be timed reliably.
 // The output
 //
 //	BenchmarkRandInt-8   	68453040	        17.8 ns/op
 //
-// means that the loop ran 68453040 times at a speed of 17.8 ns per loop.
+// means that the body of the loop ran 68453040 times at a speed of 17.8 ns per loop.
 //
-// If a benchmark needs some expensive setup before running, the timer
-// may be reset:
+// Only the body of the loop is timed, so benchmarks may do expensive
+// setup before calling b.Loop, which will not be counted toward the
+// benchmark measurement:
 //
 //	func BenchmarkBigLen(b *testing.B) {
 //	    big := NewBig()
-//	    b.ResetTimer()
-//	    for range b.N {
+//	    for b.Loop() {
 //	        big.Len()
 //	    }
 //	}
@@ -120,6 +117,37 @@
 // In particular, https://golang.org/x/perf/cmd/benchstat performs
 // statistically robust A/B comparisons.
 //
+// # b.N-style benchmarks
+//
+// Prior to the introduction of [B.Loop], benchmarks were written in a
+// different style using [B.N]. For example:
+//
+//	func BenchmarkRandInt(b *testing.B) {
+//	    for range b.N {
+//	        rand.Int()
+//	    }
+//	}
+//
+// In this style of benchmark, the benchmark function must run
+// the target code b.N times. The benchmark function is called
+// multiple times with b.N adjusted until the benchmark function
+// lasts long enough to be timed reliably. This also means any setup
+// done before the loop may be run several times.
+//
+// If a benchmark needs some expensive setup before running, the timer
+// should be explicitly reset:
+//
+//	func BenchmarkBigLen(b *testing.B) {
+//	    big := NewBig()
+//	    b.ResetTimer()
+//	    for range b.N {
+//	        big.Len()
+//	    }
+//	}
+//
+// New benchmarks should prefer using [B.Loop], which is more robust
+// and more efficient.
+//
 // # Examples
 //
 // The package also runs and verifies example code. Example functions may