forgejo/services/actions/run.go
Mathieu Fenniak 0ecc6ef632 fix(actions): improve errors when ${{ needs... }} is used in strategy.matrix incorrectly (#10298)
Three fixes are presented together in this PR:
- When a `strategy.matrix` entry in an Action job contains `${{ needs.some-job.outputs.some-output }}`, if that output *never* becomes available, different error messages will be presented if `some-job` isn't found or if `some-output` isn't found.  This clarifies an error message that was previously "it could be this, or it could be this".
- In the error case described in the previous point, other jobs in the workflow could continue running or could be left "blocked" forever.  A centralized `FailRunPreExecutionError` function ensures that all incomplete jobs in the run are failed in this case.
- In a rare error case when a job referenced another job in `strategy.matrix` but no other jobs were defined in the workflow, the job would be marked as blocked forever because the `job_emitter` code would never be invoked to detect this case.  A new `consistencyCheckRun` function for a newly created `ActionRun` adds a location to perform a pre-execution check for this case so that the run can be failed.

These fixes are all interconnected around the refactor for the `FailRunPreExecutionError`, causing them to be bundled rather than individual PRs.

## Checklist

The [contributor guide](https://forgejo.org/docs/next/contributor/) contains information that will be helpful to first time contributors. There also are a few [conditions for merging Pull Requests in Forgejo repositories](https://codeberg.org/forgejo/governance/src/branch/main/PullRequestsAgreement.md). You are also welcome to join the [Forgejo development chatroom](https://matrix.to/#/#forgejo-development:matrix.org).

### Tests

- I added test coverage for Go changes...
  - [x] in their respective `*_test.go` for unit tests.
  - [ ] in the `tests/integration` directory if it involves interactions with a live Forgejo server.
- I added test coverage for JavaScript changes...
  - [ ] in `web_src/js/*.test.js` if it can be unit tested.
  - [ ] in `tests/e2e/*.test.e2e.js` if it requires interactions with a live Forgejo server (see also the [developer guide for JavaScript testing](https://codeberg.org/forgejo/forgejo/src/branch/forgejo/tests/e2e/README.md#end-to-end-tests)).

### Documentation

- [ ] I created a pull request [to the documentation](https://codeberg.org/forgejo/docs) to explain to Forgejo users how to use this change.
- [x] I did not document these changes and I do not expect someone else to do it.

### Release notes

- [x] I do not want this change to show in the release notes.
    - These are fixes to an unreleased feature and don't require release notes.
- [ ] I want the title to show in the release notes with a link to this pull request.
- [ ] I want the content of the `release-notes/<pull request number>.md` to be used for the release notes instead of the title.

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/10298
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
Co-authored-by: Mathieu Fenniak <mathieu@fenniak.net>
Co-committed-by: Mathieu Fenniak <mathieu@fenniak.net>
2025-12-05 17:17:37 +01:00

158 lines
5 KiB
Go

// Copyright 2025 The Forgejo Authors. All rights reserved.
// SPDX-License-Identifier: GPL-3.0-or-later
package actions
import (
"context"
"slices"
"strings"
actions_model "forgejo.org/models/actions"
"forgejo.org/models/db"
"forgejo.org/modules/timeutil"
)
// killRun puts every job of run that is not already done into newStatus.
//
// All work happens inside a single db.WithTx call:
//   - jobs whose status reports IsDone are left untouched;
//   - jobs without a task (TaskID == 0) get their Status and Stopped fields
//     written directly, without notification;
//   - jobs with a task are handed to StopTask together with newStatus.
//
// If the run is flagged NeedApproval, the flag is reset to DoesNotNeedApproval
// (with a doer ID of 0). CreateCommitStatus is then called with the run's jobs.
// The first error encountered is returned as-is.
func killRun(ctx context.Context, run *actions_model.ActionRun, newStatus actions_model.Status) error {
	stopJobs := func(ctx context.Context) error {
		runJobs, err := actions_model.GetRunJobsByRunID(ctx, run.ID)
		if err != nil {
			return err
		}

		for _, j := range runJobs {
			switch {
			case j.Status.IsDone():
				// Already finished; nothing to change.
			case j.TaskID == 0:
				// No task was ever created for this job, so update the job row itself.
				j.Status = newStatus
				j.Stopped = timeutil.TimeStampNow()
				if _, err := actions_model.UpdateRunJobWithoutNotification(ctx, j, nil, "status", "stopped"); err != nil {
					return err
				}
			default:
				if err := StopTask(ctx, j.TaskID, newStatus); err != nil {
					return err
				}
			}
		}

		if run.NeedApproval {
			err := actions_model.UpdateRunApprovalByID(ctx, run.ID, actions_model.DoesNotNeedApproval, 0)
			if err != nil {
				return err
			}
		}

		CreateCommitStatus(ctx, runJobs...)
		return nil
	}

	return db.WithTx(ctx, stopJobs)
}
// CancelRun cancels run by passing it to killRun with StatusCancelled, so that
// every job of the run that is not yet done ends up cancelled.
func CancelRun(ctx context.Context, run *actions_model.ActionRun) error {
	if err := killRun(ctx, run, actions_model.StatusCancelled); err != nil {
		return err
	}
	return nil
}
// ApproveRun releases the jobs of run that were waiting for approval.
//
// Inside a single db.WithTx call, every job that is blocked and has an empty
// Needs list is switched to StatusWaiting; blocked jobs that list other jobs in
// Needs are left as they are. CreateCommitStatus is then called with the run's
// jobs, and the run is recorded as DoesNotNeedApproval together with doerID
// (presumably the ID of the approving user -- confirm against callers).
func ApproveRun(ctx context.Context, run *actions_model.ActionRun, doerID int64) error {
	approve := func(ctx context.Context) error {
		runJobs, err := actions_model.GetRunJobsByRunID(ctx, run.ID)
		if err != nil {
			return err
		}

		for _, j := range runJobs {
			// Only blocked jobs without dependencies are released here.
			if len(j.Needs) != 0 || !j.Status.IsBlocked() {
				continue
			}
			j.Status = actions_model.StatusWaiting
			if _, err := UpdateRunJob(ctx, j, nil, "status"); err != nil {
				return err
			}
		}

		CreateCommitStatus(ctx, runJobs...)
		return actions_model.UpdateRunApprovalByID(ctx, run.ID, actions_model.DoesNotNeedApproval, doerID)
	}

	return db.WithTx(ctx, approve)
}
// FailRunPreExecutionError marks run as failed because of an error detected
// before execution, and fails every job of the run that is not yet done.
//
// errorCode and details are stored on the run as PreExecutionErrorCode and
// PreExecutionErrorDetails. If the run already carries a non-zero
// PreExecutionErrorCode, the existing error is kept and nil is returned without
// touching anything. Otherwise the run update and the call to killRun with
// StatusFailure are performed inside one db.WithTx call.
func FailRunPreExecutionError(ctx context.Context, run *actions_model.ActionRun, errorCode actions_model.PreExecutionError, details []any) error {
	// The first recorded error wins.
	if run.PreExecutionErrorCode != 0 {
		return nil
	}

	markFailed := func(ctx context.Context) error {
		run.Status = actions_model.StatusFailure
		run.PreExecutionErrorCode = errorCode
		run.PreExecutionErrorDetails = details

		err := actions_model.UpdateRunWithoutNotification(
			ctx, run,
			"pre_execution_error_code", "pre_execution_error_details", "status",
		)
		if err != nil {
			return err
		}

		// Fail the remaining jobs too, so none stays waiting or blocked.
		return killRun(ctx, run, actions_model.StatusFailure)
	}

	return db.WithTx(ctx, markFailed)
}
// consistencyCheckRun performs pre-execution checks on the jobs of run, looking
// for conditions that would prevent a job from ever reaching an executing
// stage.
//
// Each job is passed to checkJobWillRevisit in turn. Checking stops at the
// first job for which that function reports true; the remaining jobs are not
// inspected. Any error from loading the jobs or from a check is returned
// unchanged.
func consistencyCheckRun(ctx context.Context, run *actions_model.ActionRun) error {
	runJobs, err := actions_model.GetRunJobsByRunID(ctx, run.ID)
	if err != nil {
		return err
	}

	for _, j := range runJobs {
		stop, err := checkJobWillRevisit(ctx, j)
		if err != nil {
			return err
		}
		if stop {
			return nil
		}
	}
	return nil
}
// checkJobWillRevisit verifies that a job with an incomplete matrix can
// eventually be resolved.
//
// A job whose matrix contains something like
// `${{ needs.other-job.outputs.some-output }}` is marked as `IncompleteMatrix`
// and StatusBlocked until `other-job` completes; at that point the job_emitter
// checks dependent jobs and revisits them. If the job did not list `other-job`
// in its `needs: [...]`, that revisit never happens and the job would stay
// blocked forever.
//
// When that situation is found, the run is failed through
// FailRunPreExecutionError with ErrorCodeIncompleteMatrixMissingJob, and the
// details carry the job ID, the referenced job and the job's `needs` list.
//
// The boolean result is true only when the run was failed by this call; it
// tells the caller to stop checking further jobs. It is false when the job is
// fine, when nothing can be determined, or when an error is returned.
func checkJobWillRevisit(ctx context.Context, job *actions_model.ActionRunJob) (bool, error) {
	incomplete, matrixRef, err := job.IsIncompleteMatrix()
	if err != nil {
		return false, err
	}
	if !incomplete || matrixRef == nil {
		// Either the matrix is complete, or there is no information about the
		// `${{ needs... }}` reference; nothing to verify.
		return false, nil
	}

	wantedJob, declaredNeeds := matrixRef.Job, job.Needs
	if slices.Contains(declaredNeeds, wantedJob) {
		// The referenced job is declared in `needs`. The matrix may reference
		// several outputs and only the first one found during evaluation is
		// reported here, but with at least one job in `needs` the job_emitter
		// should revisit this job and reach a final resolution.
		return false, nil
	}

	// The job cannot make progress: record the error on its run.
	if err := job.LoadRun(ctx); err != nil {
		return false, err
	}
	details := []any{
		job.JobID,
		wantedJob,
		strings.Join(declaredNeeds, ", "),
	}
	err = FailRunPreExecutionError(ctx, job.Run, actions_model.ErrorCodeIncompleteMatrixMissingJob, details)
	if err != nil {
		return false, err
	}
	return true, nil
}