diff --git a/api/next/68021.txt b/api/next/68021.txt new file mode 100644 index 00000000000..46156e06654 --- /dev/null +++ b/api/next/68021.txt @@ -0,0 +1,13 @@ +pkg go/ast, func ParseDirective(token.Pos, string) (Directive, bool) #68021 +pkg go/ast, method (*Directive) End() token.Pos #68021 +pkg go/ast, method (*Directive) ParseArgs() ([]DirectiveArg, error) #68021 +pkg go/ast, method (*Directive) Pos() token.Pos #68021 +pkg go/ast, type Directive struct #68021 +pkg go/ast, type Directive struct, Args string #68021 +pkg go/ast, type Directive struct, ArgsPos token.Pos #68021 +pkg go/ast, type Directive struct, Name string #68021 +pkg go/ast, type Directive struct, Slash token.Pos #68021 +pkg go/ast, type Directive struct, Tool string #68021 +pkg go/ast, type DirectiveArg struct #68021 +pkg go/ast, type DirectiveArg struct, Arg string #68021 +pkg go/ast, type DirectiveArg struct, Pos token.Pos #68021 diff --git a/doc/next/6-stdlib/99-minor/go/ast/68021.md b/doc/next/6-stdlib/99-minor/go/ast/68021.md new file mode 100644 index 00000000000..0ff1a0b11e8 --- /dev/null +++ b/doc/next/6-stdlib/99-minor/go/ast/68021.md @@ -0,0 +1,4 @@ +The new [ParseDirective] function parses [directive +comments](/doc/comment#Syntax), which are comments such as `//go:generate`. +Source code tools can support their own directive comments and this new API +should help them implement the conventional syntax. diff --git a/src/go/ast/directive.go b/src/go/ast/directive.go new file mode 100644 index 00000000000..901ed0ba0e0 --- /dev/null +++ b/src/go/ast/directive.go @@ -0,0 +1,179 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
// A Directive is a comment of this form:
//
//	//tool:name args
//
// For example, this directive:
//
//	//go:generate stringer -type Op -trimprefix Op
//
// would have Tool "go", Name "generate", and Args "stringer -type Op
// -trimprefix Op".
//
// While Args does not have a strict syntax, by convention it is a
// space-separated sequence of unquoted words, '"'-quoted Go strings, or
// '`'-quoted raw strings.
//
// See https://go.dev/doc/comment#directives for specification.
type Directive struct {
	Tool string // text between "//" and the first ':'
	Name string // text after the ':' up to the first space
	Args string // no leading or trailing whitespace

	// Slash is the position of the "//" at the beginning of the directive.
	Slash token.Pos

	// ArgsPos is the position where Args begins, based on the position passed
	// to ParseDirective.
	ArgsPos token.Pos
}

// ParseDirective parses a single comment line for a directive comment.
//
// If the line is not a directive comment, it returns false.
//
// The provided text must be a single line and should include the leading "//".
// If the text does not start with "//", it returns false.
//
// The caller may provide a file position of the start of c. This will be used
// to track the position of the arguments. This may be [Comment.Slash],
// synthesized by the caller, or simply 0. If the caller passes 0, then the
// positions are effectively byte offsets into the string c.
func ParseDirective(pos token.Pos, c string) (Directive, bool) {
	// Fast path to eliminate most non-directive comments. Must be a line
	// comment starting with [a-z0-9].
	if !(len(c) >= 3 && c[0] == '/' && c[1] == '/' && isalnum(c[2])) {
		return Directive{}, false
	}

	buf := directiveScanner{c, pos}
	buf.skip(len("//"))

	// Check for a valid directive and parse tool part.
	//
	// This logic matches isDirective. (We could combine them, but isDirective
	// itself is duplicated in several places.)
	colon := strings.Index(buf.str, ":")
	if colon <= 0 || colon+1 >= len(buf.str) {
		// No colon, an empty tool, or nothing after the colon.
		return Directive{}, false
	}
	// Every byte of the tool and the first byte of the name must be [a-z0-9].
	for i := 0; i <= colon+1; i++ {
		if i == colon {
			continue
		}
		if !isalnum(buf.str[i]) {
			return Directive{}, false
		}
	}
	tool := buf.take(colon)
	buf.skip(len(":"))

	// Parse name and args. ArgsPos is recorded after the separating space has
	// been skipped, so it points at the first byte of Args.
	name := buf.takeNonSpace()
	buf.skipSpace()
	argsPos := buf.pos
	args := strings.TrimRightFunc(buf.str, unicode.IsSpace)

	return Directive{
		Tool:    tool,
		Name:    name,
		Args:    args,
		Slash:   pos,
		ArgsPos: argsPos,
	}, true
}

// isalnum reports whether b is an ASCII lowercase letter or an ASCII digit.
// Despite the name, uppercase letters are not accepted.
func isalnum(b byte) bool {
	return 'a' <= b && b <= 'z' || '0' <= b && b <= '9'
}

// Pos returns the position of the "//" that starts the directive (d.Slash).
func (d *Directive) Pos() token.Pos { return d.Slash }

// End returns the position immediately after the last byte of d.Args,
// computed as d.ArgsPos + len(d.Args).
func (d *Directive) End() token.Pos { return token.Pos(int(d.ArgsPos) + len(d.Args)) }

// A DirectiveArg is an argument to a directive comment.
type DirectiveArg struct {
	// Arg is the parsed argument string. If the argument was a quoted string,
	// this is its unquoted form.
	Arg string
	// Pos is the position of the first character in this argument.
	Pos token.Pos
}

// ParseArgs parses a [Directive]'s arguments using the standard convention,
// which is a sequence of tokens, where each token may be a bare word, or a
// double quoted Go string, or a back quoted raw Go string. Each token must be
// separated by one or more Unicode spaces.
//
// If the arguments do not conform to this syntax, it returns an error. The
// error text includes the directive name and the remaining argument text
// starting at the offending token.
//
// When there are no arguments the result is an empty, non-nil slice.
func (d *Directive) ParseArgs() ([]DirectiveArg, error) {
	args := directiveScanner{d.Args, d.ArgsPos}

	// Deliberately non-nil so that "no arguments" is an empty list.
	list := []DirectiveArg{}
	for args.skipSpace(); args.str != ""; args.skipSpace() {
		var arg string
		argPos := args.pos

		switch args.str[0] {
		default:
			arg = args.takeNonSpace()

		case '`', '"':
			// Remember the text starting at the opening quote so that both
			// error paths report the offending token rather than whatever
			// happens to follow it.
			rest := args.str

			q, err := strconv.QuotedPrefix(rest)
			if err != nil { // Always strconv.ErrSyntax
				return nil, fmt.Errorf("invalid quoted string in //%s:%s: %s", d.Tool, d.Name, rest)
			}
			// Any errors will have been returned by QuotedPrefix, so the
			// error from Unquote is intentionally ignored.
			arg, _ = strconv.Unquote(args.take(len(q)))

			// Check that the quoted string is followed by a space (or nothing).
			if args.str != "" {
				r, _ := utf8.DecodeRuneInString(args.str)
				if !unicode.IsSpace(r) {
					return nil, fmt.Errorf("invalid quoted string in //%s:%s: %s", d.Tool, d.Name, rest)
				}
			}
		}

		list = append(list, DirectiveArg{arg, argPos})
	}
	return list, nil
}

// directiveScanner is a helper for parsing directive comments while maintaining
// position information.
type directiveScanner struct {
	str string    // text not yet consumed
	pos token.Pos // position of str[0]
}

// skip discards the first n bytes of the remaining text and advances the
// position by n. n must not exceed len(s.str).
func (s *directiveScanner) skip(n int) {
	s.pos += token.Pos(n)
	s.str = s.str[n:]
}

// take consumes and returns the first n bytes of the remaining text.
func (s *directiveScanner) take(n int) string {
	res := s.str[:n]
	s.skip(n)
	return res
}

// takeNonSpace consumes and returns the text up to, but not including, the
// first Unicode space, or all of the remaining text if it contains no space.
func (s *directiveScanner) takeNonSpace() string {
	i := strings.IndexFunc(s.str, unicode.IsSpace)
	if i == -1 {
		i = len(s.str)
	}
	return s.take(i)
}

// skipSpace consumes any leading Unicode spaces.
func (s *directiveScanner) skipSpace() {
	trim := strings.TrimLeftFunc(s.str, unicode.IsSpace)
	s.skip(len(s.str) - len(trim))
}
+ +package ast + +import ( + "go/token" + "reflect" + "strings" + "testing" +) + +func TestParseDirectiveMatchesIsDirective(t *testing.T) { + for _, tt := range isDirectiveTests { + want := tt.ok + if strings.HasPrefix(tt.in, "extern ") || strings.HasPrefix(tt.in, "export ") { + // ParseDirective does NOT support extern or export, unlike + // isDirective. + want = false + } + + if _, ok := ParseDirective(0, "//"+tt.in); ok != want { + t.Errorf("ParseDirective(0, %q) = %v, want %v", "// "+tt.in, ok, want) + } + } +} + +func TestParseDirective(t *testing.T) { + for _, test := range []struct { + name string + in string + pos token.Pos + want Directive + wantOK bool + }{ + { + name: "valid", + in: "//go:generate stringer -type Op -trimprefix Op", + pos: 10, + want: Directive{ + Tool: "go", + Name: "generate", + Args: "stringer -type Op -trimprefix Op", + Slash: 10, + ArgsPos: token.Pos(10 + len("//go:generate ")), + }, + wantOK: true, + }, + { + name: "no args", + in: "//go:build ignore", + pos: 20, + want: Directive{ + Tool: "go", + Name: "build", + Args: "ignore", + Slash: 20, + ArgsPos: token.Pos(20 + len("//go:build ")), + }, + wantOK: true, + }, + { + name: "not a directive", + in: "// not a directive", + pos: 30, + wantOK: false, + }, + { + name: "not a comment", + in: "go:generate", + pos: 40, + wantOK: false, + }, + { + name: "empty", + in: "", + pos: 50, + wantOK: false, + }, + { + name: "just slashes", + in: "//", + pos: 60, + wantOK: false, + }, + { + name: "no name", + in: "//go:", + pos: 70, + wantOK: false, + }, + { + name: "no tool", + in: "//:generate", + pos: 80, + wantOK: false, + }, + { + name: "multiple spaces", + in: "//go:build foo bar", + pos: 90, + want: Directive{ + Tool: "go", + Name: "build", + Args: "foo bar", + Slash: 90, + ArgsPos: token.Pos(90 + len("//go:build ")), + }, + wantOK: true, + }, + { + name: "trailing space", + in: "//go:build foo ", + pos: 100, + want: Directive{ + Tool: "go", + Name: "build", + Args: "foo", + Slash: 100, + 
ArgsPos: token.Pos(100 + len("//go:build ")), + }, + wantOK: true, + }, + } { + t.Run(test.name, func(t *testing.T) { + got, gotOK := ParseDirective(test.pos, test.in) + if gotOK != test.wantOK { + t.Fatalf("ParseDirective(%q) ok = %v, want %v", test.in, gotOK, test.wantOK) + } + if !reflect.DeepEqual(got, test.want) { + t.Errorf("ParseDirective(%q) = %+v, want %+v", test.in, got, test.want) + } + }) + } +} + +func TestParseArgs(t *testing.T) { + for _, test := range []struct { + name string + in Directive + want []DirectiveArg + wantErr bool + }{ + { + name: "simple", + in: Directive{ + Tool: "go", + Name: "generate", + Args: "stringer -type Op", + ArgsPos: 10, + }, + want: []DirectiveArg{ + {"stringer", 10}, + {"-type", token.Pos(10 + len("stringer "))}, + {"Op", token.Pos(10 + len("stringer -type "))}, + }, + }, + { + name: "quoted", + in: Directive{ + Tool: "go", + Name: "generate", + Args: "\"foo bar\" baz", + ArgsPos: 10, + }, + want: []DirectiveArg{ + {"foo bar", 10}, + {"baz", token.Pos(10 + len("\"foo bar\" "))}, + }, + }, + { + name: "raw quoted", + in: Directive{ + Tool: "go", + Name: "generate", + Args: "`foo bar` baz", + ArgsPos: 10, + }, + want: []DirectiveArg{ + {"foo bar", 10}, + {"baz", token.Pos(10 + len("`foo bar` "))}, + }, + }, + { + name: "escapes", + in: Directive{ + Tool: "go", + Name: "generate", + Args: "\"foo\\U0001F60Abar\" `a\\tb`", + ArgsPos: 10, + }, + want: []DirectiveArg{ + {"foo😊bar", 10}, + {"a\\tb", token.Pos(10 + len("\"foo\\U0001F60Abar\" "))}, + }, + }, + { + name: "empty args", + in: Directive{ + Tool: "go", + Name: "build", + Args: "", + ArgsPos: 10, + }, + want: []DirectiveArg{}, + }, + { + name: "spaces", + in: Directive{ + Tool: "go", + Name: "build", + Args: " foo bar ", + ArgsPos: 10, + }, + want: []DirectiveArg{ + {"foo", token.Pos(10 + len(" "))}, + {"bar", token.Pos(10 + len(" foo "))}, + }, + }, + { + name: "unterminated quote", + in: Directive{ + Tool: "go", + Name: "generate", + Args: "`foo", + }, + wantErr: true, 
+ }, + { + name: "no space after quote", + in: Directive{ + Tool: "go", + Name: "generate", + Args: `"foo"bar`, + }, + wantErr: true, + }, + } { + t.Run(test.name, func(t *testing.T) { + got, err := test.in.ParseArgs() + if err != nil && !test.wantErr { + t.Errorf("got ParseArgs(%+v) = error %s; want %+v", test.in, err, test.want) + } else if err == nil && test.wantErr { + t.Errorf("got ParseArgs(%+v) = %+v; want error", test.in, got) + } else if err == nil && !reflect.DeepEqual(got, test.want) { + t.Errorf("got ParseArgs(%+v) = %+v; want %+v", test.in, got, test.want) + } + }) + } +}