Update Dependencies (#390)

Co-authored-by: Norwin Roosen <git@nroo.de>
Co-authored-by: Norwin <git@nroo.de>
Reviewed-on: https://gitea.com/gitea/tea/pulls/390
Reviewed-by: 6543 <6543@obermui.de>
Reviewed-by: Andrew Thornton <art27@cantab.net>
Co-authored-by: Norwin <noerw@noreply.gitea.io>
Co-committed-by: Norwin <noerw@noreply.gitea.io>
This commit is contained in:
Norwin
2021-08-30 23:18:50 +08:00
committed by Andrew Thornton
parent 4b9907fb54
commit d6df0a53b5
665 changed files with 29466 additions and 24547 deletions

View File

@@ -4,6 +4,7 @@ import (
"fmt"
"os"
"regexp"
"sort"
"strings"
"sync"
"time"
@@ -22,25 +23,57 @@ type Rule struct {
// An Emitter takes group matches and returns tokens.
type Emitter interface {
// Emit tokens for the given regex groups.
Emit(groups []string, lexer Lexer) Iterator
Emit(groups []string, state *LexerState) Iterator
}
// EmitterFunc is a function that is an Emitter.
type EmitterFunc func(groups []string, lexer Lexer) Iterator
type EmitterFunc func(groups []string, state *LexerState) Iterator
// Emit tokens for groups.
func (e EmitterFunc) Emit(groups []string, lexer Lexer) Iterator { return e(groups, lexer) }
func (e EmitterFunc) Emit(groups []string, state *LexerState) Iterator {
return e(groups, state)
}
// ByGroups emits a token for each matching group in the rule's regex.
func ByGroups(emitters ...Emitter) Emitter {
return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
return EmitterFunc(func(groups []string, state *LexerState) Iterator {
iterators := make([]Iterator, 0, len(groups)-1)
if len(emitters) != len(groups)-1 {
iterators = append(iterators, Error.Emit(groups, lexer))
iterators = append(iterators, Error.Emit(groups, state))
// panic(errors.Errorf("number of groups %q does not match number of emitters %v", groups, emitters))
} else {
for i, group := range groups[1:] {
iterators = append(iterators, emitters[i].Emit([]string{group}, lexer))
if emitters[i] != nil {
iterators = append(iterators, emitters[i].Emit([]string{group}, state))
}
}
}
return Concaterator(iterators...)
})
}
// ByGroupNames emits a token for each named matching group in the rule's regex.
func ByGroupNames(emitters map[string]Emitter) Emitter {
return EmitterFunc(func(groups []string, state *LexerState) Iterator {
iterators := make([]Iterator, 0, len(state.NamedGroups)-1)
if len(state.NamedGroups)-1 == 0 {
if emitter, ok := emitters[`0`]; ok {
iterators = append(iterators, emitter.Emit(groups, state))
} else {
iterators = append(iterators, Error.Emit(groups, state))
}
} else {
ruleRegex := state.Rules[state.State][state.Rule].Regexp
for i := 1; i < len(state.NamedGroups); i++ {
groupName := ruleRegex.GroupNameFromNumber(i)
group := state.NamedGroups[groupName]
if emitter, ok := emitters[groupName]; ok {
if emitter != nil {
iterators = append(iterators, emitter.Emit([]string{group}, state))
}
} else {
iterators = append(iterators, Error.Emit([]string{group}, state))
}
}
}
return Concaterator(iterators...)
@@ -88,7 +121,7 @@ func ByGroups(emitters ...Emitter) Emitter {
// Note: panic's if the number emitters does not equal the number of matched
// groups in the regex.
func UsingByGroup(sublexerGetFunc func(string) Lexer, sublexerNameGroup, codeGroup int, emitters ...Emitter) Emitter {
return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
return EmitterFunc(func(groups []string, state *LexerState) Iterator {
// bounds check
if len(emitters) != len(groups)-1 {
panic("UsingByGroup expects number of emitters to be the same as len(groups)-1")
@@ -106,8 +139,8 @@ func UsingByGroup(sublexerGetFunc func(string) Lexer, sublexerNameGroup, codeGro
if err != nil {
panic(err)
}
} else {
iterators[i] = emitters[i].Emit([]string{group}, lexer)
} else if emitters[i] != nil {
iterators[i] = emitters[i].Emit([]string{group}, state)
}
}
@@ -117,7 +150,7 @@ func UsingByGroup(sublexerGetFunc func(string) Lexer, sublexerNameGroup, codeGro
// Using returns an Emitter that uses a given Lexer for parsing and emitting.
func Using(lexer Lexer) Emitter {
return EmitterFunc(func(groups []string, _ Lexer) Iterator {
return EmitterFunc(func(groups []string, _ *LexerState) Iterator {
it, err := lexer.Tokenise(&TokeniseOptions{State: "root", Nested: true}, groups[0])
if err != nil {
panic(err)
@@ -127,9 +160,9 @@ func Using(lexer Lexer) Emitter {
}
// UsingSelf is like Using, but uses the current Lexer.
func UsingSelf(state string) Emitter {
return EmitterFunc(func(groups []string, lexer Lexer) Iterator {
it, err := lexer.Tokenise(&TokeniseOptions{State: state, Nested: true}, groups[0])
func UsingSelf(stateName string) Emitter {
return EmitterFunc(func(groups []string, state *LexerState) Iterator {
it, err := state.Lexer.Tokenise(&TokeniseOptions{State: stateName, Nested: true}, groups[0])
if err != nil {
panic(err)
}
@@ -139,6 +172,9 @@ func UsingSelf(state string) Emitter {
// Words creates a regex that matches any of the given literal words.
func Words(prefix, suffix string, words ...string) string {
sort.Slice(words, func(i, j int) bool {
return len(words[j]) < len(words[i])
})
for i, word := range words {
words[i] = regexp.QuoteMeta(word)
}
@@ -162,10 +198,10 @@ func Tokenise(lexer Lexer, options *TokeniseOptions, text string) ([]Token, erro
type Rules map[string][]Rule
// Rename clones rules then a rule.
func (r Rules) Rename(old, new string) Rules {
func (r Rules) Rename(oldRule, newRule string) Rules {
r = r.Clone()
r[new] = r[old]
delete(r, old)
r[newRule] = r[oldRule]
delete(r, oldRule)
return r
}
@@ -188,9 +224,31 @@ func (r Rules) Merge(rules Rules) Rules {
return out
}
// MustNewLazyLexer creates a new Lexer with deferred rules generation or panics.
func MustNewLazyLexer(config *Config, rulesFunc func() Rules) *RegexLexer {
lexer, err := NewLazyLexer(config, rulesFunc)
if err != nil {
panic(err)
}
return lexer
}
// NewLazyLexer creates a new regex-based Lexer with deferred rules generation.
func NewLazyLexer(config *Config, rulesFunc func() Rules) (*RegexLexer, error) {
if config == nil {
config = &Config{}
}
return &RegexLexer{
config: config,
compilerFunc: rulesFunc,
}, nil
}
// MustNewLexer creates a new Lexer or panics.
func MustNewLexer(config *Config, rules Rules) *RegexLexer {
lexer, err := NewLexer(config, rules)
//
// Deprecated: Use MustNewLazyLexer instead.
func MustNewLexer(config *Config, rules Rules) *RegexLexer { // nolint: forbidigo
lexer, err := NewLexer(config, rules) // nolint: forbidigo
if err != nil {
panic(err)
}
@@ -201,34 +259,10 @@ func MustNewLexer(config *Config, rules Rules) *RegexLexer {
//
// "rules" is a state machine transitition map. Each key is a state. Values are sets of rules
// that match input, optionally modify lexer state, and output tokens.
func NewLexer(config *Config, rules Rules) (*RegexLexer, error) {
if config == nil {
config = &Config{}
}
if _, ok := rules["root"]; !ok {
return nil, fmt.Errorf("no \"root\" state")
}
compiledRules := map[string][]*CompiledRule{}
for state, rules := range rules {
compiledRules[state] = nil
for _, rule := range rules {
flags := ""
if !config.NotMultiline {
flags += "m"
}
if config.CaseInsensitive {
flags += "i"
}
if config.DotAll {
flags += "s"
}
compiledRules[state] = append(compiledRules[state], &CompiledRule{Rule: rule, flags: flags})
}
}
return &RegexLexer{
config: config,
rules: compiledRules,
}, nil
//
// Deprecated: Use NewLazyLexer instead.
func NewLexer(config *Config, rules Rules) (*RegexLexer, error) { // nolint: forbidigo
return NewLazyLexer(config, func() Rules { return rules })
}
// Trace enables debug tracing.
@@ -260,10 +294,13 @@ type LexerState struct {
Rule int
// Group matches.
Groups []string
// Named Group matches.
NamedGroups map[string]string
// Custum context for mutators.
MutatorContext map[interface{}]interface{}
iteratorStack []Iterator
options *TokeniseOptions
newlineAdded bool
}
// Set mutator context.
@@ -278,7 +315,11 @@ func (l *LexerState) Get(key interface{}) interface{} {
// Iterator returns the next Token from the lexer.
func (l *LexerState) Iterator() Token { // nolint: gocognit
for l.Pos < len(l.Text) && len(l.Stack) > 0 {
end := len(l.Text)
if l.newlineAdded {
end--
}
for l.Pos < end && len(l.Stack) > 0 {
// Exhaust the iterator stack, if any.
for len(l.iteratorStack) > 0 {
n := len(l.iteratorStack) - 1
@@ -298,7 +339,7 @@ func (l *LexerState) Iterator() Token { // nolint: gocognit
if !ok {
panic("unknown state " + l.State)
}
ruleIndex, rule, groups := matchRules(l.Text, l.Pos, selectedRule)
ruleIndex, rule, groups, namedGroups := matchRules(l.Text, l.Pos, selectedRule)
// No match.
if groups == nil {
// From Pygments :\
@@ -316,6 +357,7 @@ func (l *LexerState) Iterator() Token { // nolint: gocognit
}
l.Rule = ruleIndex
l.Groups = groups
l.NamedGroups = namedGroups
l.Pos += utf8.RuneCountInString(groups[0])
if rule.Mutator != nil {
if err := rule.Mutator.Mutate(l); err != nil {
@@ -323,7 +365,7 @@ func (l *LexerState) Iterator() Token { // nolint: gocognit
}
}
if rule.Type != nil {
l.iteratorStack = append(l.iteratorStack, rule.Type.Emit(l.Groups, l.Lexer))
l.iteratorStack = append(l.iteratorStack, rule.Type.Emit(l.Groups, l))
}
}
// Exhaust the IteratorStack, if any.
@@ -353,9 +395,11 @@ type RegexLexer struct {
analyser func(text string) float32
trace bool
mu sync.Mutex
compiled bool
rules map[string][]*CompiledRule
mu sync.Mutex
compiled bool
rules map[string][]*CompiledRule
compilerFunc func() Rules
compileOnce sync.Once
}
// SetAnalyser sets the analyser function used to perform content inspection.
@@ -390,7 +434,7 @@ func (r *RegexLexer) maybeCompile() (err error) {
pattern = "(?" + rule.flags + ")" + pattern
}
pattern = `\G` + pattern
rule.Regexp, err = regexp2.Compile(pattern, 0)
rule.Regexp, err = regexp2.Compile(pattern, regexp2.RE2)
if err != nil {
return fmt.Errorf("failed to compile rule %s.%d: %s", state, i, err)
}
@@ -422,7 +466,43 @@ restart:
return nil
}
func (r *RegexLexer) compileRules() error {
rules := r.compilerFunc()
if _, ok := rules["root"]; !ok {
return fmt.Errorf("no \"root\" state")
}
compiledRules := map[string][]*CompiledRule{}
for state, rules := range rules {
compiledRules[state] = nil
for _, rule := range rules {
flags := ""
if !r.config.NotMultiline {
flags += "m"
}
if r.config.CaseInsensitive {
flags += "i"
}
if r.config.DotAll {
flags += "s"
}
compiledRules[state] = append(compiledRules[state], &CompiledRule{Rule: rule, flags: flags})
}
}
r.rules = compiledRules
return nil
}
func (r *RegexLexer) Tokenise(options *TokeniseOptions, text string) (Iterator, error) { // nolint
var err error
if r.compilerFunc != nil {
r.compileOnce.Do(func() {
err = r.compileRules()
})
}
if err != nil {
return nil, err
}
if err := r.maybeCompile(); err != nil {
return nil, err
}
@@ -432,10 +512,13 @@ func (r *RegexLexer) Tokenise(options *TokeniseOptions, text string) (Iterator,
if options.EnsureLF {
text = ensureLF(text)
}
newlineAdded := false
if !options.Nested && r.config.EnsureNL && !strings.HasSuffix(text, "\n") {
text += "\n"
newlineAdded = true
}
state := &LexerState{
newlineAdded: newlineAdded,
options: options,
Lexer: r,
Text: []rune(text),
@@ -446,18 +529,20 @@ func (r *RegexLexer) Tokenise(options *TokeniseOptions, text string) (Iterator,
return state.Iterator, nil
}
func matchRules(text []rune, pos int, rules []*CompiledRule) (int, *CompiledRule, []string) {
func matchRules(text []rune, pos int, rules []*CompiledRule) (int, *CompiledRule, []string, map[string]string) {
for i, rule := range rules {
match, err := rule.Regexp.FindRunesMatchStartingAt(text, pos)
if match != nil && err == nil && match.Index == pos {
groups := []string{}
namedGroups := make(map[string]string)
for _, g := range match.Groups() {
namedGroups[g.Name] = g.String()
groups = append(groups, g.String())
}
return i, rule, groups
return i, rule, groups, namedGroups
}
}
return 0, &CompiledRule{}, nil
return 0, &CompiledRule{}, nil, nil
}
// replace \r and \r\n with \n