Skip to content

Commit

Permalink
fix: fix symbol checking and creation for subst with a regexp
Browse files Browse the repository at this point in the history
This change updates the ``checkRegex`` helper so that it does not track
capture group symbols for a regexp when it is used by the builtin
``subst``. In this case the VM will not store these matches, and therefore
these symbols will not be available to the program. The compiler will,
however, compile these regexps and add them to the regexp table for use by
the VM, and multiple calls to ``subst`` are already handled by the VM.

fixes: #693, #810
  • Loading branch information
terencehonles committed Aug 8, 2024
1 parent d4b8a71 commit c7ba9c4
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 0 deletions.
15 changes: 15 additions & 0 deletions internal/runtime/compiler/checker/checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ type checker struct {
tooDeep bool
maxRecursionDepth int
maxRegexLength int
noRegexSymbols bool
}

// Check performs a semantic check of the astNode, and returns a potentially
Expand Down Expand Up @@ -83,6 +84,12 @@ func (c *checker) VisitBefore(node ast.Node) (ast.Visitor, ast.Node) {
glog.V(2).Infof("Created new scope %v in condstmt", n.Scope)
return c, n

case *ast.BuiltinExpr:
if n.Name == "subst" {
c.noRegexSymbols = true
}
return c, n

case *ast.CaprefTerm:
if n.Symbol == nil {
sym := c.scope.Lookup(n.Name, symbol.CaprefSymbol)
Expand Down Expand Up @@ -821,6 +828,10 @@ func (c *checker) VisitAfter(node ast.Node) ast.Node {
return n
}

case "subst":
c.noRegexSymbols = false
return n

case "tolower":
if !types.Equals(gotType.Args[0], types.String) {
c.errors.Add(n.Args.(*ast.ExprList).Children[0].Pos(), fmt.Sprintf("Expecting a String for argument 1 of tolower(), not %v.", gotType.Args[0]))
Expand Down Expand Up @@ -875,6 +886,10 @@ func (c *checker) checkRegex(pattern string, n ast.Node) {
return
}
if reAst, err := types.ParseRegexp(pattern); err == nil {
if c.noRegexSymbols {
return
}

// We reserve the names of the capturing groups as declarations
// of those symbols, so that future CAPREF tokens parsed can
// retrieve their value. By recording them in the symbol table, we
Expand Down
19 changes: 19 additions & 0 deletions internal/runtime/compiler/checker/checker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,13 @@ var checkerInvalidPrograms = []struct {
[]string{"invalid regex 3:1:1-24: error parsing regexp: invalid character class range: `[:alph:]`"},
},

{
"subst invalid regex",
`subst(/foo(/, "", "")
`,
[]string{"subst invalid regex:1:7-12: error parsing regexp: missing closing ): `foo(`"},
},

{
"duplicate declaration",
"counter foo\ncounter foo\n",
Expand Down Expand Up @@ -275,6 +282,13 @@ m`,
[]string{"regexp too long:1:1-1027: Exceeded maximum regular expression pattern length of 1024 bytes with 1025.", "\tExcessively long patterns are likely to cause compilation and runtime performance problems."},
},

{
"subst regexp too long",
"subst(/" + strings.Repeat("c", 1025) + `/, "", "")
`,
[]string{"subst regexp too long:1:7-1033: Exceeded maximum regular expression pattern length of 1024 bytes with 1025.", "\tExcessively long patterns are likely to cause compilation and runtime performance problems."},
},

{
"strptime invalid args",
`strptime("",8)
Expand Down Expand Up @@ -576,6 +590,11 @@ foo = subst(",", "", $1)
}`},
{"regexp subst", `
subst(/\d+/, "d", "1234")
`},
{"regexp subst twice", `
text value
value = subst(/[a-zA-Z]+/, "a", "1234abcd")
value = subst(/\d+/, "d", value)
`},
}

Expand Down

0 comments on commit c7ba9c4

Please sign in to comment.