Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support full reconstruction of HCL from parse tree #169

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,6 @@ node_modules/
# Don't commit the generated parser
lark_parser.py
.lark_cache.bin

# ASDF tool-versions file
.tool-versions
2 changes: 1 addition & 1 deletion hcl2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
except ImportError:
__version__ = "unknown"

from .api import load, loads
from .api import load, loads, parse, parses, transform, writes, AST
41 changes: 41 additions & 0 deletions hcl2/api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""The API that will be exposed to users of this package"""
from typing import TextIO

from lark.tree import Tree as AST
from hcl2.parser import hcl2
from hcl2.transformer import DictTransformer

Expand All @@ -26,3 +27,43 @@ def loads(text: str, with_meta=False) -> dict:
# Append a new line as a temporary fix
tree = hcl2.parse(text + "\n")
return DictTransformer(with_meta=with_meta).transform(tree)


def parse(file: TextIO) -> AST:
    """Load HCL2 syntax tree from a file.

    The whole file is read into memory and handed to `parses`.

    :param file: Open text file containing the HCL2 source to be parsed.
    :return: The syntax tree returned by `parses` for the file's contents.
    """
    return parses(file.read())


def parses(text: str) -> AST:
    """Load HCL2 syntax tree from a string.

    The text is passed to the parser exactly as given; unlike `loads`, no
    trailing newline is appended.

    :param text: HCL2 source text to be parsed.
    :return: The syntax tree produced by the parser defined in
        `hcl2.reconstructor`.
    """
    # defer this import until this method is called, due to the performance hit
    # of rebuilding the grammar without cache
    from hcl2.reconstructor import (  # pylint: disable=import-outside-toplevel
        hcl2 as uncached_hcl2,
    )

    return uncached_hcl2.parse(text)


def transform(ast: AST, with_meta=False) -> dict:
    """Turn an HCL2 syntax tree into a dictionary.

    :param ast: HCL2 syntax tree, output from `parse` or `parses`
    :param with_meta: passed through unchanged to `DictTransformer`
    :return: the result of running `DictTransformer` over the tree
    """
    transformer = DictTransformer(with_meta=with_meta)
    return transformer.transform(ast)


def writes(ast: AST) -> str:
    """Render an HCL2 syntax tree back into text.

    :param ast: HCL2 syntax tree, output from `parse` or `parses`
    :return: the string produced by the module-level `hcl2_reconstructor`
    """
    # Imported lazily: loading the reconstructor module rebuilds the grammar
    # without a cache, which is a performance hit we only pay when needed.
    from hcl2.reconstructor import (  # pylint: disable=import-outside-toplevel
        hcl2_reconstructor,
    )

    rendered = hcl2_reconstructor.reconstruct(ast)
    return rendered
16 changes: 10 additions & 6 deletions hcl2/hcl2.lark
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
start : body
body : (new_line_or_comment? (attribute | block))* new_line_or_comment?
attribute : identifier "=" expression
attribute : identifier EQ expression
block : identifier (identifier | STRING_LIT)* new_line_or_comment? "{" body "}"
new_line_and_or_comma: new_line_or_comment | "," | "," new_line_or_comment
new_line_or_comment: ( /\n/ | /#.*\n/ | /\/\/.*\n/ )+
new_line_or_comment: ( NL_OR_COMMENT )+
NL_OR_COMMENT: /\n[ \t]*/ | /#.*\n/ | /\/\/.*\n/ | /\/\*(.|\n)*?(\*\/)/

identifier : /[a-zA-Z_][a-zA-Z0-9_-]*/ | IN | FOR | IF | FOR_EACH
identifier : NAME | IN | FOR | IF | FOR_EACH
NAME : /[a-zA-Z_][a-zA-Z0-9_-]*/
IF : "if"
IN : "in"
FOR : "for"
Expand All @@ -18,8 +20,9 @@ conditional : expression "?" new_line_or_comment? expression new_line_or_comment
?operation : unary_op | binary_op
!unary_op : ("-" | "!") expr_term
binary_op : expression binary_term new_line_or_comment?
!binary_operator : "==" | "!=" | "<" | ">" | "<=" | ">=" | "-" | "*" | "/" | "%" | "&&" | "||" | "+"
!binary_operator : BINARY_OP
binary_term : binary_operator new_line_or_comment? expression
BINARY_OP : "==" | "!=" | "<" | ">" | "<=" | ">=" | "-" | "*" | "/" | "%" | "&&" | "||" | "+"

expr_term : "(" new_line_or_comment? expression new_line_or_comment? ")"
| float_lit
Expand Down Expand Up @@ -50,10 +53,12 @@ int_lit : DECIMAL+
| DECIMAL+ ("." DECIMAL+)? EXP_MARK DECIMAL+
DECIMAL : "0".."9"
EXP_MARK : ("e" | "E") ("+" | "-")?
EQ : /[ \t]*=(?!=|>)/

tuple : "[" (new_line_or_comment* expression new_line_or_comment* ",")* (new_line_or_comment* expression)? new_line_or_comment* "]"
object : "{" new_line_or_comment? (object_elem (new_line_and_or_comma object_elem )* new_line_and_or_comma?)? "}"
object_elem : (identifier | expression) ("=" | ":") expression
object_elem : (identifier | expression) ( EQ | ":") expression


heredoc_template : /<<(?P<heredoc>[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?(?P=heredoc)/
heredoc_template_trim : /<<-(?P<heredoc_trim>[a-zA-Z][a-zA-Z0-9._-]+)\n(?:.|\n)*?(?P=heredoc_trim)/
Expand All @@ -78,4 +83,3 @@ full_splat : "[*]" (get_attr | index)*
!for_cond : "if" new_line_or_comment? expression

%ignore /[ \t]+/
%ignore /\/\*(.|\n)*?(\*\/)/
162 changes: 162 additions & 0 deletions hcl2/reconstructor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
"""A reconstructor for HCL2 implemented using Lark's experimental reconstruction functionality"""

from lark import Lark
from lark.reconstruct import Reconstructor
from lark.utils import is_id_continue

# this is duplicated from `parser` because we need different options here for
# the reconstructor. please make sure changes are kept in sync between the two
# if necessary.
# Module-level LALR parser built from "hcl2.lark", resolved relative to this
# file. This instance is the one passed to HCLReconstructor at the bottom of
# this module.
hcl2 = Lark.open(
"hcl2.lark",
parser="lalr",
# Caching must be disabled to allow for reconstruction until lark-parser/lark#1472 is fixed:
#
# https://github.com/lark-parser/lark/issues/1472
#
# cache=str(PARSER_FILE), # Disable/Delete file to effect changes to the grammar
rel_to=__file__,
propagate_positions=True,
maybe_placeholders=False, # Needed for reconstruction
)

# Lookup tables consulted by _add_extra_space / _postprocess_reconstruct.

# if the previous token ENDS with one of these characters, a space follows it
CHAR_SPACE_AFTER = set(',~@<>="|?)]:')
# if the next token STARTS with one of these characters, a space precedes it;
# same as CHAR_SPACE_AFTER except "," and "=" are removed and "'" is added
CHAR_SPACE_BEFORE = (CHAR_SPACE_AFTER - set(",=")) | set("'")
# tokens that are compared as a WHOLE (not by first/last character) and get a
# space after them
KEYWORDS_SPACE_AFTER = [
"if",
"in",
"for",
"for_each",
"==",
"!=",
"<",
">",
"<=",
">=",
"-",
"*",
"/",
"%",
"&&",
"||",
"+",
]
# NOTE: this is the very same list object as KEYWORDS_SPACE_AFTER, not a copy
KEYWORDS_SPACE_BEFORE = KEYWORDS_SPACE_AFTER
# a token ending in a digit followed by a token starting with a digit is never
# separated by a space
DIGITS = set("0123456789")
# no space is ever added after a token ending with one of these ...
NEVER_SPACE_AFTER = set("[(")
# ... or before a token starting with one of these
NEVER_SPACE_BEFORE = set("]),.")
# a deferred "," is dropped when the next token starts with one of these
NEVER_COMMA_BEFORE = set("])}")
# characters that are OK to come right after an identifier with no space between
IDENT_NO_SPACE = set("()[]")


def _add_extra_space(prev_item, item):
    """Decide whether a single space belongs between two adjacent tokens.

    Only the last character of ``prev_item``, the first character of ``item``
    and the two whole token strings are inspected.

    :param prev_item: text of the token emitted immediately before ``item``
    :param item: text of the token about to be emitted
    :return: ``True`` if a space should be inserted between the two tokens,
        ``False`` otherwise (including when either token is empty)
    """
    # pylint: disable=too-many-boolean-expressions, too-many-return-statements

    # an empty token has no boundary character to inspect; bail out instead of
    # letting the [-1] / [0] lookups below raise IndexError
    if not prev_item or not item:
        return False

    last_char = prev_item[-1]
    first_char = item[0]

    ##### the scenarios where we explicitly disallow spaces: #####

    # if we already have a space, don't add another
    if last_char.isspace() or first_char.isspace():
        return False

    # none of the following should be separated by spaces:
    # - groups of digits
    # - namespaced::function::calls
    # - characters within an identifier like array[0]()
    if (
        (last_char in DIGITS and first_char in DIGITS)
        or item == "::"
        or prev_item == "::"
        or (last_char in IDENT_NO_SPACE and first_char in IDENT_NO_SPACE)
    ):
        return False

    # specific characters are also blocklisted from having spaces
    if last_char in NEVER_SPACE_AFTER or first_char in NEVER_SPACE_BEFORE:
        return False

    ##### the scenarios where we add spaces: #####

    # scenario 1, the prev token ended with an identifier character
    # and the next character is not an "IDENT_NO_SPACE" character
    if is_id_continue(last_char) and first_char not in IDENT_NO_SPACE:
        return True

    # scenario 2, the prev token or the next token should be followed by a space
    if (
        last_char in CHAR_SPACE_AFTER
        or prev_item in KEYWORDS_SPACE_AFTER
        or first_char in CHAR_SPACE_BEFORE
        or item in KEYWORDS_SPACE_BEFORE
    ):
        return True

    # scenario 3, the previous token was a block opening brace and
    # the next token is not a closing brace (so the block is on one
    # line and not empty)
    if last_char == "{" and first_char != "}":
        return True

    ##### otherwise, we don't add a space #####
    return False


def _postprocess_reconstruct(items):
    """
    Postprocess the stream of tokens derived from the AST during reconstruction.

    For HCL2, this is used exclusively for adding whitespace in the right locations.

    Tokens that start with a comma are held back ("deferred") for one step so
    that the comma can be dropped when the following token starts with a
    character in ``NEVER_COMMA_BEFORE``.

    :param items: iterable of token strings in output order
    :return: generator yielding the tokens, with ``" "`` inserted wherever
        ``_add_extra_space`` asks for it
    """
    prev_item = ""
    for item in items:
        # first, handle any deferred tokens
        if isinstance(prev_item, tuple) and prev_item[0] == "_deferred":
            prev_item = prev_item[1]

            # if the deferred token was a comma, see if we're ending a block;
            # item[:1] (rather than item[0]) keeps an empty token from raising
            if prev_item == ",":
                if item[:1] not in NEVER_COMMA_BEFORE:
                    yield prev_item
            else:
                yield prev_item

        # if we're between two tokens, determine if we need to add an extra space
        # we need the previous item and the current item to exist to evaluate these rules
        if prev_item and item and _add_extra_space(prev_item, item):
            yield " "

        if item[:1] == ",":
            # prevent the inclusion of extra commas if they are not intended:
            # hold the token back until we have seen what follows it
            prev_item = ("_deferred", item)
        else:
            # print the actual token and remember it for the next iteration
            yield item
            prev_item = item

    # if the last token was deferred, print it before finishing
    if isinstance(prev_item, tuple) and prev_item[0] == "_deferred":
        yield prev_item[1]


class HCLReconstructor:
    """Turns a Lark.Tree AST back into the text of the HCL code it represents."""

    def __init__(self, parser):
        # wrap the given parser in Lark's Reconstructor once, up front
        self._recons = Reconstructor(parser)

    def reconstruct(self, tree):
        """Return the HCL string representation of a Lark.Tree AST."""
        # spacing is handled by _postprocess_reconstruct, so Lark's own
        # space insertion is switched off
        rebuilt = self._recons.reconstruct(
            tree,
            _postprocess_reconstruct,
            insert_spaces=False,
        )
        return rebuilt


# Module-level reconstructor instance, built from the `hcl2` parser defined
# earlier in this module.
hcl2_reconstructor = HCLReconstructor(hcl2)
7 changes: 5 additions & 2 deletions hcl2/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,10 @@ def object_elem(self, args: List) -> Dict:
# This returns a dict with a single key/value pair to make it easier to merge these
# into a bigger dict that is returned by the "object" function
key = self.strip_quotes(args[0])
value = self.to_string_dollar(args[1])
if len(args) == 3:
value = self.to_string_dollar(args[2])
else:
value = self.to_string_dollar(args[1])

return {key: value}

Expand Down Expand Up @@ -148,7 +151,7 @@ def attribute(self, args: List) -> Attribute:
key = str(args[0])
if key.startswith('"') and key.endswith('"'):
key = key[1:-1]
value = self.to_string_dollar(args[1])
value = self.to_string_dollar(args[2])
return Attribute(key, value)

def conditional(self, args: List) -> str:
Expand Down
Loading
Loading