diff --git a/models/Configuration.md b/models/Configuration.md index 7ba508a..0ac706b 100644 --- a/models/Configuration.md +++ b/models/Configuration.md @@ -7,11 +7,11 @@ All configurations in URI Drain are done using `uri_drain.ini` file. [Here is a Snapshot is used to serialize and store the analysis results that have been saved in the current system. Currently, it supports saving snapshots to the file system. -| Name | Type(Unit) | Default | Description | -|---------------------------|-------------|---------|-------------------------------------------------------------------------------------------| -| file_dir | string | /tmp/ | The directory to save the snapshot, the persistent would disable when the value is empty. | -| snapshot_interval_minutes | int(minute) | 10 | The interval to save the snapshot. | -| compress_state | bool | True | Whether to compress the snapshot through zlib with base64. | +| Name | Type(Unit) | Environment Key | Default | Description | +|---------------------------|-------------|---------------------------|---------|-------------------------------------------------------------------------------------------| +| file_dir | string | SNAPSHOT_FILE_PATH | /tmp/ | The directory to save the snapshot, the persistent would disable when the value is empty. | +| snapshot_interval_minutes | int(minute) | SNAPSHOT_INTERVAL_MINUTES | 10 | The interval to save the snapshot. | +| compress_state | bool | SNAPSHOT_COMPRESS_STATE | True | Whether to compress the snapshot through zlib with base64. | ### Masking @@ -19,28 +19,34 @@ When aggregation methods are detected, Masking determines how to generate the ag Currently, all similar content is replaced with `{var}` by default. -| Name | Type(Unit) | Default | Description | -|-------------|------------|---------|-----------------------------------| -| mask_prefix | string | { | The prefix to mask the parameter. | -| mask_suffix | string | } | The suffix to mask the parameter. 
| +| Name | Type(Unit) | Environment Key | Default | Description | +|-------------|------------|-----------------|---------|-----------------------------------| +| mask_prefix | string | MASKING_PREFIX | { | The prefix to mask the parameter. | +| mask_suffix | string | MASKING_SUFFIX | } | The suffix to mask the parameter. | ### Drain Drain is the core algorithm of URI Drain. -| Name | Type(Unit) | Default | Description | -|------------------|------------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| sim_th | float | 0.4 | The similarity threshold to decide if a new sequence should be merged into an existing cluster. | -| depth | int | 4 | Max depth levels of pattern. Minimum is 2. | -| max_children | int | 100 | Max number of children of an internal node. | -| max_clusters | int | 1024 | Max number of tracked clusters (unlimited by default). When this number is reached, model starts replacing old clusters with a new ones according to the LRU policy. | -| extra_delimiters | string | / | The extra delimiters to split the sequence. | +| Name | Type(Unit) | Environment Key | Default | Description | +|------------------|------------|------------------------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| sim_th | float | DRAIN_SIM_TH | 0.4 | The similarity threshold to decide if a new sequence should be merged into an existing cluster. | +| depth | int | DRAIN_DEPTH | 4 | Max depth levels of pattern. Minimum is 2. | +| max_children | int | DRAIN_MAX_CHILDREN | 100 | Max number of children of an internal node. | +| max_clusters | int | DRAIN_MAX_CLUSTERS | 1024 | Max number of tracked clusters (unlimited by default). 
When this number is reached, model starts replacing old clusters with new ones according to the LRU policy. | +| extra_delimiters | string | DRAIN_EXTRA_DELIMITERS | \["/"\] | The extra delimiters to split the sequence. | ### Profiling Profiling is used to enable the profiling of the algorithm. -| Name | Type(Unit) | Default | Description | -|------------|-------------|---------|---------------------------------------------------| -| enabled | bool | False | Whether to enable the profiling. | -| report_sec | int(second) | 30 | The interval to report the profiling information. | \ No newline at end of file +| Name | Type(Unit) | Environment Key | Default | Description | +|------------|-------------|----------------------|---------|---------------------------------------------------| +| enabled | bool | PROFILING_ENABLED | False | Whether to enable the profiling. | +| report_sec | int(second) | PROFILING_REPORT_SEC | 30 | The interval to report the profiling information. | + +## Configuration with Environment Variables + +In the configuration file, most of the values are written in the format `${xxx:config_value}`. +This means that when the program starts, it first reads `xxx` from the **system environment variables** at runtime. +If the environment variable cannot be found, `config_value` is used as the value. 
diff --git a/models/uri_drain/template_miner_config.py b/models/uri_drain/template_miner_config.py index 6b0a65c..1af6064 100644 --- a/models/uri_drain/template_miner_config.py +++ b/models/uri_drain/template_miner_config.py @@ -4,11 +4,14 @@ import configparser import json import logging +import os +import re from models.uri_drain.masking import MaskingInstruction logger = logging.getLogger(__name__) +env_regular_regex = re.compile(r'\${(?P<ENV>[_A-Z0-9]+):(?P<DEF>.*)}') class TemplateMinerConfig: def __init__(self): @@ -40,42 +43,39 @@ def load(self, config_filename: str): section_drain = 'DRAIN' section_masking = 'MASKING' - self.engine = parser.get(section_drain, 'engine', fallback=self.engine) + self.engine = self.read_config_value(parser, section_drain, 'engine', str, self.engine) - self.profiling_enabled = parser.getboolean(section_profiling, 'enabled', - fallback=self.profiling_enabled) - self.profiling_report_sec = parser.getint(section_profiling, 'report_sec', - fallback=self.profiling_report_sec) + self.profiling_enabled = self.read_config_value(parser, section_profiling, 'enabled', bool, self.profiling_enabled) + self.profiling_report_sec = self.read_config_value(parser, section_profiling, 'report_sec', int, self.profiling_report_sec) - self.snapshot_interval_minutes = parser.getint(section_snapshot, 'snapshot_interval_minutes', - fallback=self.snapshot_interval_minutes) - self.snapshot_compress_state = parser.getboolean(section_snapshot, 'compress_state', - fallback=self.snapshot_compress_state) - file_path = parser.get(section_snapshot, 'file_path', fallback=None) + self.snapshot_interval_minutes = self.read_config_value(parser, section_snapshot, 'snapshot_interval_minutes', + int, self.snapshot_interval_minutes) + self.snapshot_compress_state = self.read_config_value(parser, section_snapshot, 'compress_state', bool, + self.snapshot_compress_state) + file_path = self.read_config_value(parser, section_snapshot, 'file_path', str, None) if file_path: 
self.snapshot_file_dir = file_path - drain_extra_delimiters_str = parser.get(section_drain, 'extra_delimiters', - fallback=str(self.drain_extra_delimiters)) + drain_extra_delimiters_str = self.read_config_value(parser, section_drain, 'extra_delimiters', str, + str(self.drain_extra_delimiters)) self.drain_extra_delimiters = ast.literal_eval(drain_extra_delimiters_str) - self.drain_sim_th = parser.getfloat(section_drain, 'sim_th', - fallback=self.drain_sim_th) - self.drain_depth = parser.getint(section_drain, 'depth', - fallback=self.drain_depth) - self.drain_max_children = parser.getint(section_drain, 'max_children', - fallback=self.drain_max_children) - self.drain_max_clusters = parser.getint(section_drain, 'max_clusters', - fallback=self.drain_max_clusters) - self.parametrize_numeric_tokens = parser.getboolean(section_drain, 'parametrize_numeric_tokens', - fallback=self.parametrize_numeric_tokens) - - masking_instructions_str = parser.get(section_masking, 'masking', - fallback=str(self.masking_instructions)) - self.mask_prefix = parser.get(section_masking, 'mask_prefix', fallback=self.mask_prefix) - self.mask_suffix = parser.get(section_masking, 'mask_suffix', fallback=self.mask_suffix) - self.parameter_extraction_cache_capacity = parser.get(section_masking, 'parameter_extraction_cache_capacity', - fallback=self.parameter_extraction_cache_capacity) + self.drain_sim_th = self.read_config_value(parser, section_drain, 'sim_th', float, self.drain_sim_th) + self.drain_depth = self.read_config_value(parser, section_drain, 'depth', int, self.drain_depth) + self.drain_max_children = self.read_config_value(parser, section_drain, 'max_children', int, + self.drain_max_children) + self.drain_max_clusters = self.read_config_value(parser, section_drain, 'max_clusters', int, + self.drain_max_clusters) + self.parametrize_numeric_tokens = self.read_config_value(parser, section_drain, 'parametrize_numeric_tokens', + bool, self.parametrize_numeric_tokens) + + 
masking_instructions_str = self.read_config_value(parser, section_masking, 'masking', str, + str(self.masking_instructions)) + self.mask_prefix = self.read_config_value(parser, section_masking, 'mask_prefix', str, self.mask_prefix) + self.mask_suffix = self.read_config_value(parser, section_masking, 'mask_suffix', str, self.mask_suffix) + self.parameter_extraction_cache_capacity = self.read_config_value(parser, section_masking, + 'parameter_extraction_cache_capacity', int, + self.parameter_extraction_cache_capacity) masking_instructions = [] masking_list = json.loads(masking_instructions_str) @@ -83,3 +83,23 @@ def load(self, config_filename: str): instruction = MaskingInstruction(mi['regex_pattern'], mi['mask_with']) masking_instructions.append(instruction) self.masking_instructions = masking_instructions + + def read_value_with_env(self, value: str): + match = env_regular_regex.match(value) + if match: + env = match.group('ENV') + default = match.group('DEF') + return os.getenv(env, default) + else: + return value + + def read_config_value(self, parser, section, key, tp, default): + conf_value = parser.get(section, key, fallback=None) + if conf_value is None: + return default + val = self.read_value_with_env(conf_value) + if tp == bool: + if val.lower() not in parser.BOOLEAN_STATES: + raise ValueError('Not a boolean: %s' % val) + return parser.BOOLEAN_STATES[val.lower()] + return tp(val) diff --git a/servers/simple/uri_drain.ini b/servers/simple/uri_drain.ini index 09643a9..522628d 100644 --- a/servers/simple/uri_drain.ini +++ b/servers/simple/uri_drain.ini @@ -13,28 +13,28 @@ # limitations under the License. 
[SNAPSHOT] -file_path = /tmp/ -snapshot_interval_minutes = 10 -compress_state = True +file_path = ${SNAPSHOT_FILE_PATH:/tmp/} +snapshot_interval_minutes = ${SNAPSHOT_INTERVAL_MINUTES:10} +compress_state = ${SNAPSHOT_COMPRESS_STATE:True} [MASKING] ;masking = [ ; {"regex_pattern":"\\d+", "mask_with": "INT"} ; ] -mask_prefix = { -mask_suffix = } +mask_prefix = ${MASKING_PREFIX:{} +mask_suffix = ${MASKING_SUFFIX:}} [DRAIN] # engine is Optional parameter. Engine will be "Drain" if the engine argument is not specified. # engine has two options: 'Drain' and 'JaccardDrain'. -# engine = Drain -sim_th = 0.4 +engine = ${DRAIN_ENGINE:Drain} +sim_th = ${DRAIN_SIM_TH:0.4} ; TODO: Evaluate: sim_th is dynamically calculated. -depth = 4 -max_children = 100 -max_clusters = 1024 -extra_delimiters = ["/"] +depth = ${DRAIN_DEPTH:4} +max_children = ${DRAIN_MAX_CHILDREN:100} +max_clusters = ${DRAIN_MAX_CLUSTERS:1024} +extra_delimiters = ${DRAIN_EXTRA_DELIMITERS:["/"]} [PROFILING] -enabled = False -report_sec = 30 +enabled = ${PROFILING_ENABLED:False} +report_sec = ${PROFILING_REPORT_SEC:30}