Include p99 latency tracking (#274)
* Added 142 new benchmarks focused on latency tracking (rate-limited)

* Bumping version from 0.1.240 to 0.1.241

* Using docker image redislabs/memtier_benchmark:2.1.0 on latency related benchmarks

* Removed benchmarks above 25GB

* Removed 2TB test

* Deleted wrongly designed test

* Added 3M-key 512B and 1000B GET/SET use-cases

* Include p99 latency tracking
fcostaoliveira authored Oct 14, 2024
1 parent 4f37432 commit f424438
Showing 5 changed files with 251 additions and 9 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "redis-benchmarks-specification"
version = "0.1.245"
version = "0.1.247"
description = "The Redis benchmarks specification describes the cross-language/tools requirements and expectations to foster performance and observability standards around redis related technologies. Members from both industry and academia, including organizations and individuals are encouraged to contribute."
authors = ["filipecosta90 <[email protected]>","Redis Performance Group <[email protected]>"]
readme = "Readme.md"
6 changes: 5 additions & 1 deletion redis_benchmarks_specification/__cli__/cli.py
@@ -243,14 +243,18 @@ def get_commits_by_tags(args, repo):
def get_repo(args):
redisDirPath = args.redis_repo
cleanUp = False
last_n = args.last_n
if redisDirPath is None:
cleanUp = True
redisDirPath = tempfile.mkdtemp()
remote_url = f"https://github.com/{args.gh_org}/{args.gh_repo}"
logging.info(
f"Retrieving redis repo from remote {remote_url} into {redisDirPath}. Using branch {args.branch}."
)
cmd = f"git clone {remote_url} {redisDirPath} --branch {args.branch}\n"
depth_str = ""
if last_n > 0:
depth_str = f" --depth {last_n}"
cmd = f"git clone {remote_url} {redisDirPath} --branch {args.branch} {depth_str}\n"
process = subprocess.Popen(
"/bin/bash", stdin=subprocess.PIPE, stdout=subprocess.PIPE
)
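
For context, a minimal sketch of the clone command this hunk now builds, assuming last_n is non-positive when the caller does not limit history (the values below are hypothetical):

    remote_url = "https://github.com/redis/redis"
    redisDirPath = "/tmp/redis-clone"
    branch = "unstable"
    last_n = 5  # hypothetical: only the 5 most recent commits are needed

    depth_str = ""
    if last_n > 0:
        # --depth N produces a shallow clone with only the last N commits,
        # which is far cheaper than fetching the full history
        depth_str = f" --depth {last_n}"
    cmd = f"git clone {remote_url} {redisDirPath} --branch {branch}{depth_str}"
    # -> git clone https://github.com/redis/redis /tmp/redis-clone --branch unstable --depth 5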
47 changes: 41 additions & 6 deletions redis_benchmarks_specification/__compare__/compare.py
@@ -1019,8 +1019,13 @@ def from_rts_to_regression_table(
baseline_only_list = []
comparison_only_list = []
no_datapoints_list = []
no_datapoints_baseline_list = []
no_datapoints_comparison_list = []
original_metric_mode = metric_mode
for test_name in test_names:
metric_mode = original_metric_mode
compare_version = "main"
# GE
github_link = "https://github.com/redis/redis-benchmarks-specification/blob"
test_path = f"redis_benchmarks_specification/test-suites/{test_name}.yml"
test_link = f"[{test_name}]({github_link}/{compare_version}/{test_path})"
@@ -1076,6 +1081,22 @@
if len(baseline_timeseries) > 1 and multi_value_baseline is False:
baseline_timeseries = get_only_Totals(baseline_timeseries)

if len(baseline_timeseries) == 0:
logging.warning(
f"No datapoints for test={test_name} for baseline timeseries {baseline_timeseries}"
)
no_datapoints_baseline_list.append(test_name)
if test_name not in no_datapoints_list:
no_datapoints_list.append(test_name)

if len(comparison_timeseries) == 0:
logging.warning(
f"No datapoints for test={test_name} for comparison timeseries {comparison_timeseries}"
)
no_datapoints_comparison_list.append(test_name)
if test_name not in no_datapoints_list:
no_datapoints_list.append(test_name)

if len(baseline_timeseries) != 1 and multi_value_baseline is False:
if verbose:
logging.warning(
@@ -1152,11 +1173,14 @@
)

waterline = regressions_percent_lower_limit
if regressions_percent_lower_limit < largest_variance:
note = "waterline={:.1f}%.".format(largest_variance)
waterline = largest_variance
# if regressions_percent_lower_limit < largest_variance:
# note = "waterline={:.1f}%.".format(largest_variance)
# waterline = largest_variance

except redis.exceptions.ResponseError:
except redis.exceptions.ResponseError as e:
logging.error(
"Detected a redis.exceptions.ResponseError. {}".format(e.__str__())
)
pass
except ZeroDivisionError as e:
logging.error("Detected a ZeroDivisionError. {}".format(e.__str__()))
Expand Down Expand Up @@ -1198,7 +1222,7 @@ def from_rts_to_regression_table(
else:
# lower-better
percentage_change = (
float(baseline_v) / float(comparison_v) - 1
-(float(baseline_v) - float(comparison_v)) / float(baseline_v)
) * 100.0
else:
logging.warn(
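
To see what the revised lower-is-better expression changes, a worked example with hypothetical values (baseline 10.0 ms, comparison 12.0 ms):

    baseline_v, comparison_v = 10.0, 12.0  # hypothetical lower-is-better metric, e.g. latency

    old = (baseline_v / comparison_v - 1) * 100.0              # -16.67: ratio of baseline to comparison
    new = (-(baseline_v - comparison_v) / baseline_v) * 100.0  # +20.00: change relative to the baseline

Algebraically the new expression equals (comparison_v - baseline_v) / baseline_v, i.e. the plain relative change measured against the baseline, so a 2 ms movement on a 10 ms baseline now reads as 20% rather than the ratio-based -16.67%; how the sign is interpreted for lower-is-better metrics is left to the consumers of this table.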
@@ -1280,16 +1304,27 @@
            logging.warning(
                f"There were no datapoints for both baseline and comparison for test: {test_name}"
            )
no_datapoints_list.append(test_name)
if test_name not in no_datapoints_list:
no_datapoints_list.append(test_name)
logging.warning(
f"There is a total of {len(no_datapoints_list)} tests without datapoints for baseline AND comparison"
)
logging.info(
f"There is a total of {len(comparison_only_list)} tests without datapoints for baseline"
)
print(
"No datapoint baseline regex={test_names_str}".format(
test_names_str="|".join(no_datapoints_baseline_list)
)
)
logging.info(
f"There is a total of {len(baseline_only_list)} tests without datapoints for comparison"
)
print(
"No datapoint comparison regex={test_names_str}".format(
test_names_str="|".join(no_datapoints_comparison_list)
)
)
logging.info(f"There is a total of {len(unstable_list)} UNSTABLE tests")
return (
detected_regressions,
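
The "No datapoint ... regex" print statements above join the missing-test names into a single alternation pattern, so the affected tests can be re-run with one regex filter; a sketch of the output with hypothetical test names:

    no_datapoints_baseline_list = [
        "memtier_benchmark-1Mkeys-string-get-1KiB",  # hypothetical test names
        "memtier_benchmark-3Mkeys-string-set-512B",
    ]
    print(
        "No datapoint baseline regex={test_names_str}".format(
            test_names_str="|".join(no_datapoints_baseline_list)
        )
    )
    # -> No datapoint baseline regex=memtier_benchmark-1Mkeys-string-get-1KiB|memtier_benchmark-3Mkeys-string-set-512B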
6 changes: 5 additions & 1 deletion redis_benchmarks_specification/test-suites/defaults.yml
@@ -10,15 +10,19 @@ exporter:
- $."BEST RUN RESULTS".Totals."Latency"
- $."BEST RUN RESULTS".Totals."Misses/sec"
- $."BEST RUN RESULTS".Totals."Percentile Latencies"."p50.00"
- $."BEST RUN RESULTS".Totals."Percentile Latencies"."p99.00"
- $."WORST RUN RESULTS".Totals."Ops/sec"
- $."WORST RUN RESULTS".Totals."Latency"
- $."WORST RUN RESULTS".Totals."Misses/sec"
- $."WORST RUN RESULTS".Totals."Percentile Latencies"."p50.00"
- $."WORST RUN RESULTS".Totals."Percentile Latencies"."p99.00"
- $."AGGREGATED AVERAGE RESULTS (3 runs)".Totals."Ops/sec"
- $."AGGREGATED AVERAGE RESULTS (3 runs)".Totals."Latency"
- $."AGGREGATED AVERAGE RESULTS (3 runs)".Totals."Misses/sec"
- $."AGGREGATED AVERAGE RESULTS (5 runs)".Totals."Percentile Latencies"."p50.00"
- $."AGGREGATED AVERAGE RESULTS (3 runs)".Totals."Percentile Latencies"."p50.00"
- $."AGGREGATED AVERAGE RESULTS (3 runs)".Totals."Percentile Latencies"."p99.00"
- $."ALL STATS".Totals."Ops/sec"
- $."ALL STATS".Totals."Latency"
- $."ALL STATS".Totals."Misses/sec"
- $."ALL STATS".Totals."Percentile Latencies"."p50.00"
- $."ALL STATS".Totals."Percentile Latencies"."p99.00"
199 changes: 199 additions & 0 deletions utils/summary.py
@@ -0,0 +1,199 @@
import os
import argparse
from ruamel.yaml import YAML
import collections

# Command groups mapping
COMMAND_GROUPS = {
"string": ["set", "get", "append", "getbit", "setrange", "bitcount", "mget"],
"hash": [
"hset",
"hget",
"hincrby",
"hmset",
"hdel",
"hscan",
"hexists",
"hkeys",
"hvals",
"hmget",
"hsetnx",
"hgetall",
],
"list": ["lpush", "rpop", "lpop", "lrem", "lrange", "lindex", "lpos", "linsert"],
"set": [
"sadd",
"smembers",
"sismember",
"sunion",
"sdiff",
"sinter",
"smismember",
"sscan",
],
"sorted_set": [
"zadd",
"zrange",
"zrevrange",
"zrangebyscore",
"zrevrangebyscore",
"zincrby",
"zrem",
"zscore",
"zrank",
"zunion",
"zunionstore",
"zrevrank",
"zscan",
"zcard",
],
"stream": ["xadd", "xread"],
"geospatial": ["geosearch", "geopos", "geohash", "geodist"],
"key_management": [
"expire",
"pexpire",
"ttl",
"expireat",
"touch",
"del",
"exists",
],
"pubsub": ["ping", "hello"],
"scripting": ["eval", "evalsha"],
"transaction": ["multi", "exec"],
"hyperloglog": ["pfadd"],
"server_management": ["hello"],
}


def parse_arguments(arguments):
    """
    Parses the memtier benchmark arguments to extract relevant parameters.
    Specifically extracts the --command argument.
    Args:
        arguments (str): The arguments string from the YAML file.
    Returns:
        str or None: The command passed via --command, if present.
    """
    command = None
    tokens = arguments.split()

    for i, arg in enumerate(tokens):
        if arg.startswith("--command="):
            command = arg.split("=", 1)[1]
        elif arg == "--command" and i + 1 < len(tokens):
            # Take the token that follows the flag; the bounds check avoids
            # an IndexError when --command is the last token.
            command = tokens[i + 1]

    return command


def categorize_command(command):
"""
Categorize a Redis command into a command group.
Args:
command (str): The Redis command.
Returns:
str: The command group.
"""
for group, commands in COMMAND_GROUPS.items():
if command in commands:
return group
return "unknown"


def summarize_yaml_file(yaml_file_path, command_summary, command_group_summary):
"""
Processes a single YAML file to extract the tested commands and groups.
Args:
yaml_file_path (str): Path to the YAML file.
command_summary (dict): Dictionary to store the command summary.
command_group_summary (dict): Dictionary to store the command group summary.
"""
yaml = YAML()
yaml.preserve_quotes = True

try:
with open(yaml_file_path, "r") as file:
config = yaml.load(file)
except Exception as e:
print(f"Error reading {yaml_file_path}: {e}")
return

# Extract tested commands from 'tested-commands'
tested_commands = config.get("tested-commands", [])
for command in tested_commands:
command_summary["tested_commands"][command] += 1
command_group = categorize_command(command)
command_group_summary[command_group] += 1

# Extract command from 'clientconfig.arguments'
arguments = config.get("clientconfig", {}).get("arguments", "")
if arguments:
command = parse_arguments(arguments)
if command:
command_summary["client_arguments_commands"][command] += 1
command_group = categorize_command(command)
command_group_summary[command_group] += 1


def summarize_directory(directory):
"""
Summarizes the commands and command groups across all YAML files in a directory.
Args:
directory (str): Path to the directory containing YAML files.
"""
command_summary = {
"tested_commands": collections.Counter(),
"client_arguments_commands": collections.Counter(),
}
command_group_summary = collections.Counter()

# Iterate over all YAML files in the directory
for filename in os.listdir(directory):
if filename.endswith(".yml") or filename.endswith(".yaml"):
yaml_file_path = os.path.join(directory, filename)
summarize_yaml_file(yaml_file_path, command_summary, command_group_summary)

# Print summary
print("\nTested Commands Summary:")
for command, count in command_summary["tested_commands"].items():
print(f"{command}: {count} occurrences")

print("\nClient Arguments Commands Summary:")
for command, count in command_summary["client_arguments_commands"].items():
print(f"{command}: {count} occurrences")

print("\nCommand Group Summary:")
for group, count in command_group_summary.items():
print(f"{group.capitalize()}: {count} occurrences")


def main():
parser = argparse.ArgumentParser(
description="Summarize commands and command groups from YAML benchmark files."
)
parser.add_argument(
"--directory",
type=str,
default="../redis_benchmarks_specification/test-suites/",
help="Path to the directory containing YAML test files.",
)

args = parser.parse_args()
directory = args.directory

if not os.path.isdir(directory):
print(f"Directory {directory} does not exist.")
return

summarize_directory(directory)


if __name__ == "__main__":
main()
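
A usage example, assuming the script is run from the utils/ directory so that the default --directory path resolves:

    python summary.py
    python summary.py --directory ../redis_benchmarks_specification/test-suites/

It prints occurrence counts for commands listed under tested-commands, commands extracted from clientconfig arguments, and the derived command groups.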
