Skip to content

Commit

Permalink
Add UTF-8 aware slicer to allow slicing long messages that can still …
Browse files Browse the repository at this point in the history
…be used in JSON.
  • Loading branch information
Ivor committed Oct 24, 2024
1 parent 5b2dc0a commit 381614a
Show file tree
Hide file tree
Showing 4 changed files with 171 additions and 8 deletions.
92 changes: 92 additions & 0 deletions lib/meta_logger/utf8_slicer.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
defmodule MetaLogger.Slicer.Utf8Impl do
  @moduledoc """
  Slices a string into chunks of a given length, taking into account the UTF-8 encoding of the string.

  Slices are measured in bytes, but a slice boundary is never placed in the middle of a
  multibyte codepoint, so slicing a valid UTF-8 string always yields valid UTF-8 slices.
  """

  @behaviour MetaLogger.Slicer

  @typedoc "Max length in bytes or `:infinity` if the entry should not be sliced."
  @type max_entry_length :: non_neg_integer() | :infinity

  @doc """
  Returns sliced log entries according to the given max entry length.

  Ensures that all slices are valid UTF-8 strings by not splitting multibyte characters.

  If the entry is smaller than the given max length, or if `:infinity` is given
  as an option, a list with one entry is returned. Otherwise, a list with multiple
  entries is returned.

  A codepoint that is larger than the max length on its own is emitted as a slice of its
  own, so such a slice may exceed the max length. Bytes that are not part of a valid UTF-8
  sequence are kept as single-byte units instead of raising; the resulting slices are only
  guaranteed to be valid UTF-8 when the entry itself is.

  ## Examples

      iex> #{inspect(__MODULE__)}.slice("1234567890", 10)
      ["1234567890"]

      iex> #{inspect(__MODULE__)}.slice("1234567890", :infinity)
      ["1234567890"]

      iex> #{inspect(__MODULE__)}.slice("1234567890", 5)
      ["12345", "67890"]

      iex> #{inspect(__MODULE__)}.slice("Hello 世界", 7)
      ["Hello ", "世界"]

      iex> #{inspect(__MODULE__)}.slice("世界", 2)
      ["世", "界"]

  """
  @impl MetaLogger.Slicer
  @spec slice(String.t(), max_entry_length()) :: [String.t()]
  def slice(log_entry, :infinity), do: [log_entry]

  # Integers sort before atoms in the Erlang term order, so this guard also holds for any
  # non-numeric atom given as max length: such values behave like `:infinity`.
  def slice(entry, max_entry_length) when byte_size(entry) <= max_entry_length,
    do: [entry]

  def slice(entry, max_entry_length) do
    do_slice(entry, max_entry_length, [], [], 0)
  end

  # Walks the entry one unit (codepoint or stray byte) at a time.
  #
  #   * `slices` - finished slices, most recent first.
  #   * `partial_slice` - units of the slice being built, most recent first.
  #   * `partial_size` - byte size of `partial_slice`.
  @spec do_slice(binary(), integer(), [binary()], [binary()], non_neg_integer()) :: [binary()]
  defp do_slice(<<>>, _max_length, slices, partial_slice, _partial_size) do
    # The remaining log entry is empty, so the last partial slice (if any) is banked
    # and the slices are put back in their original order.
    slices
    |> bank_partial_slice(partial_slice)
    |> Enum.reverse()
  end

  defp do_slice(
         <<codepoint::utf8, rest::binary>>,
         max_length,
         slices,
         partial_slice,
         partial_size
       ) do
    push_unit(<<codepoint::utf8>>, rest, max_length, slices, partial_slice, partial_size)
  end

  defp do_slice(
         <<byte, rest::binary>>,
         max_length,
         slices,
         partial_slice,
         partial_size
       ) do
    # The next byte does not start a valid UTF-8 sequence. Keep it as a one-byte unit
    # so that logging a non UTF-8 payload does not raise.
    push_unit(<<byte>>, rest, max_length, slices, partial_slice, partial_size)
  end

  # Adds `unit` to the partial slice when it still fits, otherwise banks the partial
  # slice and starts a new one with `unit`.
  @spec push_unit(binary(), binary(), integer(), [binary()], [binary()], non_neg_integer()) ::
          [binary()]
  defp push_unit(unit, rest, max_length, slices, partial_slice, partial_size) do
    unit_size = byte_size(unit)
    new_size = partial_size + unit_size

    if new_size <= max_length do
      # There is still room, so the unit is prepended (to be reversed when banked).
      do_slice(rest, max_length, slices, [unit | partial_slice], new_size)
    else
      # The unit would put the partial slice over the limit.
      slices = bank_partial_slice(slices, partial_slice)
      do_slice(rest, max_length, slices, [unit], unit_size)
    end
  end

  # Converts the inverted list of units into a binary slice and prepends it to the
  # (also inverted) list of slices. An empty partial slice is skipped so that no empty
  # slice is emitted when the very first unit is already larger than the max length.
  @spec bank_partial_slice([binary()], [binary()]) :: [binary()]
  defp bank_partial_slice(slices, []), do: slices

  defp bank_partial_slice(slices, partial_slice) do
    [reconstruct_current_slice_as_binary(partial_slice) | slices]
  end

  @spec reconstruct_current_slice_as_binary([binary()]) :: binary()
  defp reconstruct_current_slice_as_binary(current_slice) do
    current_slice
    |> Enum.reverse()
    |> IO.iodata_to_binary()
  end
end
17 changes: 10 additions & 7 deletions lib/tesla/middleware/meta_logger.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ if Code.ensure_loaded?(Tesla) do
@moduledoc """
Tesla middleware to log requests and responses.
You can pass the options to the middleware or to the `Tesla.Env` request . The Tesla env
You can pass the options to the middleware or to the `Tesla.Env` request. The Tesla env
options take precedence over the middleware options.
## Usage example
Expand All @@ -17,7 +17,8 @@ if Code.ensure_loaded?(Tesla) do
filter_query_params: [:api_key],
log_level: :debug,
log_tag: MyApp,
max_entry_length: :infinity
max_entry_length: :infinity,
slicer: MyCustomSlicer
end
## Options
Expand All @@ -27,17 +28,19 @@ if Code.ensure_loaded?(Tesla) do
* `:filter_query_params` - The query params that should not be logged, the values will be
replaced with `[FILTERED]`. Defaults to: `[]`.
* `:filter_body` - The request and response body patterns that should not be logged, each
filter can be just a pattern, wich will be replaced by `"[FILTERED]"`, or it can be a tuple
filter can be just a pattern, which will be replaced by `"[FILTERED]"`, or it can be a tuple
with the pattern and the replacement. Because the body filtering is applied to strings it is
necessary that this middleware is the last one on the stack, so it receives the request body
already encoded and the response body not yet decoded. If the body is not a string, the
filtering will be skipped.
* `:log_level` - The log level to be used, defaults to: `:info`. Responses with HTTP status
code 400 and above will be logged with `:error`, and redirects with `:warning`.
* `:log_tag` - The log tag to be prefixed in the logs. Any non-string value will be inspect as
* `:log_tag` - The log tag to be prefixed in the logs. Any non-string value will be inspected as
a string. Defaults to the current module name.
* `:max_entry_length` - The maximum length of a log entry before it is splitted into new ones.
* `:max_entry_length` - The maximum length of a log entry before it is split into new ones.
Defaults to `:infinity`.
* `:slicer` - The module implementing the `MetaLogger.Slicer` behaviour to be used for slicing
log entries. Defaults to `MetaLogger.Slicer.DefaultImpl`.
"""

Expand Down Expand Up @@ -71,7 +74,7 @@ if Code.ensure_loaded?(Tesla) do
|> maybe_put_default_value(:log_level, :info)
|> maybe_put_default_value(:log_tag, __MODULE__)
|> maybe_put_default_value(:max_entry_length, :infinity)
|> maybe_put_default_value(:slicer, MetaLogger.Slicer.DefaultImpl)
|> maybe_put_default_value(:slicer, Slicer.DefaultImpl)
end

@spec maybe_put_default_values(Env.opts(), [atom()], any()) :: Env.opts()
Expand All @@ -80,7 +83,7 @@ if Code.ensure_loaded?(Tesla) do

@spec maybe_put_default_value(Env.opts(), atom(), any()) :: Env.opts()
defp maybe_put_default_value(options, key, default_value),
do: Keyword.put(options, key, Keyword.get(options, key, default_value))
do: Keyword.put_new(options, key, default_value)

@spec log_request(Env.t(), Env.opts()) :: Env.t()
defp log_request(%Env{} = env, options) do
Expand Down
12 changes: 11 additions & 1 deletion test/meta_logger/slicer_test.exs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
defmodule MetaLogger.SlicerTest do
use ExUnit.Case, async: true

alias MetaLogger.Slicer.Default, as: Subject
alias MetaLogger.Slicer.DefaultImpl, as: Subject

doctest Subject

Expand Down Expand Up @@ -46,5 +46,15 @@ defmodule MetaLogger.SlicerTest do
} do
assert Subject.slice(entry, :pqp) == [entry]
end

test "when slicing a UTF-8 string not all slices will be valid UTF-8 strings" do
range_of_slices =
7..1
|> Enum.flat_map(fn max_length ->
Subject.slice("Hello 世界", max_length)
end)

refute Enum.all?(range_of_slices, &String.valid?/1)
end
end
end
58 changes: 58 additions & 0 deletions test/meta_logger/utf8_slicer_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
defmodule MetaLogger.Utf8SlicerTest do
  # Unit tests for the UTF-8 aware slicer. The doctests of the subject module are run too.
  use ExUnit.Case, async: true

  alias MetaLogger.Slicer.Utf8Impl, as: Subject

  doctest Subject

  describe "slice/2" do
    setup do
      # Ten single-byte characters, so byte offsets and character offsets coincide.
      entry = "0123456789"

      {:ok, entry: entry}
    end

    test "when `:infinity` is given as max entry length, returns a list with one entry", %{
      entry: entry
    } do
      assert Subject.slice(entry, :infinity) == [entry]
    end

    test "when max entry length is equal to the size of given entry, " <>
           "returns a list with one entry",
         %{
           entry: entry
         } do
      assert Subject.slice(entry, 10) == ["0123456789"]
    end

    test "when max entry length is half the size of given entry, returns a list with two entries",
         %{
           entry: entry
         } do
      assert Subject.slice(entry, 5) == ["01234", "56789"]
    end

    test "when given max entry length is three and the given entry size is 10, " <>
           "returns a list with four entries",
         %{
           entry: entry
         } do
      assert Subject.slice(entry, 3) == ["012", "345", "678", "9"]
    end

    test "when an invalid max entry length is given, returns a list with one entry", %{
      entry: entry
    } do
      assert Subject.slice(entry, :pqp) == [entry]
    end

    test "when slicing a UTF-8 string all slices will be valid UTF-8 strings" do
      entry = "Hello 世界"

      # Every max length from one byte up to seven bytes is exercised; the entry is
      # 12 bytes long, so it is sliced in every iteration.
      Enum.each(1..7, fn max_length ->
        log_entries = Subject.slice(entry, max_length)

        assert Enum.all?(log_entries, &String.valid?/1)
        # Slicing must neither drop nor duplicate any part of the entry.
        assert Enum.join(log_entries) == entry
      end)
    end
  end
end

0 comments on commit 381614a

Please sign in to comment.