From 1daaa111d041916fc5dace029971bcd71e32d353 Mon Sep 17 00:00:00 2001 From: Ivor Paul Date: Fri, 25 Oct 2024 15:07:00 +0200 Subject: [PATCH] Add middleware option for slicer module and a UTF-8 aware slicer (#23) * Allow passing the slicer module in as an option to the Tesla middleware * Small tweaks * Add UTF-8 aware slicer to allow slicing long messages that can still be used in JSON. * Rearrange slicer behaviour and slicers. * Update the specs. * Add open line between @behaviour and @doc for the first function. Co-authored-by: Felipe Vieira * Instead of different function clauses use one with multiple when guards. Co-authored-by: Felipe Vieira * Fix PR feedback issues. * Change example invalid atom argument in test. * Fix the test value in slicer. * Update the changelog. --------- Co-authored-by: Felipe Vieira --- CHANGELOG.md | 5 ++ lib/meta_logger/slicer.ex | 41 +-------- lib/meta_logger/slicer/default_impl.ex | 46 ++++++++++ lib/meta_logger/slicer/utf8_impl.ex | 89 +++++++++++++++++++ lib/tesla/middleware/meta_logger.ex | 19 ++-- test/meta_logger/slicer/default_impl_test.exs | 60 +++++++++++++ .../utf8_impl_test.exs} | 14 ++- test/tesla/middleware/meta_logger_test.exs | 21 +++++ 8 files changed, 246 insertions(+), 49 deletions(-) create mode 100644 lib/meta_logger/slicer/default_impl.ex create mode 100644 lib/meta_logger/slicer/utf8_impl.ex create mode 100644 test/meta_logger/slicer/default_impl_test.exs rename test/meta_logger/{slicer_test.exs => slicer/utf8_impl_test.exs} (73%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 11705ab..0b9c968 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Added + +- Add `Tesla.Middleware.MetaLogger` configuration option `:slicer`. +- Add `MetaLogger.Slicer` protocol and `MetaLogger.Slicer.Utf8Impl` as a UTF-8 aware implementation. + ## [1.7.0] - 2024-02-05 ### Added diff --git a/lib/meta_logger/slicer.ex b/lib/meta_logger/slicer.ex index fb27e59..d28a239 100644 --- a/lib/meta_logger/slicer.ex +++ b/lib/meta_logger/slicer.ex @@ -1,46 +1,9 @@ defmodule MetaLogger.Slicer do @moduledoc """ - Responsible for slicing log entries according to the given max length option. + A bebaviour for slicing long entries into a list of entries shorter than a passed `max_entry_length` value. """ @typedoc "Max length in bytes or `:infinity` if the entry should not be sliced." @type max_entry_length :: non_neg_integer() | :infinity - - @doc """ - Returns sliced log entries according to the given max entry length. - - If the entry is smaller than given max length, or if `:infinity ` is given - as option, a list with one entry is returned. Otherwise a list with multiple - entries is returned. - - ## Examples - - iex> #{inspect(__MODULE__)}.slice("1234567890", 10) - ["1234567890"] - - iex> #{inspect(__MODULE__)}.slice("1234567890", :infinity) - ["1234567890"] - - iex> #{inspect(__MODULE__)}.slice("1234567890", 5) - ["12345", "67890"] - - """ - @spec slice(String.t(), max_entry_length()) :: [String.t()] - def slice(entry, max_entry_length) - when max_entry_length == :infinity - when byte_size(entry) <= max_entry_length, - do: [entry] - - def slice(entry, max_entry_length) do - entry_length = byte_size(entry) - rem = rem(entry_length, max_entry_length) - sliced_entries = for <>, do: slice - - if rem > 0 do - remainder_entry = binary_part(entry, entry_length, rem * -1) - sliced_entries ++ [remainder_entry] - else - sliced_entries - end - end + @callback slice(binary(), max_entry_length()) :: [binary()] end diff --git a/lib/meta_logger/slicer/default_impl.ex b/lib/meta_logger/slicer/default_impl.ex new file mode 100644 index 0000000..12948b2 --- /dev/null +++ b/lib/meta_logger/slicer/default_impl.ex @@ -0,0 +1,46 @@ +defmodule MetaLogger.Slicer.DefaultImpl do + @moduledoc """ + Responsible for slicing log entries according to the given max length option. + """ + + @behaviour MetaLogger.Slicer + + @doc """ + Returns sliced log entries according to the given max entry length. + + If the entry is smaller than given max length, or if `:infinity ` is given + as option, a list with one entry is returned. Otherwise a list with multiple + entries is returned. + + ## Examples + + iex> #{inspect(__MODULE__)}.slice("1234567890", 10) + ["1234567890"] + + iex> #{inspect(__MODULE__)}.slice("1234567890", :infinity) + ["1234567890"] + + iex> #{inspect(__MODULE__)}.slice("1234567890", 5) + ["12345", "67890"] + + """ + @impl MetaLogger.Slicer + @spec slice(binary(), MetaLogger.Slicer.max_entry_length()) :: [binary()] + def slice(entry, max_entry_length) + when max_entry_length == :infinity + when byte_size(entry) <= max_entry_length, + do: [entry] + + def slice(entry, max_entry_length) do + entry_length = byte_size(entry) + rem = rem(entry_length, max_entry_length) + sliced_entries = for <>, do: slice + + if rem > 0 do + remainder_entry = binary_part(entry, entry_length, rem * -1) + sliced_entries ++ [remainder_entry] + else + sliced_entries + end + end +end diff --git a/lib/meta_logger/slicer/utf8_impl.ex b/lib/meta_logger/slicer/utf8_impl.ex new file mode 100644 index 0000000..5af26ab --- /dev/null +++ b/lib/meta_logger/slicer/utf8_impl.ex @@ -0,0 +1,89 @@ +defmodule MetaLogger.Slicer.Utf8Impl do + @moduledoc """ + Slices a string into chunks of a given length, taking into account the UTF-8 encoding of the string. + """ + + @behaviour MetaLogger.Slicer + + @doc """ + Returns sliced log entries according to the given max entry length. + + Ensures that all slices are valid UTF-8 strings by not splitting multibyte characters. + If the entry is smaller than the given max length, or if `:infinity` is given + as an option, a list with one entry is returned. Otherwise, a list with multiple + entries is returned. + + ## Examples + + iex> #{inspect(__MODULE__)}.slice("1234567890", 10) + ["1234567890"] + + iex> #{inspect(__MODULE__)}.slice("1234567890", :infinity) + ["1234567890"] + + iex> #{inspect(__MODULE__)}.slice("1234567890", 5) + ["12345", "67890"] + + iex> #{inspect(__MODULE__)}.slice("Hello 世界", 7) + ["Hello ", "世界"] + """ + @impl MetaLogger.Slicer + @spec slice(binary(), MetaLogger.Slicer.max_entry_length()) :: [String.t()] + def slice(entry, max_entry_length) + when max_entry_length == :infinity + when byte_size(entry) <= max_entry_length, + do: [entry] + + def slice(entry, max_entry_length) do + do_slice(entry, max_entry_length, [], [], 0) + end + + @spec do_slice(binary(), integer(), [binary()], [iodata()], integer()) :: [String.t()] + defp do_slice(<<>>, _max_length, slices, partial_slice, _partial_size) do + # The remaining log entry is empty so we clean up the last partial_slice + # and return the slices. + partial_slice + |> case do + [] -> slices + _ -> bank_partial_slice(slices, partial_slice) + end + |> Enum.reverse() + end + + defp do_slice( + <>, + max_length, + slices, + partial_slice, + partial_size + ) do + codepoint_binary = <> + codepoint_size = byte_size(codepoint_binary) + new_size = partial_size + codepoint_size + + if new_size <= max_length do + # There is still room in the partial_slice for more codepoints + # so we prepend (to later reverse) the codepoint binary + # and consider the next codepoint. + do_slice(rest, max_length, slices, [codepoint_binary | partial_slice], new_size) + else + # Adding the new codepoint to the partial slice puts it over the limit + # So we bank the partial slice and start the codepoint as the new partial_slice + slices = bank_partial_slice(slices, partial_slice) + new_partial_slice = [codepoint_binary] + do_slice(rest, max_length, slices, new_partial_slice, codepoint_size) + end + end + + # Converts the inverted list of codepoints into a + # binary slice and appends it to our list of slices. + @spec bank_partial_slice([binary()], [iodata()]) :: [binary()] + defp bank_partial_slice(slices, partial_slice) do + [reconstruct_current_slice_as_binary(partial_slice) | slices] + end + + @spec reconstruct_current_slice_as_binary([iodata()]) :: binary() + defp reconstruct_current_slice_as_binary(current_slice) do + IO.iodata_to_binary(Enum.reverse(current_slice)) + end +end diff --git a/lib/tesla/middleware/meta_logger.ex b/lib/tesla/middleware/meta_logger.ex index 949d9b1..96766e8 100644 --- a/lib/tesla/middleware/meta_logger.ex +++ b/lib/tesla/middleware/meta_logger.ex @@ -3,7 +3,7 @@ if Code.ensure_loaded?(Tesla) do @moduledoc """ Tesla middleware to log requests and responses. - You can pass the options to the middleware or to the `Tesla.Env` request . The Tesla env + You can pass the options to the middleware or to the `Tesla.Env` request. The Tesla env options take precedence over the middleware options. ## Usage example @@ -17,7 +17,8 @@ if Code.ensure_loaded?(Tesla) do filter_query_params: [:api_key], log_level: :debug, log_tag: MyApp, - max_entry_length: :infinity + max_entry_length: :infinity, + slicer: MyCustomSlicer end ## Options @@ -27,17 +28,19 @@ if Code.ensure_loaded?(Tesla) do * `:filter_query_params` - The query params that should not be logged, the values will be replaced with `[FILTERED]`. Defaults to: `[]`. * `:filter_body` - The request and response body patterns that should not be logged, each - filter can be just a pattern, wich will be replaced by `"[FILTERED]"`, or it can be a tuple + filter can be just a pattern, which will be replaced by `"[FILTERED]"`, or it can be a tuple with the pattern and the replacement. Because the body filtering is applied to strings it is necessary that this middleware is the last one on the stack, so it receives the request body already encoded and the response body not yet decoded. If the body is not a string, the filtering will be skipped. * `:log_level` - The log level to be used, defaults to: `:info`. Responses with HTTP status code 400 and above will be logged with `:error`, and redirect with `:warning`. - * `:log_tag` - The log tag to be prefixed in the logs. Any non-string value will be inspect as + * `:log_tag` - The log tag to be prefixed in the logs. Any non-string value will be inspected as a string. Defaults to the current module name. - * `:max_entry_length` - The maximum length of a log entry before it is splitted into new ones. + * `:max_entry_length` - The maximum length of a log entry before it is split into new ones. Defaults to `:infinity`. + * `:slicer` - The module implementing the `MetaLogger.Slicer` behaviour to be used for slicing + log entries. Defaults to `MetaLogger.Slicer.DefaultImpl`. """ @@ -71,6 +74,7 @@ if Code.ensure_loaded?(Tesla) do |> maybe_put_default_value(:log_level, :info) |> maybe_put_default_value(:log_tag, __MODULE__) |> maybe_put_default_value(:max_entry_length, :infinity) + |> maybe_put_default_value(:slicer, Slicer.DefaultImpl) end @spec maybe_put_default_values(Env.opts(), [atom()], any()) :: Env.opts() @@ -79,7 +83,7 @@ if Code.ensure_loaded?(Tesla) do @spec maybe_put_default_value(Env.opts(), atom(), any()) :: Env.opts() defp maybe_put_default_value(options, key, default_value), - do: Keyword.put(options, key, Keyword.get(options, key, default_value)) + do: Keyword.put_new(options, key, default_value) @spec log_request(Env.t(), Env.opts()) :: Env.t() defp log_request(%Env{} = env, options) do @@ -182,9 +186,10 @@ if Code.ensure_loaded?(Tesla) do defp log(message, level, options) when is_binary(message) do max_entry_length = Keyword.get(options, :max_entry_length) + slicer = Keyword.get(options, :slicer) message - |> Slicer.slice(max_entry_length) + |> slicer.slice(max_entry_length) |> Enum.map(&prepend_tag(&1, options)) |> Enum.each(&MetaLogger.log(level, &1)) end diff --git a/test/meta_logger/slicer/default_impl_test.exs b/test/meta_logger/slicer/default_impl_test.exs new file mode 100644 index 0000000..25f248f --- /dev/null +++ b/test/meta_logger/slicer/default_impl_test.exs @@ -0,0 +1,60 @@ +defmodule MetaLogger.Slicer.DefaultImplTest do + use ExUnit.Case, async: true + + alias MetaLogger.Slicer.DefaultImpl, as: Subject + + doctest Subject + + describe "slice/2" do + setup do + entry = "0123456789" + + {:ok, entry: entry} + end + + test "when `:infinity` is given as max entry length, returns a list with one entry", %{ + entry: entry + } do + assert Subject.slice(entry, :infinity) == [entry] + end + + test "when max entry length is smaller than the size of given entry, " <> + "returns a list with one entry", + %{ + entry: entry + } do + assert Subject.slice(entry, 10) == ["0123456789"] + end + + test "when max entry length is half the size of given entry, returns a list with two entries", + %{ + entry: entry + } do + assert Subject.slice(entry, 5) == ["01234", "56789"] + end + + test "when given max entry length is three and the given entry size is 10, " <> + "returns a list with four entries", + %{ + entry: entry + } do + assert Subject.slice(entry, 3) == ["012", "345", "678", "9"] + end + + test "when an invalid max entry length is given, returns a list with one entry", %{ + entry: entry + } do + assert Subject.slice(entry, :foo) == [entry] + end + + test "when slicing a UTF-8 string not all slices will be valid UTF-8 strings" do + range_of_slices = + 7..1 + |> Enum.flat_map(fn max_length -> + Subject.slice("Hello 世界", max_length) + end) + + refute Enum.all?(range_of_slices, &String.valid?/1) + end + end +end diff --git a/test/meta_logger/slicer_test.exs b/test/meta_logger/slicer/utf8_impl_test.exs similarity index 73% rename from test/meta_logger/slicer_test.exs rename to test/meta_logger/slicer/utf8_impl_test.exs index 783a7b9..a52efd7 100644 --- a/test/meta_logger/slicer_test.exs +++ b/test/meta_logger/slicer/utf8_impl_test.exs @@ -1,7 +1,7 @@ -defmodule MetaLogger.SlicerTest do +defmodule MetaLogger.Slicer.Utf8ImplTest do use ExUnit.Case, async: true - alias MetaLogger.Slicer, as: Subject + alias MetaLogger.Slicer.Utf8Impl, as: Subject doctest Subject @@ -44,7 +44,15 @@ defmodule MetaLogger.SlicerTest do test "when an invalid max entry length is given, returns a list with one entry", %{ entry: entry } do - assert Subject.slice(entry, :pqp) == [entry] + assert Subject.slice(entry, :foo) == [entry] + end + + test "when slicing a UTF-8 string all slices will be valid UTF-8 strings" do + 7..1 + |> Enum.each(fn max_length -> + assert log_entries = Subject.slice("Hello 世界", max_length) + assert Enum.all?(log_entries, &String.valid?/1) + end) end end end diff --git a/test/tesla/middleware/meta_logger_test.exs b/test/tesla/middleware/meta_logger_test.exs index 8db95d5..75b1c31 100644 --- a/test/tesla/middleware/meta_logger_test.exs +++ b/test/tesla/middleware/meta_logger_test.exs @@ -5,6 +5,15 @@ defmodule Tesla.Middleware.MetaLoggerTest do alias Tesla.Middleware.MetaLogger, as: Subject + defmodule AlternativeSlicer do + @behaviour MetaLogger.Slicer + + @impl MetaLogger.Slicer + def slice(_entry, _max_entry_length) do + ["slice1", "slice2"] + end + end + defmodule FakeClient do use Tesla @@ -251,6 +260,18 @@ defmodule Tesla.Middleware.MetaLoggerTest do assert logs =~ "[warning] [#{inspect(Subject)}] response body moved" end + test "when a slicer module is given, uses it to slice the message" do + logs = + capture_log(fn -> + FakeClient.post("/huge-response", "1234567890", + opts: [slicer: __MODULE__.AlternativeSlicer] + ) + end) + + assert logs =~ "[debug] [Tesla.Middleware.MetaLogger] slice1" + assert logs =~ "[debug] [Tesla.Middleware.MetaLogger] slice2" + end + test "when the request fails to connect, logs the error" do logs = capture_log(fn -> FakeClient.get("/connection-error") end)