From 978950776537cb7f73625d809f58af945f4685ba Mon Sep 17 00:00:00 2001 From: Nico Albanese Date: Mon, 2 Dec 2024 13:35:01 +0000 Subject: [PATCH 1/3] updates --- content/cookbook/01-next/122-caching-middleware.mdx | 10 +++------- content/docs/06-advanced/04-caching.mdx | 10 +++------- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/content/cookbook/01-next/122-caching-middleware.mdx b/content/cookbook/01-next/122-caching-middleware.mdx index 9721e773e1e8..58e0df392cd3 100644 --- a/content/cookbook/01-next/122-caching-middleware.mdx +++ b/content/cookbook/01-next/122-caching-middleware.mdx @@ -104,10 +104,9 @@ export const cacheMiddleware: LanguageModelV1Middleware = { } // If not cached, proceed with streaming - const { stream, rawCall, rawResponse, request, warnings } = - await doStream(); + const { stream, ...rest } = await doStream(); - const fullResponse: unknown[] = []; + const fullResponse: LanguageModelV1StreamPart[] = []; const transformStream = new TransformStream< LanguageModelV1StreamPart, @@ -125,10 +124,7 @@ export const cacheMiddleware: LanguageModelV1Middleware = { return { stream: stream.pipeThrough(transformStream), - rawCall, - rawResponse, - request, - warnings, + ...rest, }; }, }; diff --git a/content/docs/06-advanced/04-caching.mdx b/content/docs/06-advanced/04-caching.mdx index 77b81996ed17..8857b7350542 100644 --- a/content/docs/06-advanced/04-caching.mdx +++ b/content/docs/06-advanced/04-caching.mdx @@ -58,10 +58,9 @@ export const cacheMiddleware: LanguageModelV1Middleware = { } // If not cached, proceed with streaming - const { stream, rawCall, rawResponse, request, warnings } = - await doStream(); + const { stream, ...rest } = await doStream(); - const fullResponse: unknown[] = []; + const fullResponse: LanguageModelV1StreamPart[] = []; const transformStream = new TransformStream< LanguageModelV1StreamPart, @@ -79,10 +78,7 @@ export const cacheMiddleware: LanguageModelV1Middleware = { return { stream: stream.pipeThrough(transformStream), - rawCall, - rawResponse, - request, - warnings, + ...rest, }; }, }; From c6f2445b7fcc69d4e8a6a24de018a173807eee10 Mon Sep 17 00:00:00 2001 From: Nico Albanese Date: Mon, 2 Dec 2024 14:48:47 +0000 Subject: [PATCH 2/3] fixes --- .../01-next/122-caching-middleware.mdx | 36 ++++++++++++++----- content/docs/06-advanced/04-caching.mdx | 36 ++++++++++++++----- 2 files changed, 56 insertions(+), 16 deletions(-) diff --git a/content/cookbook/01-next/122-caching-middleware.mdx b/content/cookbook/01-next/122-caching-middleware.mdx index 58e0df392cd3..4323fc0c9d3e 100644 --- a/content/cookbook/01-next/122-caching-middleware.mdx +++ b/content/cookbook/01-next/122-caching-middleware.mdx @@ -58,13 +58,13 @@ Next, you will create a `LanguageModelMiddleware` that caches the assistant's re For `wrapGenerate`, you can cache the response directly. Instead, for `wrapStream`, you cache an array of the stream parts, which can then be used with [`simulateReadableStream`](/docs/ai-sdk-core/testing#simulate-data-stream-protocol-responses) function to create a simulated `ReadableStream` that returns the cached response. In this way, the cached response is returned chunk-by-chunk as if it were being generated by the model. You can control the initial delay and delay between chunks by adjusting the `initialDelayInMs` and `chunkDelayInMs` parameters of `simulateReadableStream`. 
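For context on how the middleware in the diff below is consumed, here is a minimal usage sketch. It is an illustration under assumptions, not part of this patch: it assumes the `experimental_wrapLanguageModel` helper exported from `ai`, the `@ai-sdk/openai` provider, a hypothetical route file `app/api/chat/route.ts`, and that the `cacheMiddleware` below is importable from `@/ai/middleware`.

```tsx
// Usage sketch (assumed file: app/api/chat/route.ts).
// Assumes the experimental_wrapLanguageModel helper from 'ai', the
// @ai-sdk/openai provider, and the cacheMiddleware defined in the diff below.
import { openai } from '@ai-sdk/openai';
import {
  streamText,
  experimental_wrapLanguageModel as wrapLanguageModel,
} from 'ai';
import { cacheMiddleware } from '@/ai/middleware';

// Wrap the base model once; calls made through wrappedModel go through the
// caching middleware (Redis lookup first, model call only on a cache miss).
const wrappedModel = wrapLanguageModel({
  model: openai('gpt-4o'),
  middleware: cacheMiddleware,
});

export async function POST(req: Request) {
  const { messages } = await req.json();

  const result = streamText({
    model: wrappedModel,
    messages,
  });

  return result.toDataStreamResponse();
}
```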
```tsx filename='ai/middleware.ts' -import { Redis } from '@upstash/redis'; +import { Redis } from "@upstash/redis"; import type { LanguageModelV1, Experimental_LanguageModelV1Middleware as LanguageModelV1Middleware, LanguageModelV1StreamPart, -} from 'ai'; -import { simulateReadableStream } from 'ai/test'; +} from "ai"; +import { simulateReadableStream } from "ai/test"; const redis = new Redis({ url: process.env.KV_URL, @@ -75,14 +75,25 @@ export const cacheMiddleware: LanguageModelV1Middleware = { wrapGenerate: async ({ doGenerate, params }) => { const cacheKey = JSON.stringify(params); - const cached = await redis.get(cacheKey); + const cached = (await redis.get(cacheKey)) as Awaited< + ReturnType + > | null; + if (cached !== null) { - return cached as ReturnType; + return { + ...cached, + response: { + ...cached.response, + timestamp: cached?.response?.timestamp + ? new Date(cached?.response?.timestamp) + : undefined, + }, + }; } const result = await doGenerate(); - redis.set(cacheKey, { ...result, response: null }); // setting response as null avoids generateText useChat error + redis.set(cacheKey, result); return result; }, @@ -91,13 +102,22 @@ export const cacheMiddleware: LanguageModelV1Middleware = { // Check if the result is in the cache const cached = await redis.get(cacheKey); + + // If cached, return a simulated ReadableStream that yields the cached result if (cached !== null) { - // If cached, return a simulated ReadableStream that yields the cached result + // Format the timestamps in the cached response + const formattedChunks = (cached as LanguageModelV1StreamPart[]).map( + (p) => { + if (p.type === "response-metadata" && p.timestamp) { + return { ...p, timestamp: new Date(p.timestamp) }; + } else return p; + }, + ); return { stream: simulateReadableStream({ initialDelayInMs: 0, chunkDelayInMs: 10, - chunks: cached as LanguageModelV1StreamPart[], + chunks: formattedChunks, }), rawCall: { rawPrompt: null, rawSettings: {} }, }; diff --git a/content/docs/06-advanced/04-caching.mdx b/content/docs/06-advanced/04-caching.mdx index 8857b7350542..5e776a268f8a 100644 --- a/content/docs/06-advanced/04-caching.mdx +++ b/content/docs/06-advanced/04-caching.mdx @@ -12,13 +12,13 @@ Depending on the type of application you're building, you may want to cache the The recommended approach to caching responses is using [language model middleware](/docs/ai-sdk-core/middleware). Language model middleware is a way to enhance the behavior of language models by intercepting and modifying the calls to the language model. Let's see how you can use language model middleware to cache responses. ```ts filename="ai/middleware.ts" -import { Redis } from '@upstash/redis'; +import { Redis } from "@upstash/redis"; import type { LanguageModelV1, Experimental_LanguageModelV1Middleware as LanguageModelV1Middleware, LanguageModelV1StreamPart, -} from 'ai'; -import { simulateReadableStream } from 'ai/test'; +} from "ai"; +import { simulateReadableStream } from "ai/test"; const redis = new Redis({ url: process.env.KV_URL, @@ -29,14 +29,25 @@ export const cacheMiddleware: LanguageModelV1Middleware = { wrapGenerate: async ({ doGenerate, params }) => { const cacheKey = JSON.stringify(params); - const cached = await redis.get(cacheKey); + const cached = (await redis.get(cacheKey)) as Awaited< + ReturnType + > | null; + if (cached !== null) { - return cached as ReturnType; + return { + ...cached, + response: { + ...cached.response, + timestamp: cached?.response?.timestamp + ? 
new Date(cached?.response?.timestamp) + : undefined, + }, + }; } const result = await doGenerate(); - redis.set(cacheKey, { ...result, response: null }); // setting response as null avoids generateText useChat error + redis.set(cacheKey, result); return result; }, @@ -45,13 +56,22 @@ export const cacheMiddleware: LanguageModelV1Middleware = { // Check if the result is in the cache const cached = await redis.get(cacheKey); + + // If cached, return a simulated ReadableStream that yields the cached result if (cached !== null) { - // If cached, return a simulated ReadableStream that yields the cached result + // Format the timestamps in the cached response + const formattedChunks = (cached as LanguageModelV1StreamPart[]).map( + (p) => { + if (p.type === "response-metadata" && p.timestamp) { + return { ...p, timestamp: new Date(p.timestamp) }; + } else return p; + }, + ); return { stream: simulateReadableStream({ initialDelayInMs: 0, chunkDelayInMs: 10, - chunks: cached as LanguageModelV1StreamPart[], + chunks: formattedChunks, }), rawCall: { rawPrompt: null, rawSettings: {} }, }; From df21063b79c3fecab74f58718ca31fd4c614c0cb Mon Sep 17 00:00:00 2001 From: Nico Albanese Date: Mon, 2 Dec 2024 14:49:21 +0000 Subject: [PATCH 3/3] prettier fix --- .../01-next/122-caching-middleware.mdx | 20 +++++++++---------- content/docs/06-advanced/04-caching.mdx | 20 +++++++++---------- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/content/cookbook/01-next/122-caching-middleware.mdx b/content/cookbook/01-next/122-caching-middleware.mdx index 4323fc0c9d3e..db7280b5c364 100644 --- a/content/cookbook/01-next/122-caching-middleware.mdx +++ b/content/cookbook/01-next/122-caching-middleware.mdx @@ -58,13 +58,13 @@ Next, you will create a `LanguageModelMiddleware` that caches the assistant's re For `wrapGenerate`, you can cache the response directly. Instead, for `wrapStream`, you cache an array of the stream parts, which can then be used with [`simulateReadableStream`](/docs/ai-sdk-core/testing#simulate-data-stream-protocol-responses) function to create a simulated `ReadableStream` that returns the cached response. In this way, the cached response is returned chunk-by-chunk as if it were being generated by the model. You can control the initial delay and delay between chunks by adjusting the `initialDelayInMs` and `chunkDelayInMs` parameters of `simulateReadableStream`. 
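One detail worth calling out from the fix above: the cached value is JSON-serialized on its way into and out of Redis, which turns `Date` objects (such as `response.timestamp` and the `timestamp` on `response-metadata` stream parts) into ISO strings. That is why the middleware revives them with `new Date(...)` when reading from the cache. A minimal, self-contained illustration of the round trip, using plain `JSON` as a stand-in for the Redis client's serialization and hypothetical values:

```tsx
// Illustration only: JSON round-tripping converts Date objects to ISO strings,
// which is why cached timestamps must be revived with `new Date(...)`.
const original = { type: 'response-metadata', timestamp: new Date() };

// Serialize and parse, as effectively happens when the value is written to
// and later read back from the cache.
const roundTripped = JSON.parse(JSON.stringify(original)) as {
  type: string;
  timestamp: string; // no longer a Date after parsing
};

// Revive the timestamp so downstream consumers receive a real Date again.
const revived = { ...roundTripped, timestamp: new Date(roundTripped.timestamp) };

console.log(typeof roundTripped.timestamp); // 'string'
console.log(revived.timestamp instanceof Date); // true
```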
```tsx filename='ai/middleware.ts' -import { Redis } from "@upstash/redis"; +import { Redis } from '@upstash/redis'; import type { LanguageModelV1, Experimental_LanguageModelV1Middleware as LanguageModelV1Middleware, LanguageModelV1StreamPart, -} from "ai"; -import { simulateReadableStream } from "ai/test"; +} from 'ai'; +import { simulateReadableStream } from 'ai/test'; const redis = new Redis({ url: process.env.KV_URL, @@ -76,7 +76,7 @@ export const cacheMiddleware: LanguageModelV1Middleware = { const cacheKey = JSON.stringify(params); const cached = (await redis.get(cacheKey)) as Awaited< - ReturnType + ReturnType > | null; if (cached !== null) { @@ -106,13 +106,11 @@ export const cacheMiddleware: LanguageModelV1Middleware = { // If cached, return a simulated ReadableStream that yields the cached result if (cached !== null) { // Format the timestamps in the cached response - const formattedChunks = (cached as LanguageModelV1StreamPart[]).map( - (p) => { - if (p.type === "response-metadata" && p.timestamp) { - return { ...p, timestamp: new Date(p.timestamp) }; - } else return p; - }, - ); + const formattedChunks = (cached as LanguageModelV1StreamPart[]).map(p => { + if (p.type === 'response-metadata' && p.timestamp) { + return { ...p, timestamp: new Date(p.timestamp) }; + } else return p; + }); return { stream: simulateReadableStream({ initialDelayInMs: 0, diff --git a/content/docs/06-advanced/04-caching.mdx b/content/docs/06-advanced/04-caching.mdx index 5e776a268f8a..7cc1d35af0ae 100644 --- a/content/docs/06-advanced/04-caching.mdx +++ b/content/docs/06-advanced/04-caching.mdx @@ -12,13 +12,13 @@ Depending on the type of application you're building, you may want to cache the The recommended approach to caching responses is using [language model middleware](/docs/ai-sdk-core/middleware). Language model middleware is a way to enhance the behavior of language models by intercepting and modifying the calls to the language model. Let's see how you can use language model middleware to cache responses. ```ts filename="ai/middleware.ts" -import { Redis } from "@upstash/redis"; +import { Redis } from '@upstash/redis'; import type { LanguageModelV1, Experimental_LanguageModelV1Middleware as LanguageModelV1Middleware, LanguageModelV1StreamPart, -} from "ai"; -import { simulateReadableStream } from "ai/test"; +} from 'ai'; +import { simulateReadableStream } from 'ai/test'; const redis = new Redis({ url: process.env.KV_URL, @@ -30,7 +30,7 @@ export const cacheMiddleware: LanguageModelV1Middleware = { const cacheKey = JSON.stringify(params); const cached = (await redis.get(cacheKey)) as Awaited< - ReturnType + ReturnType > | null; if (cached !== null) { @@ -60,13 +60,11 @@ export const cacheMiddleware: LanguageModelV1Middleware = { // If cached, return a simulated ReadableStream that yields the cached result if (cached !== null) { // Format the timestamps in the cached response - const formattedChunks = (cached as LanguageModelV1StreamPart[]).map( - (p) => { - if (p.type === "response-metadata" && p.timestamp) { - return { ...p, timestamp: new Date(p.timestamp) }; - } else return p; - }, - ); + const formattedChunks = (cached as LanguageModelV1StreamPart[]).map(p => { + if (p.type === 'response-metadata' && p.timestamp) { + return { ...p, timestamp: new Date(p.timestamp) }; + } else return p; + }); return { stream: simulateReadableStream({ initialDelayInMs: 0,