feat(backends): Drop bert.cpp (#4272)
* feat(backends): Drop bert.cpp

use llama.cpp (with a Llama 3.2 model) as a drop-in replacement for bert.cpp

Signed-off-by: Ettore Di Giacinto <[email protected]>

* chore(tests): make test more robust

Signed-off-by: Ettore Di Giacinto <[email protected]>

---------

Signed-off-by: Ettore Di Giacinto <[email protected]>
mudler authored Nov 27, 2024
1 parent 1688ba7 commit 3c3050f
Showing 13 changed files with 40 additions and 184 deletions.
30 changes: 1 addition & 29 deletions Makefile
@@ -14,10 +14,6 @@ CPPLLAMA_VERSION?=30ec39832165627dd6ed98938df63adfc6e6a21a
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=6266a9f9e56a5b925e9892acf650f3eb1245814d

# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
BERT_VERSION?=710044b124545415f555e4260d16b146c725a6e4

# go-piper version
PIPER_REPO?=https://github.com/mudler/go-piper
PIPER_VERSION?=e10ca041a885d4a8f3871d52924b47792d5e5aa0
@@ -198,7 +194,6 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
endif

ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-avx2
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
@@ -228,19 +223,6 @@ endif

all: help

## BERT embeddings
sources/go-bert.cpp:
mkdir -p sources/go-bert.cpp
cd sources/go-bert.cpp && \
git init && \
git remote add origin $(BERT_REPO) && \
git fetch origin && \
git checkout $(BERT_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch

sources/go-bert.cpp/libgobert.a: sources/go-bert.cpp
$(MAKE) -C sources/go-bert.cpp libgobert.a

## go-llama.cpp
sources/go-llama.cpp:
mkdir -p sources/go-llama.cpp
@@ -320,12 +302,11 @@ sources/whisper.cpp:
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a

get-sources: sources/go-llama.cpp sources/go-piper sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
get-sources: sources/go-llama.cpp sources/go-piper sources/whisper.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp

replace:
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
$(GOCMD) mod edit -replace github.com/M0Rf30/go-tiny-dream=$(CURDIR)/sources/go-tiny-dream
$(GOCMD) mod edit -replace github.com/mudler/go-piper=$(CURDIR)/sources/go-piper
$(GOCMD) mod edit -replace github.com/mudler/go-stable-diffusion=$(CURDIR)/sources/go-stable-diffusion
@@ -334,7 +315,6 @@ replace:
dropreplace:
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
$(GOCMD) mod edit -dropreplace github.com/M0Rf30/go-tiny-dream
$(GOCMD) mod edit -dropreplace github.com/mudler/go-piper
$(GOCMD) mod edit -dropreplace github.com/mudler/go-stable-diffusion
@@ -349,7 +329,6 @@ rebuild: ## Rebuilds the project
$(MAKE) -C sources/go-llama.cpp clean
$(MAKE) -C sources/whisper.cpp clean
$(MAKE) -C sources/go-stable-diffusion clean
$(MAKE) -C sources/go-bert.cpp clean
$(MAKE) -C sources/go-piper clean
$(MAKE) -C sources/go-tiny-dream clean
$(MAKE) build
@@ -707,13 +686,6 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
backend-assets/grpc: protogen-go replace
mkdir -p backend-assets/grpc

backend-assets/grpc/bert-embeddings: sources/go-bert.cpp sources/go-bert.cpp/libgobert.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-bert.cpp LIBRARY_PATH=$(CURDIR)/sources/go-bert.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bert-embeddings ./backend/go/llm/bert/
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/bert-embeddings
endif

backend-assets/grpc/huggingface: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/
ifneq ($(UPX),)
4 changes: 2 additions & 2 deletions aio/cpu/embeddings.yaml
@@ -1,7 +1,7 @@
name: text-embedding-ada-002
backend: bert-embeddings
embeddings: true
parameters:
model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin
model: huggingface://hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf

usage: |
You can test this model with curl like this:
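For reference, the updated aio embedding model can be exercised through the OpenAI-compatible Go client (sashabaranov/go-openai) already used in `core/http/app_test.go`. A minimal sketch, assuming a LocalAI instance at `localhost:8080` and the `text-embedding-ada-002` model name configured above; the base URL and key value are assumptions:

```go
package main

import (
	"context"
	"fmt"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	// Point the OpenAI-compatible client at a local LocalAI instance (assumed address).
	cfg := openai.DefaultConfig("sk-ignored") // LocalAI does not require a real key by default
	cfg.BaseURL = "http://localhost:8080/v1"
	client := openai.NewClientWithConfig(cfg)

	// Request embeddings from the model defined above in aio/cpu/embeddings.yaml.
	resp, err := client.CreateEmbeddings(context.Background(), openai.EmbeddingRequest{
		Model: openai.AdaEmbeddingV2, // resolves to "text-embedding-ada-002"
		Input: []string{"Your text string goes here"},
	})
	if err != nil {
		panic(err)
	}
	fmt.Println("embedding length:", len(resp.Data[0].Embedding))
}
```

With the Llama 3.2 1B GGUF configured above, the returned vector is expected to have 2048 elements (the model's hidden size), matching the updated assertions in `core/http/app_test.go` below.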
34 changes: 0 additions & 34 deletions backend/go/llm/bert/bert.go

This file was deleted.

21 changes: 0 additions & 21 deletions backend/go/llm/bert/main.go

This file was deleted.

8 changes: 5 additions & 3 deletions core/gallery/models_test.go
@@ -12,6 +12,8 @@ import (
"gopkg.in/yaml.v3"
)

const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`

var _ = Describe("Model test", func() {

Context("Downloading", func() {
@@ -47,7 +49,7 @@ var _ = Describe("Model test", func() {

gallery := []GalleryModel{{
Name: "bert",
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
URL: bertEmbeddingsURL,
}}
out, err := yaml.Marshal(gallery)
Expect(err).ToNot(HaveOccurred())
@@ -66,7 +68,7 @@ var _ = Describe("Model test", func() {
Expect(err).ToNot(HaveOccurred())
Expect(len(models)).To(Equal(1))
Expect(models[0].Name).To(Equal("bert"))
Expect(models[0].URL).To(Equal("https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"))
Expect(models[0].URL).To(Equal(bertEmbeddingsURL))
Expect(models[0].Installed).To(BeFalse())

err = InstallModelFromGallery(galleries, "test@bert", tempdir, GalleryModel{}, func(s1, s2, s3 string, f float64) {}, true)
@@ -78,7 +80,7 @@ var _ = Describe("Model test", func() {
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["backend"]).To(Equal("bert-embeddings"))
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))

models, err = AvailableGalleryModels(galleries, tempdir)
Expect(err).ToNot(HaveOccurred())
24 changes: 13 additions & 11 deletions core/http/app_test.go
@@ -240,6 +240,8 @@ func postInvalidRequest(url string) (error, int) {
return nil, resp.StatusCode
}

const bertEmbeddingsURL = `https://gist.githubusercontent.com/mudler/0a080b166b87640e8644b09c2aee6e3b/raw/f0e8c26bb72edc16d9fbafbfd6638072126ff225/bert-embeddings-gallery.yaml`

//go:embed backend-assets/*
var backendAssets embed.FS

@@ -279,13 +281,13 @@ var _ = Describe("API test", func() {
g := []gallery.GalleryModel{
{
Name: "bert",
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
URL: bertEmbeddingsURL,
},
{
Name: "bert2",
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
URL: bertEmbeddingsURL,
Overrides: map[string]interface{}{"foo": "bar"},
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml"}},
AdditionalFiles: []gallery.File{{Filename: "foo.yaml", URI: bertEmbeddingsURL}},
},
}
out, err := yaml.Marshal(g)
@@ -383,7 +385,7 @@ var _ = Describe("API test", func() {
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["backend"]).To(Equal("bert-embeddings"))
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
Expect(content["foo"]).To(Equal("bar"))

models, err = getModels("http://127.0.0.1:9090/models/available")
@@ -402,7 +404,7 @@ var _ = Describe("API test", func() {
It("overrides models", func() {

response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
URL: bertEmbeddingsURL,
Name: "bert",
Overrides: map[string]interface{}{
"backend": "llama",
@@ -451,7 +453,7 @@ var _ = Describe("API test", func() {
})
It("apply models without overrides", func() {
response := postModelApplyRequest("http://127.0.0.1:9090/models/apply", modelApplyRequest{
URL: "https://raw.githubusercontent.com/go-skynet/model-gallery/main/bert-embeddings.yaml",
URL: bertEmbeddingsURL,
Name: "bert",
Overrides: map[string]interface{}{},
})
@@ -471,7 +473,7 @@ var _ = Describe("API test", func() {
content := map[string]interface{}{}
err = yaml.Unmarshal(dat, &content)
Expect(err).ToNot(HaveOccurred())
Expect(content["backend"]).To(Equal("bert-embeddings"))
Expect(content["usage"]).To(ContainSubstring("You can test this model with curl like this"))
})

It("runs openllama(llama-ggml backend)", Label("llama"), func() {
@@ -806,7 +808,7 @@ var _ = Describe("API test", func() {
It("returns the models list", func() {
models, err := client.ListModels(context.TODO())
Expect(err).ToNot(HaveOccurred())
Expect(len(models.Models)).To(Equal(6)) // If "config.yaml" should be included, this should be 8?
Expect(len(models.Models)).To(Equal(7)) // If "config.yaml" should be included, this should be 8?
})
It("can generate completions via ggml", func() {
resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "testmodel.ggml", Prompt: testPrompt})
@@ -866,8 +868,8 @@ var _ = Describe("API test", func() {
},
)
Expect(err).ToNot(HaveOccurred(), err)
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 384))
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 384))
Expect(len(resp.Data[0].Embedding)).To(BeNumerically("==", 2048))
Expect(len(resp.Data[1].Embedding)).To(BeNumerically("==", 2048))

sunEmbedding := resp.Data[0].Embedding
resp2, err := client.CreateEmbeddings(
@@ -951,7 +953,7 @@ var _ = Describe("API test", func() {
openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
Expect(err).ToNot(HaveOccurred())
Expect(len(resp.Choices) > 0).To(BeTrue())
Expect(strings.ToLower(resp.Choices[0].Message.Content)).To(Or(ContainSubstring("sure"), ContainSubstring("five")))
Expect(strings.ToLower(resp.Choices[0].Message.Content)).To(Or(ContainSubstring("sure"), ContainSubstring("five"), ContainSubstring("5")))

stream, err := client.CreateChatCompletionStream(context.TODO(), openai.ChatCompletionRequest{Model: "rwkv_test", Messages: []openai.ChatCompletionMessage{{Content: "Can you count up to five?", Role: "user"}}})
Expect(err).ToNot(HaveOccurred())
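The embedding assertions above move from 384-dimensional vectors (all-MiniLM-L6-v2) to 2048-dimensional ones, which matches the hidden size of the Llama 3.2 1B model that now backs the embeddings. The follow-up check re-requests `sunEmbedding` and compares vectors for equality; a small cosine-similarity helper is a more tolerant way to compare such embeddings. This is a standalone sketch, not part of the test suite:

```go
package main

import (
	"fmt"
	"math"
)

// cosineSimilarity compares two embedding vectors of equal length;
// values close to 1.0 indicate near-identical embeddings.
func cosineSimilarity(a, b []float32) float64 {
	var dot, normA, normB float64
	for i := range a {
		dot += float64(a[i]) * float64(b[i])
		normA += float64(a[i]) * float64(a[i])
		normB += float64(b[i]) * float64(b[i])
	}
	if normA == 0 || normB == 0 {
		return 0
	}
	return dot / (math.Sqrt(normA) * math.Sqrt(normB))
}

func main() {
	a := []float32{0.1, 0.2, 0.3}
	b := []float32{0.1, 0.2, 0.3}
	fmt.Printf("similarity: %.3f\n", cosineSimilarity(a, b)) // prints 1.000
}
```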
46 changes: 11 additions & 35 deletions docs/content/docs/features/embeddings.md
@@ -27,39 +27,6 @@ embeddings: true
# .. other parameters
```

## Bert embeddings

To use `bert.cpp` models you can use the `bert` embedding backend.

An example model config file:

```yaml
name: text-embedding-ada-002
parameters:
model: bert
backend: bert-embeddings
embeddings: true
# .. other parameters
```

The `bert` backend uses [bert.cpp](https://github.com/skeskinen/bert.cpp) and uses `ggml` models.

For instance you can download the `ggml` quantized version of `all-MiniLM-L6-v2` from https://huggingface.co/skeskinen/ggml:

```bash
wget https://huggingface.co/skeskinen/ggml/resolve/main/all-MiniLM-L6-v2/ggml-model-q4_0.bin -O models/bert
```

To test locally (LocalAI server running on `localhost`),
you can use `curl` (and `jq` at the end to prettify):

```bash
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "Your text string goes here",
"model": "text-embedding-ada-002"
}' | jq "."
```

## Huggingface embeddings

To use `sentence-transformers` and models in `huggingface` you can use the `sentencetransformers` embedding backend.
@@ -87,17 +54,26 @@ The `sentencetransformers` backend uses Python [sentence-transformers](https://g

## Llama.cpp embeddings

Embeddings with `llama.cpp` are supported with the `llama` backend.
Embeddings with `llama.cpp` are supported with the `llama-cpp` backend; it needs to be enabled by setting `embeddings` to `true`.

```yaml
name: my-awesome-model
backend: llama
backend: llama-cpp
embeddings: true
parameters:
model: ggml-file.bin
# ...
```

Then you can use the API to generate embeddings:

```bash
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
"input": "My text",
"model": "my-awesome-model"
}' | jq "."
```

## 💡 Examples

- Example that uses LLamaIndex and LocalAI as embedding: [here](https://github.com/go-skynet/LocalAI/tree/master/examples/query_data/).
2 changes: 1 addition & 1 deletion docs/content/docs/features/model-gallery.md
@@ -300,7 +300,7 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{

```bash
curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
"url": "github:mudler/LocalAI/gallery/bert-embeddings.yaml",
"id": "bert-embeddings",
"name": "text-embedding-ada-002"
}'
```
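The gallery snippet above now installs the embedding model by gallery `id` instead of a raw YAML URL. A minimal Go sketch of the same request, mirroring the curl example; the local address is an assumption and the payload fields are taken from the diff above:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Install a gallery model by id, as in the updated curl example.
	payload, _ := json.Marshal(map[string]string{
		"id":   "bert-embeddings",
		"name": "text-embedding-ada-002",
	})
	resp, err := http.Post("http://localhost:8080/models/apply", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}
```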
23 changes: 0 additions & 23 deletions embedded/models/bert-cpp.yaml

This file was deleted.

12 changes: 0 additions & 12 deletions gallery/bert-embeddings.yaml

This file was deleted.
