Commit

Merge branch 'master' into feat-request-middleware

dave-gray101 authored Nov 26, 2024
2 parents 95c722d + e8128a3 commit a0aac96
Showing 16 changed files with 90 additions and 186 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/checksum_checker.yaml
@@ -23,7 +23,7 @@ jobs:
sudo pip install --upgrade pip
pip install huggingface_hub
- name: 'Setup yq'
uses: dcarbone/install-yq-action@v1.3.0
uses: dcarbone/install-yq-action@v1.3.1
with:
version: 'v4.44.2'
download-compressed: true
1 change: 0 additions & 1 deletion .gitignore
@@ -12,7 +12,6 @@ prepare-sources

go-ggml-transformers
go-gpt2
go-rwkv
whisper.cpp
/bloomz
go-bert
35 changes: 2 additions & 33 deletions Makefile
@@ -8,11 +8,7 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=cce5a9007572c6e9fa522296b77571d2e5071357

# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6
CPPLLAMA_VERSION?=47f931c8f9a26c072d71224bc8013cc66ea9e445

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
@@ -209,7 +205,6 @@ ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-grpc
ALL_GRPC_BACKENDS+=backend-assets/util/llama-cpp-rpc-server
ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
ALL_GRPC_BACKENDS+=backend-assets/grpc/whisper
ALL_GRPC_BACKENDS+=backend-assets/grpc/local-store
ALL_GRPC_BACKENDS+=backend-assets/grpc/silero-vad
@@ -272,20 +267,6 @@ sources/go-piper:
sources/go-piper/libpiper_binding.a: sources/go-piper
$(MAKE) -C sources/go-piper libpiper_binding.a example/main piper.o


## RWKV
sources/go-rwkv.cpp:
mkdir -p sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && \
git init && \
git remote add origin $(RWKV_REPO) && \
git fetch origin && \
git checkout $(RWKV_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch

sources/go-rwkv.cpp/librwkv.a: sources/go-rwkv.cpp
cd sources/go-rwkv.cpp && cd rwkv.cpp && cmake . -DRWKV_BUILD_SHARED_LIBRARY=OFF && cmake --build . && cp librwkv.a ..

## stable diffusion
sources/go-stable-diffusion:
mkdir -p sources/go-stable-diffusion
@@ -339,10 +320,9 @@ sources/whisper.cpp:
sources/whisper.cpp/libwhisper.a: sources/whisper.cpp
cd sources/whisper.cpp && $(MAKE) libwhisper.a libggml.a

get-sources: sources/go-llama.cpp sources/go-piper sources/go-rwkv.cpp sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp
get-sources: sources/go-llama.cpp sources/go-piper sources/whisper.cpp sources/go-bert.cpp sources/go-stable-diffusion sources/go-tiny-dream backend/cpp/llama/llama.cpp

replace:
$(GOCMD) mod edit -replace github.com/donomii/go-rwkv.cpp=$(CURDIR)/sources/go-rwkv.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp/bindings/go=$(CURDIR)/sources/whisper.cpp/bindings/go
$(GOCMD) mod edit -replace github.com/go-skynet/go-bert.cpp=$(CURDIR)/sources/go-bert.cpp
@@ -352,7 +332,6 @@ replace:
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(CURDIR)/sources/go-llama.cpp

dropreplace:
$(GOCMD) mod edit -dropreplace github.com/donomii/go-rwkv.cpp
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp
$(GOCMD) mod edit -dropreplace github.com/ggerganov/whisper.cpp/bindings/go
$(GOCMD) mod edit -dropreplace github.com/go-skynet/go-bert.cpp
@@ -368,7 +347,6 @@ prepare-sources: get-sources replace
rebuild: ## Rebuilds the project
$(GOCMD) clean -cache
$(MAKE) -C sources/go-llama.cpp clean
$(MAKE) -C sources/go-rwkv.cpp clean
$(MAKE) -C sources/whisper.cpp clean
$(MAKE) -C sources/go-stable-diffusion clean
$(MAKE) -C sources/go-bert.cpp clean
@@ -477,8 +455,6 @@ test-models/testmodel.ggml:
wget -q https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin -O test-models/whisper-en
wget -q https://huggingface.co/mudler/all-MiniLM-L6-v2/resolve/main/ggml-model-q4_0.bin -O test-models/bert
wget -q https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav -O test-dir/audio.wav
wget -q https://huggingface.co/mudler/rwkv-4-raven-1.5B-ggml/resolve/main/RWKV-4-Raven-1B5-v11-Eng99%2525-Other1%2525-20230425-ctx4096_Q4_0.bin -O test-models/rwkv
wget -q https://raw.githubusercontent.com/saharNooby/rwkv.cpp/5eb8f09c146ea8124633ab041d9ea0b1f1db4459/rwkv/20B_tokenizer.json -O test-models/rwkv.tokenizer.json
cp tests/models_fixtures/* test-models

prepare-test: grpcs
@@ -855,13 +831,6 @@ ifneq ($(UPX),)
$(UPX) backend-assets/grpc/piper
endif

backend-assets/grpc/rwkv: sources/go-rwkv.cpp sources/go-rwkv.cpp/librwkv.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-rwkv.cpp LIBRARY_PATH=$(CURDIR)/sources/go-rwkv.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/rwkv ./backend/go/llm/rwkv
ifneq ($(UPX),)
$(UPX) backend-assets/grpc/rwkv
endif

backend-assets/grpc/stablediffusion: sources/go-stable-diffusion sources/go-stable-diffusion/libstablediffusion.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/go-stable-diffusion/:/usr/include/opencv4" LIBRARY_PATH=$(CURDIR)/sources/go-stable-diffusion/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/stablediffusion ./backend/go/image/stablediffusion
1 change: 0 additions & 1 deletion README.md
@@ -241,7 +241,6 @@ LocalAI couldn't have been built without the help of great software already avai
- https://github.com/antimatter15/alpaca.cpp
- https://github.com/EdVince/Stable-Diffusion-NCNN
- https://github.com/ggerganov/whisper.cpp
- https://github.com/saharNooby/rwkv.cpp
- https://github.com/rhasspy/piper

## 🤗 Contributors
5 changes: 3 additions & 2 deletions backend/cpp/llama/grpc-server.cpp
@@ -203,7 +203,7 @@ struct llama_client_slot
std::string stopping_word;

// sampling
struct common_sampler_params sparams;
struct common_params_sampling sparams;
common_sampler *ctx_sampling = nullptr;

int32_t ga_i = 0; // group-attention state
@@ -662,7 +662,7 @@ struct llama_server_context

bool launch_slot_with_data(llama_client_slot* &slot, json data) {
slot_params default_params;
common_sampler_params default_sparams;
common_params_sampling default_sparams;

slot->params.stream = json_value(data, "stream", false);
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
@@ -2299,6 +2299,7 @@ static void params_parse(const backend::ModelOptions* request,
params.use_mmap = request->mmap();
params.flash_attn = request->flashattention();
params.no_kv_offload = request->nokvoffload();
params.ctx_shift = false; // We control context-shifting in any case (and we disable it as it could just lead to infinite loops)

params.embedding = request->embeddings();

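The new `params.ctx_shift = false` line above turns off llama.cpp's automatic context shifting; the in-code rationale is that LocalAI controls context shifting itself and an unbounded shift can loop forever. A toy Go sketch (illustrative only, not LocalAI's actual generation loop) of that failure mode:

```go
package main

import "fmt"

// Toy model of a fixed-size context window. With shifting enabled, the loop
// evicts the oldest token and can generate forever; with it disabled, as in
// the diff above, generation is bounded by the window size.
func main() {
	const nCtx = 8        // toy context-window size
	ctx := []int{1, 2, 3} // prompt tokens
	ctxShift := false     // mirrors params.ctx_shift = false

	for tok := 100; ; tok++ { // pretend tok is the next sampled token
		if len(ctx) >= nCtx {
			if !ctxShift {
				fmt.Println("context full: stop and return to the caller")
				return // bounded: no infinite loop
			}
			ctx = ctx[1:] // shift: drop the oldest token and keep going
		}
		ctx = append(ctx, tok)
	}
}
```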
21 changes: 0 additions & 21 deletions backend/go/llm/rwkv/main.go

This file was deleted.

95 changes: 0 additions & 95 deletions backend/go/llm/rwkv/rwkv.go

This file was deleted.

4 changes: 2 additions & 2 deletions backend/python/openvoice/requirements-intel.txt
@@ -5,14 +5,14 @@ optimum[openvino]
grpcio==1.68.0
protobuf
librosa==0.9.1
faster-whisper==1.1.0
faster-whisper==0.9.0
pydub==0.25.1
wavmark==0.0.3
numpy==1.22.0
eng_to_ipa==0.0.2
inflect==7.0.0
unidecode==1.3.7
whisper-timestamped==1.15.8
whisper-timestamped==1.14.2
openai
python-dotenv
pypinyin==0.50.0
41 changes: 39 additions & 2 deletions gallery/index.yaml
@@ -1,4 +1,27 @@
---
- &rwkv
  url: "github:mudler/LocalAI/gallery/rwkv.yaml@master"
  name: "rwkv-6-world-7b"
  license: apache-2.0
  urls:
    - https://huggingface.co/RWKV/rwkv-6-world-7b
    - https://huggingface.co/bartowski/rwkv-6-world-7b-GGUF
  tags:
    - llm
    - rwkv
    - cpu
    - gpu
    - rnn
  description: |
    RWKV (pronounced RwaKuv) is an RNN with GPT-level LLM performance, and can also be directly trained like a GPT transformer (parallelizable). We are at RWKV-7.
    So it's combining the best of RNN and transformer - great performance, fast inference, fast training, saves VRAM, "infinite" ctxlen, and free text embedding. Moreover it's 100% attention-free, and a Linux Foundation AI project.
  overrides:
    parameters:
      model: rwkv-6-world-7b-Q4_K_M.gguf
  files:
    - filename: rwkv-6-world-7b-Q4_K_M.gguf
      sha256: f74574186fa4584f405e92198605680db6ad00fd77974ffa14bf02073bb90273
      uri: huggingface://bartowski/rwkv-6-world-7b-GGUF/rwkv-6-world-7b-Q4_K_M.gguf
- &qwen25coder
name: "qwen2.5-coder-14b"
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
@@ -678,8 +701,8 @@
      model: Llama-Sentient-3.2-3B-Instruct.Q4_K_M.gguf
  files:
    - filename: Llama-Sentient-3.2-3B-Instruct.Q4_K_M.gguf
      sha256: 0b1c10da004ffd61b860c9058265e9bdb7f53c7be8e87feece8896d680f5b8be
      uri: huggingface://QuantFactory/Llama-Sentient-3.2-3B-Instruct-GGUF/Llama-Sentient-3.2-3B-Instruct.Q4_K_M.gguf
      sha256: 3f855ce0522bfdc39fc826162ba6d89f15cc3740c5207da10e70baa3348b7812
- &qwen25
  ## Qwen2.5
  name: "qwen2.5-14b-instruct"
@@ -2165,6 +2188,20 @@
    - filename: Llama-3.1-Swallow-70B-v0.1.i1-Q4_K_M.gguf
      sha256: 9eaa08a4872a26f56fe34b27a99f7bd0d22ee2b2d1c84cfcde2091b5f61af5fa
      uri: huggingface://mradermacher/Llama-3.1-Swallow-70B-v0.1-i1-GGUF/Llama-3.1-Swallow-70B-v0.1.i1-Q4_K_M.gguf
- !!merge <<: *llama31
  name: "llama-3.1_openscholar-8b"
  urls:
    - https://huggingface.co/OpenScholar/Llama-3.1_OpenScholar-8B
    - https://huggingface.co/bartowski/Llama-3.1_OpenScholar-8B-GGUF
  description: |
    Llama-3.1_OpenScholar-8B is a fine-tuned 8B model for scientific literature synthesis, trained on the os-data dataset. Developed by the University of Washington and the Allen Institute for AI (AI2).
  overrides:
    parameters:
      model: Llama-3.1_OpenScholar-8B-Q4_K_M.gguf
  files:
    - filename: Llama-3.1_OpenScholar-8B-Q4_K_M.gguf
      sha256: 54865fc86451959b495c494a51bb1806c8b62bf1415600f0da2966a8a1fe6c7d
      uri: huggingface://bartowski/Llama-3.1_OpenScholar-8B-GGUF/Llama-3.1_OpenScholar-8B-Q4_K_M.gguf
## Uncensored models
- !!merge <<: *llama31
  name: "humanish-roleplay-llama-3.1-8b-i1"
@@ -3496,7 +3533,7 @@
    - https://huggingface.co/AIDC-AI/Marco-o1
    - https://huggingface.co/QuantFactory/Marco-o1-GGUF
  description: |
    Marco-o1 not only focuses on disciplines with standard answers, such as mathematics, physics, and coding—which are well-suited for reinforcement learning (RL)—but also places greater emphasis on open-ended resolutions. We aim to address the question: "Can the o1 model effectively generalize to broader domains where clear standards are absent and rewards are challenging to quantify?"
  overrides:
    parameters:
      model: Marco-o1.Q4_K_M.gguf
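Each gallery entry above pins its artifact with a sha256 alongside the download uri; the checksum_checker workflow bumped at the top of this commit appears to be what keeps those hashes current. A minimal Go sketch of the verification step, using the hash and filename from the rwkv-6-world-7b entry (the local path is illustrative):

```go
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io"
	"os"
)

func main() {
	// Pinned hash from the rwkv-6-world-7b gallery entry above.
	const want = "f74574186fa4584f405e92198605680db6ad00fd77974ffa14bf02073bb90273"

	f, err := os.Open("rwkv-6-world-7b-Q4_K_M.gguf") // illustrative local path
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	defer f.Close()

	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil { // stream the file through the hash
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	if got := hex.EncodeToString(h.Sum(nil)); got != want {
		fmt.Fprintf(os.Stderr, "checksum mismatch: got %s\n", got)
		os.Exit(1)
	}
	fmt.Println("checksum OK")
}
```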
23 changes: 23 additions & 0 deletions gallery/rwkv.yaml
@@ -0,0 +1,23 @@
---
name: "rwkv"

config_file: |
  parameters:
    top_k: 80
    temperature: 0.9
    max_tokens: 4098
    top_p: 0.8
  context_size: 4098
  roles:
    user: "User: "
    system: "System: "
    assistant: "Assistant: "
  stopwords:
    - 'Assistant:'
  template:
    chat: "{{.Input}}\nAssistant: "
    completion: |
      {{.Input}}
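The chat template in this new config file is a Go text/template expanded over the accumulated conversation. A small sketch of how it renders (the promptData struct is an illustrative stand-in, not LocalAI's internal type):

```go
package main

import (
	"os"
	"text/template"
)

// promptData stands in for whatever LocalAI binds to its prompt templates;
// the template string is the chat template from gallery/rwkv.yaml above.
type promptData struct {
	Input string
}

func main() {
	tmpl := template.Must(template.New("chat").Parse("{{.Input}}\nAssistant: "))
	if err := tmpl.Execute(os.Stdout, promptData{Input: "User: What is RWKV?"}); err != nil {
		panic(err)
	}
	// Prints:
	// User: What is RWKV?
	// Assistant:
}
```

The 'Assistant:' stopword in the same config then cuts generation off before the model can start writing the user's next turn.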
1 change: 0 additions & 1 deletion go.mod
@@ -15,7 +15,6 @@ require (
github.com/cncf/xds/go v0.0.0-20240423153145-555b57ec207b
github.com/containerd/containerd v1.7.19
github.com/dave-gray101/v2keyauth v0.0.0-20240624150259-c45d584d25e2
github.com/donomii/go-rwkv.cpp v0.0.0-20240228065144-661e7ae26d44
github.com/elliotchance/orderedmap/v2 v2.2.0
github.com/fsnotify/fsnotify v1.7.0
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20240626202019-c118733a29ad
1 change: 0 additions & 1 deletion pkg/model/initializers.go
@@ -46,7 +46,6 @@ const (
LLamaCPPGRPC = "llama-cpp-grpc"

BertEmbeddingsBackend = "bert-embeddings"
RwkvBackend = "rwkv"
WhisperBackend = "whisper"
StableDiffusionBackend = "stablediffusion"
TinyDreamBackend = "tinydream"
5 changes: 4 additions & 1 deletion scripts/model_gallery_info.py
@@ -103,7 +103,10 @@ def format_description(description):
if readmeFile:
    # If there is a README file, read it
    readme = fs.read_text(readmeFile)
    summarized_readme = summarize(readme)
    try:
        summarized_readme = summarize(readme)
    except Exception as e:
        print(f"Error summarizing the README: {str(e)}", file=sys.stderr)
    summarized_readme = format_description(summarized_readme)

print("Model correctly processed")