diff --git a/README.md b/README.md
index 9356fd8..981c55e 100644
--- a/README.md
+++ b/README.md
@@ -13,11 +13,22 @@ cat <<EOF > values.yaml
 replicaCount: 1
 
 deployment:
-  image: quay.io/go-skynet/local-ai:latest
+  image:
+    repository: quay.io/go-skynet/local-ai  # Example: "docker.io/myapp"
+    tag: latest
   env:
     threads: 4
     context_size: 512
   modelsPath: "/models"
+  download_model:
+    # To use cloud provided (eg AWS) image, provide it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
+    image: busybox
+  prompt_templates:
+    # To use cloud provided (eg AWS) image, provide it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
+    image: busybox
+  pullPolicy: IfNotPresent
+  imagePullSecrets: []
+    # - name: secret-names
 
 resources:
   {}
@@ -50,31 +61,51 @@ models:
   # The list of URLs to download models from
   # Note: the name of the file will be the name of the loaded model
   list:
-    - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
+    # - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
       # basicAuth: base64EncodedCredentials
 
-  # Persistent storage for models and prompt templates.
-  # PVC and HostPath are mutually exclusive. If both are enabled,
-  # PVC configuration takes precedence. If neither are enabled, ephemeral
-  # storage is used.
-  persistence:
-    pvc:
-      enabled: false
-      size: 6Gi
-      accessModes:
-        - ReadWriteOnce
-
-      annotations: {}
-
-      # Optional
-      storageClass: ~
-
-    hostPath:
-      enabled: false
-      path: "/models"
+initContainers: []
+# Example:
+# - name: my-init-container
+#   image: my-init-image
+#   imagePullPolicy: IfNotPresent
+#   command: ["/bin/sh", "-c", "echo init"]
+#   volumeMounts:
+#     - name: my-volume
+#       mountPath: /path/to/mount
+
+sidecarContainers: []
+# Example:
+# - name: my-sidecar-container
+#   image: my-sidecar-image
+#   imagePullPolicy: IfNotPresent
+#   ports:
+#     - containerPort: 1234
+
+# Persistent storage for models and generated output.
+# Every enabled entry creates a PVC named <release fullname>-<key> that is
+# mounted at globalMount (default: /<key>) in each container of the pod.
+# accessModes must be a list of Kubernetes access modes.
+persistence:
+  models:
+    enabled: true
+    annotations: {}
+    storageClass: longhorn
+    accessModes: [ReadWriteMany]
+    size: 100Gi
+    globalMount: /models
+  images:
+    enabled: true
+    annotations: {}
+    storageClass: longhorn
+    accessModes: [ReadWriteMany]
+    size: 5Gi
+    globalMount: /tmp/generated/images
 
 service:
   type: ClusterIP
+  # If deferring to an internal only load balancer
+  # externalTrafficPolicy: Local
   port: 80
   annotations: {}
   # If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
@@ -103,6 +134,8 @@ tolerations: []
 
 affinity: {}
 
+
+
 ```
 Install the LocalAI chart:
 ```bash
diff --git a/charts/local-ai/Chart.yaml b/charts/local-ai/Chart.yaml
index e4e9a6e..766f458 100644
--- a/charts/local-ai/Chart.yaml
+++ b/charts/local-ai/Chart.yaml
@@ -3,4 +3,4 @@ appVersion: 1.40
 description: A Helm chart for deploying LocalAI to a Kubernetes cluster
 name: local-ai
 type: application
-version: 2.1.3
+version: 3.0.0
diff --git a/charts/local-ai/templates/_pvc.yaml b/charts/local-ai/templates/_pvc.yaml
new file mode 100644
index 0000000..c468757
--- /dev/null
+++ b/charts/local-ai/templates/_pvc.yaml
@@ -0,0 +1,29 @@
+{{- /*
+local-ai.pvc renders a single PersistentVolumeClaim document.
+Input is a dict with the keys: name, namespace, labels (label YAML that the
+caller has already rendered), annotations, storageClass, accessModes (list), size.
+*/ -}}
+{{- define "local-ai.pvc" }}
+---
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  name: {{ .name }}
+  namespace: {{ .namespace | quote }}
+  labels: {{ .labels | nindent 4 }}
+  {{- with .annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  {{- with .storageClass }}
+  storageClassName: {{ . }}
+  {{- end }}
+  accessModes:
+  {{- range .accessModes }}
+    - {{ . | quote }}
+  {{- end }}
+  resources:
+    requests:
+      storage: {{ .size | quote }}
+{{- end }}
diff --git a/charts/local-ai/templates/deployment.yaml b/charts/local-ai/templates/deployment.yaml
index b1d97c0..af33e97 100644
--- a/charts/local-ai/templates/deployment.yaml
+++ b/charts/local-ai/templates/deployment.yaml
@@ -27,11 +27,33 @@ spec:
         checksum/config-prompt-templates: {{ include (print $.Template.BasePath "/configmap-prompt-templates.yaml") . | sha256sum }}
         {{- end }}
     spec:
-      {{- with .Values.deployment.imagePullSecrets }}
+    {{- with .Values.deployment.imagePullSecrets }}
       imagePullSecrets:
         {{- toYaml . | nindent 8 }}
-      {{- end }}
+    {{- end }}
       initContainers:
+        # Additional initContainers from values.yaml
+        {{- range .Values.initContainers }}
+        - name: {{ .name }}
+          image: {{ .image }}
+          imagePullPolicy: {{ .imagePullPolicy }}
+          command: {{ .command | toJson }}
+          args: {{ .args | default list | toJson }}
+          env:
+            {{- toYaml .env | nindent 12 }}
+          resources:
+            {{- toYaml .resources | nindent 12 }}
+          volumeMounts:
+            {{- if .volumeMounts }}{{ toYaml .volumeMounts | nindent 12 }}{{ end }}
+            {{- range $key, $pvc := $.Values.persistence }}
+            {{- if $pvc.enabled }}
+            - name: {{ $key }}
+              mountPath: {{ $pvc.globalMount | default (print "/" $key) }}
+            {{- end }}
+            {{- end }}
+          securityContext:
+            {{- toYaml .securityContext | nindent 12 }}
+        {{- end }}
         {{- if .Values.promptTemplates }}
         - name: prompt-templates
           image: {{ .Values.deployment.prompt_templates.image }}
@@ -43,8 +65,12 @@ spec:
           volumeMounts:
             - mountPath: /prompt-templates
               name: prompt-templates
-            - mountPath: /models
-              name: models
+            {{- range $key, $pvc := .Values.persistence }}
+            {{- if $pvc.enabled }}
+            - name: {{ $key }}
+              mountPath: {{ $pvc.globalMount | default (print "/" $key) }}
+            {{- end }}
+            {{- end }}
         {{- end }}
         - name: download-model
           image: {{ .Values.deployment.download_model.image }}
@@ -55,74 +81,119 @@ spec:
               MODEL_DIR={{ .Values.deployment.modelsPath }}
               FORCE_DOWNLOAD={{ .Values.models.forceDownload }}
               URLS="{{ $urls }}"
+              LOCK_DIR=/tmp/model-download-locks
 
               mkdir -p "$MODEL_DIR"
+              mkdir -p "$LOCK_DIR"
+              mkdir -p "/tmp/generated/images"
+              mkdir -p "/tmp/generated/audio"
+              rm -rf "/models/lost+found"
+
 
-              # Split urls on commas
               echo "$URLS" | awk -F, '{for (i=1; i<=NF; i++) print $i}' | while read -r line; do
                   url=$(echo "$line" | awk '{print $1}')
                   auth=$(echo "$line" | awk '{print $2}')
+                  full_filename=$(basename "$url" .bin)
+                  short_filename=$(echo "$full_filename" | cut -c1-20)
+                  hash=$(echo "$full_filename" | sha256sum | cut -c1-12)
+                  filename="${short_filename}_${hash}"
+                  lockfile="$LOCK_DIR/$filename.lock"
 
-                  if [ -n "$url" ]; then
-                      filename=$(basename "$url" .bin)
-
-                      if [ "$FORCE_DOWNLOAD" = false ] && [ -f "$MODEL_DIR/$filename" ]; then
-                          echo "File $filename already exists. Skipping download."
-                          continue
-                      fi
+                  if [ -e "$MODEL_DIR/$filename" ]; then
+                      echo "File $filename already exists. Skipping download."
+                      continue
+                  fi
 
-                      rm -f "$MODEL_DIR/$filename"
+                  if [ -e "$lockfile" ]; then
+                      echo "Another pod is downloading $filename. Waiting for download to complete."
+                      while [ -e "$lockfile" ]; do sleep 1; done
+                      continue
+                  fi
 
-                      echo "Downloading $filename"
+                  touch "$lockfile"
 
-                      if [ -n "$auth" ]; then
-                          wget --header "Authorization: Basic $auth" "$url" -O "$MODEL_DIR/$filename"
-                      else
-                          wget "$url" -O "$MODEL_DIR/$filename"
-                      fi
+                  echo "Downloading $filename"
+                  if [ -n "$auth" ]; then
+                      wget --header "Authorization: Basic $auth" "$url" -O "$MODEL_DIR/$filename"
+                  else
+                      wget "$url" -O "$MODEL_DIR/$filename"
+                  fi
 
-                      if [ "$?" -ne 0 ]; then
-                          echo "Download failed."
-                      else
-                          echo "Download completed."
-                      fi
+                  if [ "$?" -ne 0 ]; then
+                      echo "Download failed."
+                      rm -f "$lockfile"
+                      exit 1
+                  else
+                      echo "Download completed."
+                      rm -f "$lockfile"
                   fi
               done
           volumeMounts:
-            - mountPath: {{ .Values.deployment.modelsPath }}
-              name: models
+            {{- range $key, $pvc := .Values.persistence }}
+            {{- if $pvc.enabled }}
+            - name: {{ $key }}
+              mountPath: {{ $pvc.globalMount | default (print "/" $key) }}
+            {{- end }}
+            {{- end }}
       containers:
+        # Sidecar containers from values.yaml
+        {{- range .Values.sidecarContainers }}
+        - name: {{ .name }}
+          image: {{ .image }}
+          imagePullPolicy: {{ .imagePullPolicy }}
+          command: {{ .command | toJson }}
+          args: {{ .args | default list | toJson }}
+          env:
+            {{- toYaml .env | nindent 12 }}
+          ports:
+            {{- toYaml .ports | nindent 12 }}
+          resources:
+            {{- toYaml .resources | nindent 12 }}
+          volumeMounts:
+            {{- if .volumeMounts }}{{ toYaml .volumeMounts | nindent 12 }}{{ end }}
+            {{- range $key, $pvc := $.Values.persistence }}
+            {{- if $pvc.enabled }}
+            - name: {{ $key }}
+              mountPath: {{ $pvc.globalMount | default (print "/" $key) }}
+            {{- end }}
+            {{- end }}
+          livenessProbe:
+            {{- toYaml .livenessProbe | nindent 12 }}
+          readinessProbe:
+            {{- toYaml .readinessProbe | nindent 12 }}
+          securityContext:
+            {{- toYaml .securityContext | nindent 12 }}
+        {{- end }}
         - name: {{ template "local-ai.fullname" . }}
-          image: {{ .Values.deployment.image }}
+          image: "{{ .Values.deployment.image.repository }}:{{ .Values.deployment.image.tag }}"
           imagePullPolicy: {{ .Values.deployment.pullPolicy }}
           resources:
             {{- toYaml .Values.resources | nindent 12 }}
           env:
-            {{- range $key, $value := .Values.deployment.env }}
-            - name: {{ $key | upper }}
-              value: {{ quote $value }}
-            {{- end }}
-            - name: MODELS_PATH
-              value: {{ .Values.deployment.modelsPath }}
+          {{- range $key, $value := .Values.deployment.env }}
+          - name: {{ $key | upper }}
+            value: {{ quote $value }}
+          {{- end }}
+          - name: MODELS_PATH
+            value: {{ .Values.deployment.modelsPath }}
           volumeMounts:
-            - mountPath: {{ .Values.deployment.modelsPath }}
-              name: models
+            {{- range $key, $pvc := .Values.persistence }}
+            {{- if $pvc.enabled }}
+            - name: {{ $key }}
+              mountPath: {{ $pvc.globalMount | default (print "/" $key) }}
+            {{- end }}
+            {{- end }}
       volumes:
-        {{- if .Values.models.persistence.pvc.enabled }}
-        - name: models
-          persistentVolumeClaim:
-            claimName: {{ template "local-ai.fullname" . }}
-        {{- else if .Values.models.persistence.hostPath.enabled }}
-        - name: models
-          hostPath:
-            path: {{ .Values.models.persistence.hostPath.path }}
-        {{- else }}
-        - name: models
-          emptyDir: {}
-        {{- end }}
-        - name: prompt-templates
-          configMap:
-            name: {{ template "local-ai.fullname" . }}-prompt-templates
+      {{- range $key, $pvc := .Values.persistence }}
+      {{- if $pvc.enabled }}
+      - name: {{ $key }}
+        persistentVolumeClaim:
+          claimName: {{ printf "%s-%s" (include "local-ai.fullname" $) $key }}
+      {{- end }}
+      {{- end }}
+      - name: prompt-templates
+        configMap:
+          name: {{ template "local-ai.fullname" . }}-prompt-templates
       {{- with .Values.nodeSelector }}
       nodeSelector:
         {{- toYaml . | nindent 8 }}
diff --git a/charts/local-ai/templates/pvc-models.yaml b/charts/local-ai/templates/pvc-models.yaml
deleted file mode 100644
index dae93af..0000000
--- a/charts/local-ai/templates/pvc-models.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-{{- if .Values.models.persistence.pvc.enabled }}
-kind: PersistentVolumeClaim
-apiVersion: v1
-metadata:
-  name: {{ template "local-ai.fullname" . }}
-  namespace: {{ .Release.Namespace | quote }}
-  labels: {{ include "local-ai.labels" . | nindent 4 }}
-  {{- with .Values.models.persistence.pvc.annotations }}
-  annotations:
-    {{- toYaml . | nindent 4 }}
-  {{- end }}
-spec:
-  {{- with .Values.models.persistence.pvc.storageClass }}
-  storageClassName: {{ . }}
-  {{- end }}
-  {{- range .Values.models.persistence.pvc.accessModes }}
-  accessModes:
-    - {{ . | quote }}
-  {{- end }}
-  resources:
-    requests:
-      storage: {{ .Values.models.persistence.pvc.size | quote }}
-{{- end }}
diff --git a/charts/local-ai/templates/pvcs.yaml b/charts/local-ai/templates/pvcs.yaml
new file mode 100644
index 0000000..356a63d
--- /dev/null
+++ b/charts/local-ai/templates/pvcs.yaml
@@ -0,0 +1,5 @@
+{{- range $key, $pvc := .Values.persistence }}
+  {{- if $pvc.enabled }}
+    {{- include "local-ai.pvc" (dict "name" (printf "%s-%s" (include "local-ai.fullname" $) $key) "namespace" $.Release.Namespace "labels" (include "local-ai.labels" $) "annotations" $pvc.annotations "storageClass" $pvc.storageClass "accessModes" $pvc.accessModes "size" $pvc.size) }}
+  {{- end }}
+{{- end }}
diff --git a/charts/local-ai/values.yaml b/charts/local-ai/values.yaml
index 270135c..ed51428 100644
--- a/charts/local-ai/values.yaml
+++ b/charts/local-ai/values.yaml
@@ -1,7 +1,9 @@
 replicaCount: 1
 
 deployment:
-  image: quay.io/go-skynet/local-ai:latest
+  image:
+    repository: quay.io/go-skynet/local-ai  # Example: "docker.io/myapp"
+    tag: latest
   env:
     threads: 4
     context_size: 512
@@ -50,25 +52,43 @@ models:
     # - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
       # basicAuth: base64EncodedCredentials
 
-  # Persistent storage for models and prompt templates.
-  # PVC and HostPath are mutually exclusive. If both are enabled,
-  # PVC configuration takes precedence. If neither are enabled, ephemeral
-  # storage is used.
-  persistence:
-    pvc:
-      enabled: false
-      size: 6Gi
-      accessModes:
-        - ReadWriteOnce
-
-      annotations: {}
-
-      # Optional
-      storageClass: ~
-
-    hostPath:
-      enabled: false
-      path: "/models"
+initContainers: []
+# Example:
+# - name: my-init-container
+#   image: my-init-image
+#   imagePullPolicy: IfNotPresent
+#   command: ["/bin/sh", "-c", "echo init"]
+#   volumeMounts:
+#     - name: my-volume
+#       mountPath: /path/to/mount
+
+sidecarContainers: []
+# Example:
+# - name: my-sidecar-container
+#   image: my-sidecar-image
+#   imagePullPolicy: IfNotPresent
+#   ports:
+#     - containerPort: 1234
+
+# Persistent storage for models and generated output.
+# Every enabled entry creates a PVC named <release fullname>-<key> that is
+# mounted at globalMount (default: /<key>) in each container of the pod.
+# accessModes must be a list of Kubernetes access modes.
+persistence:
+  models:
+    enabled: true
+    annotations: {}
+    storageClass: hostPath
+    accessModes: [ReadWriteMany]
+    size: 10Gi
+    globalMount: /models
+  output:
+    enabled: true
+    annotations: {}
+    storageClass: hostPath
+    accessModes: [ReadWriteMany]
+    size: 5Gi
+    globalMount: /tmp/generated
 
 service:
   type: ClusterIP
@@ -102,6 +122,3 @@ tolerations: []
 
 affinity: {}
 
-image:
-  pullPolicy: IfNotPresent
-