[go: top, home]

Skip to content

Commit

Permalink
Add flag to disable macOS Metal for llama.cpp when running builds
Browse files Browse the repository at this point in the history
  • Loading branch information
davidmezzetti committed May 28, 2024
1 parent 88adf4d commit 7f241b0
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 2 deletions.
1 change: 1 addition & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ jobs:
run: |
echo "OMP_NUM_THREADS=1" >> $GITHUB_ENV
echo "PYTORCH_MPS_DISABLE=1" >> $GITHUB_ENV
echo "LLAMA_NO_METAL=1" >> $GITHUB_ENV
echo "ACCELERATE_USE_CPU=1" >> $GITHUB_ENV
echo "TIKA_STARTUP_SLEEP=30" >> $GITHUB_ENV
echo "TIKA_STARTUP_MAX_RETRY=10" >> $GITHUB_ENV
Expand Down
2 changes: 1 addition & 1 deletion src/python/txtai/pipeline/llm/llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def __init__(self, path, template=None, **kwargs):
path = path if os.path.exists(path) else self.download(path)

# Default GPU layers if not already set
kwargs["n_gpu_layers"] = kwargs.get("n_gpu_layers", -1 if kwargs.get("gpu", True) else 0)
kwargs["n_gpu_layers"] = kwargs.get("n_gpu_layers", -1 if kwargs.get("gpu", os.environ.get("LLAMA_NO_METAL") != "1") else 0)

# Create llama.cpp instance
self.llm = Llama(path, verbose=kwargs.pop("verbose", False), **kwargs)
Expand Down
2 changes: 1 addition & 1 deletion src/python/txtai/vectors/llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def loadmodel(self, path):
modelargs = self.config.get("vectors", {})

# Default GPU layers if not already set
modelargs["n_gpu_layers"] = modelargs.get("n_gpu_layers", -1 if self.config.get("gpu", True) else 0)
modelargs["n_gpu_layers"] = modelargs.get("n_gpu_layers", -1 if modelargs.get("gpu", os.environ.get("LLAMA_NO_METAL") != "1") else 0)

# Create llama.cpp instance
return Llama(path, verbose=modelargs.pop("verbose", False), embedding=True, **modelargs)
Expand Down

0 comments on commit 7f241b0

Please sign in to comment.