Skip to content

Commit 9104793

Browse files
fix llamacpp and windows libuv (#298)
1 parent 7f5d486 commit 9104793

File tree

6 files changed

+13
-20
lines changed

6 files changed

+13
-20
lines changed

optimum_benchmark/backends/base.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,11 @@ def __init__(self, config: BackendConfigT):
7070

7171
elif self.config.library == "llama_cpp":
7272
self.logger.info("\t+ Benchmarking a LlamaCpp model")
73-
# TODO: need a custom method to extract shapes from gguf
74-
self.model_shapes = extract_transformers_shapes_from_artifacts(
75-
self.pretrained_config, self.pretrained_processor
76-
)
7773
self.pretrained_processor = None
78-
self.generation_config = None
7974
self.pretrained_config = None
75+
self.generation_config = None
8076
self.automodel_loader = None
77+
self.model_shapes = {}
8178

8279
else:
8380
self.logger.info("\t+ Benchmarking a Transformers model")

optimum_benchmark/backends/llama_cpp/backend.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,10 @@ def llama_cpp_kwargs(self) -> Dict[str, Any]:
4141
"echo": False,
4242
}
4343

44-
def prepare_input_shapes(self, input_shapes: Dict[str, Any]) -> Dict[str, Any]:
45-
if self.config.task == "text-generation":
46-
if input_shapes["batch_size"] != 1:
47-
raise ValueError("Batch size must be 1 for LlamaCpp text generation")
48-
49-
return input_shapes
50-
5144
def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
5245
if self.config.task == "text-generation":
46+
if inputs["input_ids"].shape[0] != 1:
47+
raise ValueError("Batch size must be 1 for LlamaCpp text generation")
5348
return {"tokens": inputs["input_ids"].squeeze(0).tolist()}
5449

5550
elif self.config.task == "feature-extraction":

optimum_benchmark/backends/pytorch/backend.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
from .config import PyTorchConfig
2626

2727
if is_deepspeed_available():
28-
import deepspeed
28+
import deepspeed # type: ignore
2929

3030
if is_torch_distributed_available():
3131
import torch.distributed

optimum_benchmark/launchers/torchrun/launcher.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import os
2-
import sys
32
import traceback
43
from contextlib import ExitStack
54
from logging import Logger
@@ -155,10 +154,6 @@ def entrypoint(worker: Callable[..., BenchmarkReport], worker_args: List[Any], l
155154
else:
156155
setup_logging(level="ERROR", to_file=log_to_file, prefix=f"RANK-PROCESS-{rank}")
157156

158-
if sys.platform == "win32":
159-
logger.info("\t+ Disabling libuv on Windows")
160-
os.environ["USE_LIBUV"] = "0"
161-
162157
if torch.cuda.is_available():
163158
logger.info(f"\t+ Setting torch.distributed cuda device to {rank}")
164159
device = torch.device("cuda", rank)

tests/configs/_gguf_.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@ hydra:
22
mode: MULTIRUN
33
sweeper:
44
params:
5+
backend.model: ggml-org/models
56
backend.task: text-generation,feature-extraction
6-
backend.model: QuantFactory/gpt2-GGUF
7-
backend.filename: gpt2.Q4_0.gguf
7+
backend.filename: tinyllamas/stories15M-q8_0.gguf

tests/test_cli.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ def test_cli_configs(config_name):
5353

5454
@pytest.mark.parametrize("launcher", ["inline", "process", "torchrun"])
5555
def test_cli_exit_code_0(launcher):
56+
if launcher == "torchrun" and sys.platform == "win32":
57+
pytest.skip("torchrun is not supported on Windows")
58+
5659
args_0 = [
5760
"optimum-benchmark",
5861
"--config-dir",
@@ -73,6 +76,9 @@ def test_cli_exit_code_0(launcher):
7376

7477
@pytest.mark.parametrize("launcher", ["inline", "process", "torchrun"])
7578
def test_cli_exit_code_1(launcher):
79+
if launcher == "torchrun" and sys.platform == "win32":
80+
pytest.skip("torchrun is not supported on Windows")
81+
7682
args_1 = [
7783
"optimum-benchmark",
7884
"--config-dir",

0 commit comments

Comments
 (0)