
Commit 91f1b34

Merge pull request #520 from dice-group/cache_for_owl_reasoners
Fix for semantic cache issue
2 parents c8f6454 + cc96ba3

2 files changed: 91 additions & 68 deletions


ontolearn/semantic_caching.py

Lines changed: 23 additions & 14 deletions
```diff
@@ -157,26 +157,35 @@ def concept_generator(path_kg):
 
 
 
-def get_shuffled_concepts(path_kg, data_name):
-    '''Shuffle the generated concept and save it in a folder for reproducibility'''
-    # Create the directory if it does not exist
+
+def get_saved_concepts(path_kg, data_name, shuffle):
+    """Shuffle or not the generated concept and save it in a folder for reproducibility."""
+
+    # Create the directory if it does not exist
     cache_dir = f"caching_results_{data_name}"
     os.makedirs(cache_dir, exist_ok=True)
-    save_file = os.path.join(cache_dir, "shuffled_concepts.pkl")
+
+    # Determine the filename based on shuffle flag
+    filename = "shuffled_concepts.pkl" if shuffle else "unshuffled_concepts.pkl"
+    save_file = os.path.join(cache_dir, filename)
 
     if os.path.exists(save_file):
-        # Load the saved shuffled concepts
         with open(save_file, "rb") as f:
             alc_concepts = pickle.load(f)
-        print("Loaded shuffled concepts from file.")
+        print(f"Loaded concepts from {filename}.")
     else:
-        # Generate, shuffle, and save the concepts
+        # Generate concepts and optionally shuffle
        alc_concepts = concept_generator(path_kg)
-        random.seed(0)
-        random.shuffle(alc_concepts)
+        if shuffle:
+            random.seed(0)
+            random.shuffle(alc_concepts)
+
+        # Save the concepts
        with open(save_file, "wb") as f:
            pickle.dump(alc_concepts, f)
-        print("Generated, shuffled, and saved concepts.")
+
+        print(f"Generated and saved {'shuffled' if shuffle else 'unshuffled'} concepts.")
+
     return alc_concepts
 
 
```
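
In effect, both branches now persist their concept list to disk under a flag-dependent filename. A minimal sketch of the two call patterns, not part of the diff; the `data_name` value here is an assumption, derived the same way the callers below derive it from the KG path:

```python
# Hypothetical usage illustrating the two on-disk artifacts.
from ontolearn.semantic_caching import get_saved_concepts

# Shuffled variant: order is deterministic via random.seed(0).
concepts = get_saved_concepts("KGs/Family/father.owl", data_name="father", shuffle=True)
# -> caching_results_father/shuffled_concepts.pkl

# Unshuffled variant: preserves the generation order of concept_generator.
concepts = get_saved_concepts("KGs/Family/father.owl", data_name="father", shuffle=False)
# -> caching_results_father/unshuffled_concepts.pkl
```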

```diff
@@ -555,9 +564,9 @@ def run_semantic_cache(path_kg:str, path_kge:str, cache_size:int, name_reasoner:
     data_name = path_kg.split("/")[-1].split("/")[-1].split(".")[0]
 
     if shuffle_concepts:
-        alc_concepts = get_shuffled_concepts(path_kg, data_name=data_name)
+        alc_concepts = get_saved_concepts(path_kg, data_name=data_name, shuffle=True)
     else:
-        alc_concepts = concept_generator(path_kg)
+        alc_concepts = get_saved_concepts(path_kg, data_name=data_name, shuffle=False)
 
     if name_reasoner == 'EBR':
         cached_retriever = semantic_caching_size(retrieve, cache_size=cache_size, eviction_strategy=eviction, random_seed=random_seed, cache_type=cache_type, concepts=alc_concepts)
```
```diff
@@ -635,9 +644,9 @@ def run_non_semantic_cache(path_kg:str, path_kge:str, cache_size:int, name_reaso
     data_name = path_kg.split("/")[-1].split("/")[-1].split(".")[0]
 
     if shuffle_concepts:
-        alc_concepts = get_shuffled_concepts(path_kg, data_name=data_name)
+        alc_concepts = get_saved_concepts(path_kg, data_name=data_name, shuffle=True)
     else:
-        alc_concepts = concept_generator(path_kg)
+        alc_concepts = get_saved_concepts(path_kg, data_name=data_name, shuffle=False)
 
     if name_reasoner == 'EBR':
         cached_retriever = non_semantic_caching_size(retrieve, cache_size=cache_size)
```
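
The two call-site edits above are what close the issue: with `shuffle_concepts=False`, both runners previously regenerated concepts from scratch on every run, whereas they now go through the same save-or-load path. A hedged sketch of a call, assuming the keyword names past the signature shown in the hunk headers match the local variables visible in the diff body:

```python
# Sketch only: keyword names after name_reasoner are inferred from the diff.
results, _ = run_semantic_cache(
    path_kg="KGs/Family/father.owl",
    path_kge=None,
    cache_size=640,            # e.g. 0.8 * 800, mirroring the test suite
    name_reasoner="EBR",
    eviction="LRU",
    random_seed=0,
    cache_type="cold",
    shuffle_concepts=False,    # now loads/saves unshuffled_concepts.pkl instead of regenerating
)
```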

tests/test_semantic_cache.py

Lines changed: 68 additions & 54 deletions
```diff
@@ -1,54 +1,68 @@
-# from ontolearn.semantic_caching import run_semantic_cache, run_non_semantic_cache
-#
-
-# class TestSemanticCache:
-#     def setup_method(self):
-#         self.path_kg = "KGs/Family/father.owl" #path to the father datasets
-#         self.path_kge = None
-#         self.symbolic_reasoner = "HermiT"
-#         self.neural_reasoner = "EBR"
-#         self.num_concepts = 800
-#         self.cache_size = 0.8*self.num_concepts
-#         self.eviction = "LRU"
-#         self.cache_type = "cold"
-#
-#     def run_cache_tests(self, cache_semantic, cache_non_semantic):
-#         assert cache_semantic["hit_ratio"] >= cache_non_semantic["hit_ratio"], f"Expected semantic caching to have higher hit ratio, but got {cache_semantic['hit_ratio']} vs {cache_non_semantic['hit_ratio']}"
-#         assert cache_semantic["miss_ratio"] <= cache_non_semantic["miss_ratio"], f"Expected semantic caching to have lower miss ratio, but got {cache_semantic['miss_ratio']} vs {cache_non_semantic['miss_ratio']}"
-#
-#     def test_jaccard(self):
-#
-#         cache_neural,_ = run_semantic_cache(self.path_kg, self.path_kge, self.cache_size, self.neural_reasoner, self.eviction, 0, self.cache_type, True)
-#         cache_symbolic,_ = run_semantic_cache(self.path_kg, self.path_kge, self.cache_size, self.symbolic_reasoner, self.eviction, 0, self.cache_type, True)
-#
-#         assert float(cache_neural["avg_jaccard"]) >= float(cache_neural["avg_jaccard_reas"]), "Expected average Jaccard similarity to be at least as good as reasoner-based retrieval."
-#         assert float(cache_symbolic["avg_jaccard"]) >= float(cache_symbolic["avg_jaccard_reas"]), "Expected average Jaccard similarity to be at least as good as reasoner-based retrieval."
-#
-#
-#     def test_cache_methods(self):
-#         for reasoner in [self.neural_reasoner, self.symbolic_reasoner]:
-#             cache_semantic,_ = run_semantic_cache(self.path_kg, self.path_kge, self.cache_size, reasoner, self.eviction, 0, self.cache_type, True)
-#             cache_non_semantic,_ = run_non_semantic_cache(self.path_kg, self.path_kge, self.cache_size, reasoner, True)
-#             self.run_cache_tests(cache_semantic, cache_non_semantic)
-#
-#     def test_cache_size(self):
-#         cache_large,_ = run_semantic_cache(self.path_kg, self.path_kge, self.cache_size, self.neural_reasoner, self.eviction, 0, self.cache_type, True)
-#
-#         for k in [0.1, 0.2]:
-#             cache_small,_ = run_semantic_cache(self.path_kg, self.path_kge, k * self.num_concepts, self.neural_reasoner, self.eviction, 0, self.cache_type, True)
-#             assert cache_small["hit_ratio"] <= cache_large["hit_ratio"], f"Expected hit ratio to increase with cache size, but got {cache_small['hit_ratio']} vs {cache_large['hit_ratio']}"
-#             assert cache_small["miss_ratio"] >= cache_large["miss_ratio"], f"Expected miss ratio to decrease with cache size, but got {cache_small['miss_ratio']} vs {cache_large['miss_ratio']}"
-#
-#     def test_eviction_strategy(self):
-#         eviction_strategies = ["LRU", "FIFO", "LIFO", "MRU", "RP"]
-#         results = {strategy: float(run_semantic_cache(self.path_kg, self.path_kge, self.cache_size, self.neural_reasoner, strategy, 10, self.cache_type, True)[0]["hit_ratio"]) for strategy in eviction_strategies}
-#
-#         for strategy, hit_ratio in results.items():
-#             assert isinstance(hit_ratio, float), f"Hit ratio for {strategy} should be a float, but got {type(hit_ratio)}"
-#
-#         best_strategy = max(results, key=results.get)
-#         assert best_strategy == "LRU", f"Expected LRU to be the best, but got {best_strategy}"
-#
-#         assert results, "No results were generated, possibly due to a failure in the cache evaluation process."
-#         for strategy, hit_ratio in results.items():
-#             assert 0.0 <= hit_ratio <= 1.0, f"Hit ratio for {strategy} is out of bounds: {hit_ratio}"
+import os
+
+if "CUDA_VISIBLE_DEVICES" not in os.environ:
+    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+
+import torch
+from ontolearn.semantic_caching import run_semantic_cache, run_non_semantic_cache
+
+
+def check_cuda():
+    if torch.cuda.is_available():
+        print("GPU detected. Setting CUDA_VISIBLE_DEVICES=0")
+    else:
+        print("No GPU detected. Running on CPU.")
+
+check_cuda()
+
+class TestSemanticCache:
+    def setup_method(self):
+        self.path_kg = "KGs/Family/father.owl" #path to the father datasets
+        self.path_kge = None
+        self.symbolic_reasoner = "HermiT"
+        self.neural_reasoner = "EBR"
+        self.num_concepts = 800
+        self.cache_size = 0.8*self.num_concepts
+        self.eviction = "LRU"
+        self.cache_type = "cold"
+
+    def run_cache_tests(self, cache_semantic, cache_non_semantic):
+        assert cache_semantic["hit_ratio"] >= cache_non_semantic["hit_ratio"], f"Expected semantic caching to have higher hit ratio, but got {cache_semantic['hit_ratio']} vs {cache_non_semantic['hit_ratio']}"
+        assert cache_semantic["miss_ratio"] <= cache_non_semantic["miss_ratio"], f"Expected semantic caching to have lower miss ratio, but got {cache_semantic['miss_ratio']} vs {cache_non_semantic['miss_ratio']}"
+
+    def test_jaccard(self):
+
+        cache_neural,_ = run_semantic_cache(self.path_kg, self.path_kge, self.cache_size, self.neural_reasoner, self.eviction, 0, self.cache_type, True)
+        cache_symbolic,_ = run_semantic_cache(self.path_kg, self.path_kge, self.cache_size, self.symbolic_reasoner, self.eviction, 0, self.cache_type, True)
+
+        assert float(cache_neural["avg_jaccard"]) >= float(cache_neural["avg_jaccard_reas"]), "Expected average Jaccard similarity to be at least as good as reasoner-based retrieval."
+        assert float(cache_symbolic["avg_jaccard"]) >= float(cache_symbolic["avg_jaccard_reas"]), "Expected average Jaccard similarity to be at least as good as reasoner-based retrieval."
+
+
+    def test_cache_methods(self):
+        for reasoner in [self.neural_reasoner, self.symbolic_reasoner]:
+            cache_semantic,_ = run_semantic_cache(self.path_kg, self.path_kge, self.cache_size, reasoner, self.eviction, 0, self.cache_type, True)
+            cache_non_semantic,_ = run_non_semantic_cache(self.path_kg, self.path_kge, self.cache_size, reasoner, True)
+            self.run_cache_tests(cache_semantic, cache_non_semantic)
+
+    def test_cache_size(self):
+        cache_large,_ = run_semantic_cache(self.path_kg, self.path_kge, self.cache_size, self.neural_reasoner, self.eviction, 0, self.cache_type, True)
+
+        for k in [0.1, 0.2]:
+            cache_small,_ = run_semantic_cache(self.path_kg, self.path_kge, k * self.num_concepts, self.neural_reasoner, self.eviction, 0, self.cache_type, True)
+            assert cache_small["hit_ratio"] <= cache_large["hit_ratio"], f"Expected hit ratio to increase with cache size, but got {cache_small['hit_ratio']} vs {cache_large['hit_ratio']}"
+            assert cache_small["miss_ratio"] >= cache_large["miss_ratio"], f"Expected miss ratio to decrease with cache size, but got {cache_small['miss_ratio']} vs {cache_large['miss_ratio']}"
+
+    def test_eviction_strategy(self):
+        eviction_strategies = ["LRU", "FIFO", "LIFO", "MRU", "RP"]
+        results = {strategy: float(run_semantic_cache(self.path_kg, self.path_kge, self.cache_size, self.neural_reasoner, strategy, 10, self.cache_type, True)[0]["hit_ratio"]) for strategy in eviction_strategies}
+
+        for strategy, hit_ratio in results.items():
+            assert isinstance(hit_ratio, float), f"Hit ratio for {strategy} should be a float, but got {type(hit_ratio)}"
+
+        best_strategy = max(results, key=results.get)
+        assert best_strategy == "LRU", f"Expected LRU to be the best, but got {best_strategy}"
+
+        assert results, "No results were generated, possibly due to a failure in the cache evaluation process."
+        for strategy, hit_ratio in results.items():
+            assert 0.0 <= hit_ratio <= 1.0, f"Hit ratio for {strategy} is out of bounds: {hit_ratio}"
```
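
With the previously commented-out suite now active, it runs under pytest in the usual way (e.g. `pytest tests/test_semantic_cache.py`), assuming the `KGs/Family/father.owl` dataset referenced in `setup_method` is available locally.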
