
Commit 91f1b34

Merge pull request #520 from dice-group/cache_for_owl_reasoners
Fix for semantic cache issue
2 parents c8f6454 + cc96ba3

2 files changed: 91 additions & 68 deletions


ontolearn/semantic_caching.py

Lines changed: 23 additions & 14 deletions
```diff
@@ -157,26 +157,35 @@ def concept_generator(path_kg):
 
 
 
-def get_shuffled_concepts(path_kg, data_name):
-    '''Shuffle the generated concept and save it in a folder for reproducibility'''
-    # Create the directory if it does not exist
+
+def get_saved_concepts(path_kg, data_name, shuffle):
+    """Shuffle or not the generated concept and save it in a folder for reproducibility."""
+
+    # Create the directory if it does not exist
     cache_dir = f"caching_results_{data_name}"
     os.makedirs(cache_dir, exist_ok=True)
-    save_file = os.path.join(cache_dir, "shuffled_concepts.pkl")
+
+    # Determine the filename based on shuffle flag
+    filename = "shuffled_concepts.pkl" if shuffle else "unshuffled_concepts.pkl"
+    save_file = os.path.join(cache_dir, filename)
 
     if os.path.exists(save_file):
-        # Load the saved shuffled concepts
         with open(save_file, "rb") as f:
             alc_concepts = pickle.load(f)
-        print("Loaded shuffled concepts from file.")
+        print(f"Loaded concepts from {filename}.")
     else:
-        # Generate, shuffle, and save the concepts
+        # Generate concepts and optionally shuffle
        alc_concepts = concept_generator(path_kg)
-        random.seed(0)
-        random.shuffle(alc_concepts)
+        if shuffle:
+            random.seed(0)
+            random.shuffle(alc_concepts)
+
+        # Save the concepts
        with open(save_file, "wb") as f:
            pickle.dump(alc_concepts, f)
-        print("Generated, shuffled, and saved concepts.")
+
+        print(f"Generated and saved {'shuffled' if shuffle else 'unshuffled'} concepts.")
+
     return alc_concepts
 
 
```
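
In effect, both branches now persist their concept list to disk under a flag-dependent filename. A minimal sketch of the two call patterns, not part of the diff; the `data_name` value here is an assumption, derived the same way the callers below derive it from the KG path:

```python
# Hypothetical usage illustrating the two on-disk artifacts.
from ontolearn.semantic_caching import get_saved_concepts

# Shuffled variant: order is deterministic via random.seed(0).
concepts = get_saved_concepts("KGs/Family/father.owl", data_name="father", shuffle=True)
# -> caching_results_father/shuffled_concepts.pkl

# Unshuffled variant: preserves the generation order of concept_generator.
concepts = get_saved_concepts("KGs/Family/father.owl", data_name="father", shuffle=False)
# -> caching_results_father/unshuffled_concepts.pkl
```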

```diff
@@ -555,9 +564,9 @@ def run_semantic_cache(path_kg:str, path_kge:str, cache_size:int, name_reasoner:
     data_name = path_kg.split("/")[-1].split("/")[-1].split(".")[0]
 
     if shuffle_concepts:
-        alc_concepts = get_shuffled_concepts(path_kg, data_name=data_name)
+        alc_concepts = get_saved_concepts(path_kg, data_name=data_name, shuffle=True)
     else:
-        alc_concepts = concept_generator(path_kg)
+        alc_concepts = get_saved_concepts(path_kg, data_name=data_name, shuffle=False)
 
     if name_reasoner == 'EBR':
         cached_retriever = semantic_caching_size(retrieve, cache_size=cache_size, eviction_strategy=eviction, random_seed=random_seed, cache_type=cache_type, concepts=alc_concepts)
```
```diff
@@ -635,9 +644,9 @@ def run_non_semantic_cache(path_kg:str, path_kge:str, cache_size:int, name_reaso
     data_name = path_kg.split("/")[-1].split("/")[-1].split(".")[0]
 
     if shuffle_concepts:
-        alc_concepts = get_shuffled_concepts(path_kg, data_name=data_name)
+        alc_concepts = get_saved_concepts(path_kg, data_name=data_name, shuffle=True)
     else:
-        alc_concepts = concept_generator(path_kg)
+        alc_concepts = get_saved_concepts(path_kg, data_name=data_name, shuffle=False)
 
     if name_reasoner == 'EBR':
         cached_retriever = non_semantic_caching_size(retrieve, cache_size=cache_size)
```
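
The two call-site edits above are what close the issue: with `shuffle_concepts=False`, both runners previously regenerated concepts from scratch on every run, whereas they now go through the same save-or-load path. A hedged sketch of a call, assuming the keyword names past the signature shown in the hunk headers match the local variables visible in the diff body:

```python
# Sketch only: keyword names after name_reasoner are inferred from the diff.
results, _ = run_semantic_cache(
    path_kg="KGs/Family/father.owl",
    path_kge=None,
    cache_size=640,            # e.g. 0.8 * 800, mirroring the test suite
    name_reasoner="EBR",
    eviction="LRU",
    random_seed=0,
    cache_type="cold",
    shuffle_concepts=False,    # now loads/saves unshuffled_concepts.pkl instead of regenerating
)
```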

tests/test_semantic_cache.py

Lines changed: 68 additions & 54 deletions
```diff
@@ -1,54 +1,68 @@
-# from ontolearn.semantic_caching import run_semantic_cache, run_non_semantic_cache
-#
-
-# class TestSemanticCache:
-#     def setup_method(self):
-#         self.path_kg = "KGs/Family/father.owl" #path to the father datasets
-#         self.path_kge = None
-#         self.symbolic_reasoner = "HermiT"
-#         self.neural_reasoner = "EBR"
-#         self.num_concepts = 800
-#         self.cache_size = 0.8*self.num_concepts
-#         self.eviction = "LRU"
-#         self.cache_type = "cold"
-#
-#     def run_cache_tests(self, cache_semantic, cache_non_semantic):
-#         assert cache_semantic["hit_ratio"] >= cache_non_semantic["hit_ratio"], f"Expected semantic caching to have higher hit ratio, but got {cache_semantic['hit_ratio']} vs {cache_non_semantic['hit_ratio']}"
-#         assert cache_semantic["miss_ratio"] <= cache_non_semantic["miss_ratio"], f"Expected semantic caching to have lower miss ratio, but got {cache_semantic['miss_ratio']} vs {cache_non_semantic['miss_ratio']}"
-#
-#     def test_jaccard(self):
-#
-#         cache_neural,_ = run_semantic_cache(self.path_kg, self.path_kge, self.cache_size, self.neural_reasoner, self.eviction, 0, self.cache_type, True)
-#         cache_symbolic,_ = run_semantic_cache(self.path_kg, self.path_kge, self.cache_size, self.symbolic_reasoner, self.eviction, 0, self.cache_type, True)
-#
-#         assert float(cache_neural["avg_jaccard"]) >= float(cache_neural["avg_jaccard_reas"]), "Expected average Jaccard similarity to be at least as good as reasoner-based retrieval."
-#         assert float(cache_symbolic["avg_jaccard"]) >= float(cache_symbolic["avg_jaccard_reas"]), "Expected average Jaccard similarity to be at least as good as reasoner-based retrieval."
-#
-#
-#     def test_cache_methods(self):
-#         for reasoner in [self.neural_reasoner, self.symbolic_reasoner]:
-#             cache_semantic,_ = run_semantic_cache(self.path_kg, self.path_kge, self.cache_size, reasoner, self.eviction, 0, self.cache_type, True)
-#             cache_non_semantic,_ = run_non_semantic_cache(self.path_kg, self.path_kge, self.cache_size, reasoner, True)
-#             self.run_cache_tests(cache_semantic, cache_non_semantic)
-#
-#     def test_cache_size(self):
-#         cache_large,_ = run_semantic_cache(self.path_kg, self.path_kge, self.cache_size, self.neural_reasoner, self.eviction, 0, self.cache_type, True)
-#
-#         for k in [0.1, 0.2]:
-#             cache_small,_ = run_semantic_cache(self.path_kg, self.path_kge, k * self.num_concepts, self.neural_reasoner, self.eviction, 0, self.cache_type, True)
-#             assert cache_small["hit_ratio"] <= cache_large["hit_ratio"], f"Expected hit ratio to increase with cache size, but got {cache_small['hit_ratio']} vs {cache_large['hit_ratio']}"
-#             assert cache_small["miss_ratio"] >= cache_large["miss_ratio"], f"Expected miss ratio to decrease with cache size, but got {cache_small['miss_ratio']} vs {cache_large['miss_ratio']}"
-#
-#     def test_eviction_strategy(self):
-#         eviction_strategies = ["LRU", "FIFO", "LIFO", "MRU", "RP"]
-#         results = {strategy: float(run_semantic_cache(self.path_kg, self.path_kge, self.cache_size, self.neural_reasoner, strategy, 10, self.cache_type, True)[0]["hit_ratio"]) for strategy in eviction_strategies}
-#
-#         for strategy, hit_ratio in results.items():
-#             assert isinstance(hit_ratio, float), f"Hit ratio for {strategy} should be a float, but got {type(hit_ratio)}"
-#
-#         best_strategy = max(results, key=results.get)
-#         assert best_strategy == "LRU", f"Expected LRU to be the best, but got {best_strategy}"
-#
-#         assert results, "No results were generated, possibly due to a failure in the cache evaluation process."
-#         for strategy, hit_ratio in results.items():
-#             assert 0.0 <= hit_ratio <= 1.0, f"Hit ratio for {strategy} is out of bounds: {hit_ratio}"
+import os
+
+if "CUDA_VISIBLE_DEVICES" not in os.environ:
+    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+
+import torch
+from ontolearn.semantic_caching import run_semantic_cache, run_non_semantic_cache
+
+
+def check_cuda():
+    if torch.cuda.is_available():
+        print("GPU detected. Setting CUDA_VISIBLE_DEVICES=0")
+    else:
+        print("No GPU detected. Running on CPU.")
+
+check_cuda()
+
+class TestSemanticCache:
+    def setup_method(self):
+        self.path_kg = "KGs/Family/father.owl" #path to the father datasets
+        self.path_kge = None
+        self.symbolic_reasoner = "HermiT"
+        self.neural_reasoner = "EBR"
+        self.num_concepts = 800
+        self.cache_size = 0.8*self.num_concepts
+        self.eviction = "LRU"
+        self.cache_type = "cold"
+
+    def run_cache_tests(self, cache_semantic, cache_non_semantic):
+        assert cache_semantic["hit_ratio"] >= cache_non_semantic["hit_ratio"], f"Expected semantic caching to have higher hit ratio, but got {cache_semantic['hit_ratio']} vs {cache_non_semantic['hit_ratio']}"
+        assert cache_semantic["miss_ratio"] <= cache_non_semantic["miss_ratio"], f"Expected semantic caching to have lower miss ratio, but got {cache_semantic['miss_ratio']} vs {cache_non_semantic['miss_ratio']}"
+
+    def test_jaccard(self):
+
+        cache_neural,_ = run_semantic_cache(self.path_kg, self.path_kge, self.cache_size, self.neural_reasoner, self.eviction, 0, self.cache_type, True)
+        cache_symbolic,_ = run_semantic_cache(self.path_kg, self.path_kge, self.cache_size, self.symbolic_reasoner, self.eviction, 0, self.cache_type, True)
+
+        assert float(cache_neural["avg_jaccard"]) >= float(cache_neural["avg_jaccard_reas"]), "Expected average Jaccard similarity to be at least as good as reasoner-based retrieval."
+        assert float(cache_symbolic["avg_jaccard"]) >= float(cache_symbolic["avg_jaccard_reas"]), "Expected average Jaccard similarity to be at least as good as reasoner-based retrieval."
+
+
+    def test_cache_methods(self):
+        for reasoner in [self.neural_reasoner, self.symbolic_reasoner]:
+            cache_semantic,_ = run_semantic_cache(self.path_kg, self.path_kge, self.cache_size, reasoner, self.eviction, 0, self.cache_type, True)
+            cache_non_semantic,_ = run_non_semantic_cache(self.path_kg, self.path_kge, self.cache_size, reasoner, True)
+            self.run_cache_tests(cache_semantic, cache_non_semantic)
+
+    def test_cache_size(self):
+        cache_large,_ = run_semantic_cache(self.path_kg, self.path_kge, self.cache_size, self.neural_reasoner, self.eviction, 0, self.cache_type, True)
+
+        for k in [0.1, 0.2]:
+            cache_small,_ = run_semantic_cache(self.path_kg, self.path_kge, k * self.num_concepts, self.neural_reasoner, self.eviction, 0, self.cache_type, True)
+            assert cache_small["hit_ratio"] <= cache_large["hit_ratio"], f"Expected hit ratio to increase with cache size, but got {cache_small['hit_ratio']} vs {cache_large['hit_ratio']}"
+            assert cache_small["miss_ratio"] >= cache_large["miss_ratio"], f"Expected miss ratio to decrease with cache size, but got {cache_small['miss_ratio']} vs {cache_large['miss_ratio']}"
+
+    def test_eviction_strategy(self):
+        eviction_strategies = ["LRU", "FIFO", "LIFO", "MRU", "RP"]
+        results = {strategy: float(run_semantic_cache(self.path_kg, self.path_kge, self.cache_size, self.neural_reasoner, strategy, 10, self.cache_type, True)[0]["hit_ratio"]) for strategy in eviction_strategies}
+
+        for strategy, hit_ratio in results.items():
+            assert isinstance(hit_ratio, float), f"Hit ratio for {strategy} should be a float, but got {type(hit_ratio)}"
+
+        best_strategy = max(results, key=results.get)
+        assert best_strategy == "LRU", f"Expected LRU to be the best, but got {best_strategy}"
+
+        assert results, "No results were generated, possibly due to a failure in the cache evaluation process."
+        for strategy, hit_ratio in results.items():
+            assert 0.0 <= hit_ratio <= 1.0, f"Hit ratio for {strategy} is out of bounds: {hit_ratio}"
```
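
With the previously commented-out suite now active, it runs under pytest in the usual way (e.g. `pytest tests/test_semantic_cache.py`), assuming the `KGs/Family/father.owl` dataset referenced in `setup_method` is available locally.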
