Refactoring

dice-group · Dec 11, 2024 · e4f6aa6
1 parent 01c9d81
commit e4f6aa6
Show file tree

Hide file tree

Showing 2 changed files with 39 additions and 113 deletions.
diff --git a/examples/retrieval_with_cache.py b/examples/retrieval_with_cache.py
@@ -23,59 +23,59 @@ def get_cache_size(list_k, path_kg):
     return [max(1, int(k * data_size)) for k in list_k]
 
 
-# results = []
-# for path_kg in args.path_kg:
-#     for cache_size in get_cache_size(args.cache_size_ratios, path_kg):
-#         for strategy in ['LIFO', 'FIFO', 'LRU', 'MRU', 'RP']:
-#             result, detailed = run_cache(
-#                 path_kg=path_kg,
-#                 path_kge=args.path_kge,
-#                 cache_size=cache_size,
-#                 name_reasoner=args.name_reasoner,
-#                 eviction=strategy,
-#                 random_seed=args.random_seed_for_RP,
-#                 cache_type=args.cache_type,
-#                 shuffle_concepts=args.shuffle_concepts
-#             )
-#             results.append(result)
-
-#     data_kg = result['dataset']
-#     df = pd.DataFrame(results)
-#     print(df)
-
-#     # Save to CSV
-#     df.to_csv(f'caching_results_{data_kg}/cache_experiments_{args.name_reasoner}_{data_kg}.csv', index=False)
-
-
 results = []
-detailed_results = []
 for path_kg in args.path_kg:
     for cache_size in get_cache_size(args.cache_size_ratios, path_kg):
-        result, D = run_cache(
+        for strategy in ['LIFO', 'FIFO', 'LRU', 'MRU', 'RP']:
+            result, detailed = run_cache(
                 path_kg=path_kg,
                 path_kge=args.path_kge,
                 cache_size=cache_size,
                 name_reasoner=args.name_reasoner,
-                eviction=args.eviction_strategy,
+                eviction=strategy,
                 random_seed=args.random_seed_for_RP,
                 cache_type=args.cache_type,
                 shuffle_concepts=args.shuffle_concepts
             )
-        results.append(result)
-        detailed_results.append(D)
+            results.append(result)
+
+    data_kg = result['dataset']
+    df = pd.DataFrame(results)
+    print(df)
+
+    # Save to CSV
+    df.to_csv(f'caching_results_{data_kg}/cache_experiments_{args.name_reasoner}_{data_kg}.csv', index=False)
+
+
+# results = []
+# detailed_results = []
+# for path_kg in args.path_kg:
+#     for cache_size in get_cache_size(args.cache_size_ratios, path_kg):
+#         result, D = run_cache(
+#                 path_kg=path_kg,
+#                 path_kge=args.path_kge,
+#                 cache_size=cache_size,
+#                 name_reasoner=args.name_reasoner,
+#                 eviction=args.eviction_strategy,
+#                 random_seed=args.random_seed_for_RP,
+#                 cache_type=args.cache_type,
+#                 shuffle_concepts=args.shuffle_concepts
+#             )
+#         results.append(result)
+#         detailed_results.append(D)
 
-all_detailed_results = [item for sublist in detailed_results for item in sublist]
+# all_detailed_results = [item for sublist in detailed_results for item in sublist]
 
-results = pd.DataFrame(results)
-# results.to_csv(f'caching_results/cache_experiments_{args.name_reasoner}.csv')  
+# results = pd.DataFrame(results)
+# # results.to_csv(f'caching_results/cache_experiments_{args.name_reasoner}.csv')  
 
-plot_scale_factor(results, args.name_reasoner)    
-plot_jaccard_vs_cache_size(results, args.name_reasoner) 
+# plot_scale_factor(results, args.name_reasoner)    
+# plot_jaccard_vs_cache_size(results, args.name_reasoner) 
 
-# # print(results.to_latex(index=False))
+# # # print(results.to_latex(index=False))
 
-all_detailed_results = pd.DataFrame(all_detailed_results)
-bar_plot_separate_data(all_detailed_results, cache_size=90, name_reasoner=args.name_reasoner)
+# all_detailed_results = pd.DataFrame(all_detailed_results)
+# bar_plot_separate_data(all_detailed_results, cache_size=90, name_reasoner=args.name_reasoner)
 # bar_plot_all_data(all_detailed_results, cache_size=90, name_reasoner=args.name_reasoner)
 # all_detailed_results.to_csv(f'caching_results/detailed_cache_experiments_{args.name_reasoner}.csv')
 
diff --git a/ontolearn/semantic_caching.py b/ontolearn/semantic_caching.py
@@ -184,7 +184,7 @@ def __init__(self, cache_size, strategy='LIFO', random_seed=10):
 
     def _evict(self):
         '''empty the cache when it is full using different strategy'''
-        if len(self.cache) >= self.cache_size:
+        if len(self.cache) > self.cache_size:
             if self.strategy == 'FIFO':
                 self.cache.popitem(last=False)  # Evict the oldest item (first in)
             elif self.strategy == 'LIFO':
@@ -412,15 +412,14 @@ def handle_owl_some_values_from(owl_expression):
             some_values_expr = transform_forall_to_exists(all_values_expr)
             cached_result = retrieve_from_cache(some_values_expr)
             result = (All_individuals - cached_result) if cached_result is not None else func(*args)
+
         else:
             result = func(*args)
 
         stats['time'] += (time.time() - start_time)
         cache.put(str_expression, result)
         return result
 
-
-
     def transform_forall_to_exists(expression):
         pattern_negated = r'∀ (\w+)\.\(¬(\w+)\)'
         replacement_negated = r'∃ \1.\2'
@@ -563,76 +562,3 @@ def run_cache(path_kg:str, path_kge:str, cache_size:int, name_reasoner:str, evic
 
 
 
-# def subsumption_based_caching(func, cache_size):
-#     cache = {}  # Dictionary to store cached results
-
-#     def store(concept, instances):
-#         # Check if cache limit will be exceeded
-#         if len(instances) + len(cache) > cache_size:
-#             purge(len(instances))  # Adjusted to ensure cache size limit
-#         # Add concept and instances to cache
-#         cache[concept] = instances
-
-#     def purge(needed_space):
-#         # Remove oldest items until there's enough space
-#         while len(cache) > needed_space:
-#             cache.pop(next(iter(cache)))
-
-#     def wrapper(*args):
-#         path_onto = args[1]
-#         onto = get_ontology(path_onto).load()
-
-#         # Synchronize the reasoner (e.g., using Pellet)
-#         # with onto:
-#         #     sync_reasoner(infer_property_values=True)
-
-#         all_individuals = {a for a in onto.individuals()}       
-#         str_expression = owl_expression_to_dl(args[0])
-#         owl_expression = args[0]
-
-#         # Check cache for existing results
-#         if str_expression in cache:
-#             return cache[str_expression]
-
-#         super_concepts = set()
-#         namespace, class_name = owl_expression.str.split('#') 
-#         class_expression = f"{namespace.split('/')[-1]}.{class_name}"
-
-#         all_classes = [i for i in list(onto.classes())]
-
-#         for j in all_classes:
-#             if str(j) == class_expression:
-#                 class_expression = j
-
-#         for D in list(cache.keys()):
-#             # print(owl_expression)
-#             # exit(0)
-#             if D in class_expression.ancestors():  # Check if C ⊑ D
-#                 super_concepts.add(D)
-
-#         print(super_concepts)
-#         exit(0)
-#         # Compute instances based on subsumption
-#         if len(super_concepts) == 0:
-#             instances = all_individuals
-#         else:
-#             instances = set.intersection(
-#                 *[wrapper(D, path_onto) for D in super_concepts]
-#             )
-
-#         # Filter instances by checking if each is an instance of the concept
-#         instance_set = set()
-
-#         for individual in instances:
-#              for type_entry in individual.is_a:
-#                 type_iri = str(type_entry.iri)
-#                 if owl_expression.str == type_iri:
-#                     instance_set.add(individual)
-#                     break
-
-#         # Store in cache
-#         store(str_expression, instance_set)
-#         return instance_set
-
-#     return wrapper
-