diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index bc97a64..af120ff 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -43,13 +43,13 @@ jobs:
- name: Build and test with uv
run: |
uv venv --python ${{ matrix.python-version }}
- CMAKE_ARGS="-DENABLE_GLCACHE=OFF -DENABLE_LRB=OFF -DENABLE_3L_CACHE=OFF" uv pip install -e .[dev] -vvv
+ uv pip install -e .[dev] -vvv
uv run python -c "import libcachesim; print('✓ Import successful for Python ${{ matrix.python-version }} on ${{ matrix.os }}')"
- name: Run tests
run: |
if [ -d "tests" ]; then
- uv run python -m pytest tests/ -v
+ uv run python -m pytest tests/ -v -m "not optional"
else
echo "No tests directory found, skipping tests"
fi
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 05bdbc2..3e63c5c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,25 +12,15 @@ endif()
message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
# Options
-option(ENABLE_GLCACHE "Enable group-learned cache" ON)
-option(ENABLE_LRB "Enable LRB" ON)
-option(ENABLE_3L_CACHE "Enable 3LCache" ON)
+option(ENABLE_GLCACHE "Enable group-learned cache" OFF)
+option(ENABLE_LRB "Enable LRB" OFF)
+option(ENABLE_3L_CACHE "Enable 3LCache" OFF)
# C++ standard
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
-# ============================================================================
-set(USER_PREFIX "$ENV{HOME}/local")
-
-list(APPEND CMAKE_PREFIX_PATH "${USER_PREFIX}")
-list(APPEND CMAKE_LIBRARY_PATH "${USER_PREFIX}/lib")
-list(APPEND CMAKE_INCLUDE_PATH "${USER_PREFIX}/include")
-
-include_directories("${USER_PREFIX}/include")
-link_directories("${USER_PREFIX}/lib")
-
# =============================================================================
# Compiler Flags Configuration
# =============================================================================
@@ -165,18 +155,6 @@ configure_logging()
# Dependency Management
# =============================================================================
-# Add user-installed dependencies to search paths
-if(DEFINED ENV{CMAKE_PREFIX_PATH})
- list(PREPEND CMAKE_PREFIX_PATH $ENV{CMAKE_PREFIX_PATH})
-endif()
-
-# Add common user installation paths
-set(USER_PREFIX_PATHS
- "$ENV{HOME}/local"
- "$ENV{HOME}/.local"
- "/usr/local"
-)
-
# Find required packages
find_package(Python3 REQUIRED COMPONENTS Interpreter Development.Module)
find_package(pybind11 CONFIG REQUIRED)
@@ -192,85 +170,23 @@ include_directories(${GLib_INCLUDE_DIRS})
link_directories(${GLib_LIBRARY_DIRS})
list(APPEND required_libs ${GLib_LIBRARIES})
-# ZSTD dependency - try multiple find methods
-find_package(ZSTD QUIET)
-if(NOT ZSTD_FOUND)
- # Try pkg-config
- pkg_check_modules(ZSTD_PC QUIET libzstd)
- if(ZSTD_PC_FOUND)
- set(ZSTD_FOUND TRUE)
- set(ZSTD_INCLUDE_DIR ${ZSTD_PC_INCLUDE_DIRS})
- set(ZSTD_LIBRARIES ${ZSTD_PC_LIBRARIES})
- set(ZSTD_LIBRARY_DIRS ${ZSTD_PC_LIBRARY_DIRS})
- else()
- # Try manual find
- find_path(ZSTD_INCLUDE_DIR zstd.h
- PATHS ${CMAKE_INCLUDE_PATH}
- PATH_SUFFIXES zstd
- )
- find_library(ZSTD_LIBRARIES zstd
- PATHS ${CMAKE_LIBRARY_PATH}
- )
- if(ZSTD_INCLUDE_DIR AND ZSTD_LIBRARIES)
- set(ZSTD_FOUND TRUE)
- endif()
- endif()
-endif()
-
-if(NOT ZSTD_FOUND)
- message(FATAL_ERROR "ZSTD not found. Please install zstd or set CMAKE_PREFIX_PATH to point to user installation.")
-endif()
-
+# ZSTD dependency
+find_package(ZSTD REQUIRED)
message(STATUS "ZSTD_INCLUDE_DIR: ${ZSTD_INCLUDE_DIR}, ZSTD_LIBRARIES: ${ZSTD_LIBRARIES}")
-include_directories(${ZSTD_INCLUDE_DIR})
-if(ZSTD_LIBRARY_DIRS)
- link_directories(${ZSTD_LIBRARY_DIRS})
+if("${ZSTD_LIBRARIES}" STREQUAL "")
+ message(FATAL_ERROR "zstd not found")
endif()
+include_directories(${ZSTD_INCLUDE_DIR})
+link_directories(${ZSTD_LIBRARY_DIRS})
list(APPEND required_libs ${ZSTD_LIBRARIES})
-# TCMalloc dependency (optional)
-find_library(TCMALLOC_LIBRARY tcmalloc
- PATHS ${CMAKE_LIBRARY_PATH}
-)
-if(TCMALLOC_LIBRARY)
- list(APPEND optional_libs ${TCMALLOC_LIBRARY})
- message(STATUS "TCMalloc found: ${TCMALLOC_LIBRARY}")
- add_compile_definitions(USE_TCMALLOC=1)
-else()
- message(STATUS "TCMalloc not found, using system malloc")
-endif()
-
# Optional dependencies based on features
if(ENABLE_GLCACHE)
- # Try to find XGBoost
- find_package(xgboost QUIET)
- if(NOT xgboost_FOUND)
- # Try manual find for user installation
- find_path(XGBOOST_INCLUDE_DIR xgboost
- PATHS ${CMAKE_INCLUDE_PATH}
- )
- find_library(XGBOOST_LIBRARIES xgboost
- PATHS ${CMAKE_LIBRARY_PATH}
- )
- if(XGBOOST_INCLUDE_DIR AND XGBOOST_LIBRARIES)
- set(xgboost_FOUND TRUE)
- add_library(xgboost::xgboost UNKNOWN IMPORTED)
- set_target_properties(xgboost::xgboost PROPERTIES
- IMPORTED_LOCATION ${XGBOOST_LIBRARIES}
- INTERFACE_INCLUDE_DIRECTORIES ${XGBOOST_INCLUDE_DIR}
- )
- endif()
- endif()
-
- if(xgboost_FOUND)
- include_directories(${XGBOOST_INCLUDE_DIR})
- list(APPEND optional_libs xgboost::xgboost)
- add_compile_definitions(ENABLE_GLCACHE=1)
- message(STATUS "XGBOOST_INCLUDE_DIR: ${XGBOOST_INCLUDE_DIR}")
- else()
- message(WARNING "XGBoost not found, disabling GLCACHE feature")
- set(ENABLE_GLCACHE OFF)
- endif()
+ find_package(xgboost REQUIRED)
+ include_directories(${XGBOOST_INCLUDE_DIR})
+ list(APPEND optional_libs xgboost::xgboost)
+ add_compile_definitions(ENABLE_GLCACHE=1)
+ message(STATUS "XGBOOST_INCLUDE_DIR: ${XGBOOST_INCLUDE_DIR}")
endif()
# LightGBM for LRB and 3L_CACHE
@@ -285,30 +201,22 @@ foreach(FEATURE ${LIGHTGBM_FEATURES})
endforeach()
if(LIGHTGBM_NEEDED)
- # Try to find LightGBM
if(NOT DEFINED LIGHTGBM_PATH)
- find_path(LIGHTGBM_PATH LightGBM
- PATHS ${CMAKE_INCLUDE_PATH}
- )
- endif()
-
- if(NOT DEFINED LIGHTGBM_LIB)
- find_library(LIGHTGBM_LIB _lightgbm
- PATHS ${CMAKE_LIBRARY_PATH}
- )
+ find_path(LIGHTGBM_PATH LightGBM)
endif()
-
if(NOT LIGHTGBM_PATH)
- message(FATAL_ERROR "LIGHTGBM_PATH not found. Please install LightGBM or set CMAKE_PREFIX_PATH.")
+ message(FATAL_ERROR "LIGHTGBM_PATH not found")
endif()
+ if(NOT DEFINED LIGHTGBM_LIB)
+ find_library(LIGHTGBM_LIB _lightgbm)
+ endif()
if(NOT LIGHTGBM_LIB)
- message(FATAL_ERROR "LIGHTGBM_LIB not found. Please install LightGBM or set CMAKE_PREFIX_PATH.")
+ message(FATAL_ERROR "LIGHTGBM_LIB not found")
endif()
include_directories(${LIGHTGBM_PATH})
list(APPEND optional_libs ${LIGHTGBM_LIB})
- message(STATUS "LightGBM found: ${LIGHTGBM_PATH}, ${LIGHTGBM_LIB}")
endif()
# =============================================================================
@@ -411,4 +319,4 @@ configure_platform_specific_linking(libcachesim_python)
# Installation
# =============================================================================
-install(TARGETS libcachesim_python LIBRARY DESTINATION libcachesim)
+install(TARGETS libcachesim_python LIBRARY DESTINATION libcachesim)
\ No newline at end of file
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
index f2686f3..481de58 100644
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -9,17 +9,24 @@ docs_dir: src
nav:
- Home:
- libCacheSim Python: index.md
- - Getting Started:
- - Quick Start: quickstart.md
+ - Getting Started:
+ - getting_started/quickstart.md
+ - getting_started/installation.md
+ - Examples:
+ - examples/simulation.md
+ - examples/analysis.md
+ - examples/plugins.md
+ - User Guide:
+ - FAQ: faq.md
+ - Developer Guide:
+ - General: developer.md
- API Reference:
- API Documentation: api.md
- - Examples:
- - Usage Examples: examples.md
theme:
name: material
- logo: assets/logos/logo-only-light.ico
- favicon: assets/logos/logo-only-light.ico
+ # logo: assets/logos/logo-only-light.ico
+ # favicon: assets/logos/logo-only-light.ico
language: en
palette:
# Palette toggle for automatic mode
@@ -77,7 +84,9 @@ plugins:
build: true
nav_translations:
Home: 首页
- Quick Start: 快速开始
+ Getting Started: 快速开始
+ User Guide: 用户指南
+ Developer Guide: 开发者指南
API Reference: API参考
Examples: 使用示例
diff --git a/docs/src/assets/logos/logo.jpg b/docs/src/assets/logos/logo.jpg
new file mode 100644
index 0000000..779fe2a
Binary files /dev/null and b/docs/src/assets/logos/logo.jpg differ
diff --git a/docs/src/en/api.md b/docs/src/en/api.md
index b3c4a68..8c3fc1b 100644
--- a/docs/src/en/api.md
+++ b/docs/src/en/api.md
@@ -1,395 +1,3 @@
# API Reference
-This page provides detailed API documentation for the libCacheSim Python bindings.
-
-## Core Classes
-
-### Cache Classes
-
-All cache classes inherit from the base cache interface and provide the following methods:
-
-```python
-class Cache:
- """Base cache interface."""
-
- def get(self, obj_id: int, obj_size: int = 1) -> bool:
- """Request an object from the cache.
-
- Args:
- obj_id: Object identifier
- obj_size: Object size in bytes
-
- Returns:
- True if cache hit, False if cache miss
- """
-
- def get_hit_ratio(self) -> float:
- """Get the current cache hit ratio."""
-
- def get_miss_ratio(self) -> float:
- """Get the current cache miss ratio."""
-
- def get_num_hits(self) -> int:
- """Get the total number of cache hits."""
-
- def get_num_misses(self) -> int:
- """Get the total number of cache misses."""
-```
-
-### Available Cache Algorithms
-
-```python
-# Basic algorithms
-def LRU(cache_size: int) -> Cache: ...
-def LFU(cache_size: int) -> Cache: ...
-def FIFO(cache_size: int) -> Cache: ...
-def Clock(cache_size: int) -> Cache: ...
-def Random(cache_size: int) -> Cache: ...
-
-# Advanced algorithms
-def ARC(cache_size: int) -> Cache: ...
-def S3FIFO(cache_size: int) -> Cache: ...
-def Sieve(cache_size: int) -> Cache: ...
-def TinyLFU(cache_size: int) -> Cache: ...
-def TwoQ(cache_size: int) -> Cache: ...
-```ence
-
-This page provides detailed API documentation for libCacheSim Python bindings.
-
-## Core Classes
-
-### Cache Classes
-
-All cache classes inherit from the base cache interface and provide the following methods:
-
-::: libcachesim.cache
-
-### TraceReader
-
-```python
-class TraceReader:
- """Read trace files in various formats."""
-
- def __init__(self, trace_path: str, trace_type: TraceType,
- reader_params: ReaderInitParam = None):
- """Initialize trace reader.
-
- Args:
- trace_path: Path to trace file
- trace_type: Type of trace format
- reader_params: Optional reader configuration
- """
-
- def __iter__(self):
- """Iterate over requests in the trace."""
-
- def reset(self):
- """Reset reader to beginning of trace."""
-
- def skip(self, n: int):
- """Skip n requests."""
-
- def clone(self):
- """Create a copy of the reader."""
-```
-
-### SyntheticReader
-
-```python
-class SyntheticReader:
- """Generate synthetic workloads."""
-
- def __init__(self, num_objects: int, num_requests: int,
- distribution: str = "zipf", alpha: float = 1.0,
- obj_size: int = 1, seed: int = None):
- """Initialize synthetic reader.
-
- Args:
- num_objects: Number of unique objects
- num_requests: Total requests to generate
- distribution: Distribution type ("zipf", "uniform")
- alpha: Zipf skewness parameter
- obj_size: Object size in bytes
- seed: Random seed for reproducibility
- """
-```
-
-### TraceAnalyzer
-
-```python
-class TraceAnalyzer:
- """Analyze trace characteristics."""
-
- def __init__(self, trace_path: str, trace_type: TraceType,
- reader_params: ReaderInitParam = None):
- """Initialize trace analyzer."""
-
- def get_num_requests(self) -> int:
- """Get total number of requests."""
-
- def get_num_objects(self) -> int:
- """Get number of unique objects."""
-
- def get_working_set_size(self) -> int:
- """Get working set size."""
-```
-
-## Enumerations and Constants
-
-### TraceType
-
-```python
-class TraceType:
- """Supported trace file formats."""
- CSV_TRACE = "csv"
- BINARY_TRACE = "binary"
- ORACLE_GENERAL_TRACE = "oracle"
- PLAIN_TXT_TRACE = "txt"
-```
-
-### SamplerType
-
-```python
-class SamplerType:
- """Sampling strategies."""
- SPATIAL_SAMPLER = "spatial"
- TEMPORAL_SAMPLER = "temporal"
-```
-
-### ReqOp
-
-```python
-class ReqOp:
- """Request operation types."""
- READ = "read"
- WRITE = "write"
- DELETE = "delete"
-```
-
-## Data Structures
-
-### Request
-
-```python
-class Request:
- """Represents a cache request."""
-
- def __init__(self):
- self.obj_id: int = 0
- self.obj_size: int = 1
- self.timestamp: int = 0
- self.op: str = "read"
-```
-
-### ReaderInitParam
-
-```python
-class ReaderInitParam:
- """Configuration parameters for trace readers."""
-
- def __init__(self):
- self.has_header: bool = False
- self.delimiter: str = ","
- self.obj_id_is_num: bool = True
- self.ignore_obj_size: bool = False
- self.ignore_size_zero_req: bool = True
- self.cap_at_n_req: int = -1
- self.block_size: int = 4096
- self.trace_start_offset: int = 0
-
- # Field mappings (1-indexed)
- self.time_field: int = 1
- self.obj_id_field: int = 2
- self.obj_size_field: int = 3
- self.op_field: int = 4
-
- self.sampler: Sampler = None
-```
-
-### Sampler
-
-```python
-class Sampler:
- """Configuration for request sampling."""
-
- def __init__(self, sample_ratio: float = 1.0,
- type: str = "spatial"):
- """Initialize sampler.
-
- Args:
- sample_ratio: Fraction of requests to sample (0.0-1.0)
- type: Sampling type ("spatial" or "temporal")
- """
- self.sample_ratio = sample_ratio
- self.type = type
-```
-
-## Utility Functions
-
-### Synthetic Trace Generation
-
-```python
-def create_zipf_requests(num_objects, num_requests, alpha, obj_size, seed=None):
- """
- Create Zipf-distributed synthetic requests.
-
- Args:
- num_objects (int): Number of unique objects
- num_requests (int): Total number of requests to generate
- alpha (float): Zipf skewness parameter (higher = more skewed)
- obj_size (int): Size of each object in bytes
- seed (int, optional): Random seed for reproducibility
-
- Returns:
- List[Request]: List of generated requests
- """
-
-def create_uniform_requests(num_objects, num_requests, obj_size, seed=None):
- """
- Create uniformly-distributed synthetic requests.
-
- Args:
- num_objects (int): Number of unique objects
- num_requests (int): Total number of requests to generate
- obj_size (int): Size of each object in bytes
- seed (int, optional): Random seed for reproducibility
-
- Returns:
- List[Request]: List of generated requests
- """
-```
-
-### Cache Algorithms
-
-Available cache algorithms with their factory functions:
-
-```python
-# Basic algorithms
-LRU(cache_size: int) -> Cache
-LFU(cache_size: int) -> Cache
-FIFO(cache_size: int) -> Cache
-Clock(cache_size: int) -> Cache
-Random(cache_size: int) -> Cache
-
-# Advanced algorithms
-ARC(cache_size: int) -> Cache
-S3FIFO(cache_size: int) -> Cache
-Sieve(cache_size: int) -> Cache
-TinyLFU(cache_size: int) -> Cache
-TwoQ(cache_size: int) -> Cache
-LRB(cache_size: int) -> Cache
-
-# Experimental algorithms
-cache_3L(cache_size: int) -> Cache
-```
-
-### Performance Metrics
-
-```python
-class CacheStats:
- """Cache performance statistics."""
-
- def __init__(self):
- self.hits = 0
- self.misses = 0
- self.evictions = 0
- self.bytes_written = 0
- self.bytes_read = 0
-
- @property
- def hit_ratio(self) -> float:
- """Calculate hit ratio."""
- total = self.hits + self.misses
- return self.hits / total if total > 0 else 0.0
-
- @property
- def miss_ratio(self) -> float:
- """Calculate miss ratio."""
- return 1.0 - self.hit_ratio
-```
-
-## Error Handling
-
-The library uses standard Python exceptions:
-
-- `ValueError`: Invalid parameters or configuration
-- `FileNotFoundError`: Trace file not found
-- `RuntimeError`: Runtime errors from underlying C++ library
-- `MemoryError`: Out of memory conditions
-
-Example error handling:
-
-```python
-try:
- reader = lcs.TraceReader("nonexistent.csv", lcs.TraceType.CSV_TRACE)
-except FileNotFoundError:
- print("Trace file not found")
-except ValueError as e:
- print(f"Invalid configuration: {e}")
-```
-
-## Configuration Options
-
-### Reader Configuration
-
-```python
-reader_params = lcs.ReaderInitParam(
- has_header=True, # CSV has header row
- delimiter=",", # Field delimiter
- obj_id_is_num=True, # Object IDs are numeric
- ignore_obj_size=False, # Don't ignore object sizes
- ignore_size_zero_req=True, # Ignore zero-size requests
- cap_at_n_req=1000000, # Limit number of requests
- block_size=4096, # Block size for block-based traces
- trace_start_offset=0, # Skip initial requests
-)
-
-# Field mappings (1-indexed)
-reader_params.time_field = 1
-reader_params.obj_id_field = 2
-reader_params.obj_size_field = 3
-reader_params.op_field = 4
-```
-
-### Sampling Configuration
-
-```python
-sampler = lcs.Sampler(
- sample_ratio=0.1, # Sample 10% of requests
- type=lcs.SamplerType.SPATIAL_SAMPLER # Spatial sampling
-)
-reader_params.sampler = sampler
-```
-
-## Thread Safety
-
-The library provides thread-safe operations for most use cases:
-
-- Cache operations are thread-safe within a single cache instance
-- Multiple readers can be used concurrently
-- Analysis operations can utilize multiple threads
-
-For high-concurrency scenarios, consider using separate cache instances per thread.
-
-## Memory Management
-
-The library automatically manages memory for most operations:
-
-- Cache objects handle their own memory allocation
-- Trace readers manage buffering automatically
-- Request objects are lightweight and reusable
-
-For large-scale simulations, monitor memory usage and consider:
-
-- Using sampling to reduce trace size
-- Processing traces in chunks
-- Limiting cache sizes appropriately
-
-## Best Practices
-
-1. **Use appropriate cache sizes**: Size caches based on your simulation goals
-2. **Set random seeds**: For reproducible results in synthetic traces
-3. **Handle errors**: Always wrap file operations in try-catch blocks
-4. **Monitor memory**: For large traces, consider sampling or chunking
-5. **Use threading**: Leverage multi-threading for analysis tasks
-6. **Validate traces**: Check trace format and content before simulation
+[TBD]
\ No newline at end of file
diff --git a/docs/src/en/developer.md b/docs/src/en/developer.md
new file mode 100644
index 0000000..8fcc019
--- /dev/null
+++ b/docs/src/en/developer.md
@@ -0,0 +1,3 @@
+# Developer Guide
+
+[TBD]
\ No newline at end of file
diff --git a/docs/src/en/examples.md b/docs/src/en/examples.md
deleted file mode 100644
index 0d56aa9..0000000
--- a/docs/src/en/examples.md
+++ /dev/null
@@ -1,501 +0,0 @@
-# Examples
-
-This page provides practical examples of using libCacheSim Python bindings for various cache simulation scenarios.
-
-## Basic Cache Simulation
-
-### Simple LRU Cache Example
-
-```python
-import libcachesim as lcs
-
-# Create an LRU cache with 1MB capacity
-cache = lcs.LRU(cache_size=1024*1024)
-
-# Generate synthetic Zipf trace
-reader = lcs.SyntheticReader(
- num_of_req=10000,
- obj_size=1024,
- dist="zipf",
- alpha=1.0,
- num_objects=1000,
- seed=42
-)
-
-# Simulate cache behavior
-hits = 0
-total = 0
-
-for req in reader:
- if cache.get(req):
- hits += 1
- total += 1
-
-print(f"Hit ratio: {hits/total:.4f}")
-print(f"Total requests: {total}")
-```
-
-### Comparing Multiple Cache Algorithms
-
-```python
-import libcachesim as lcs
-
-def compare_algorithms(trace_file, cache_size):
- """Compare hit ratios of different cache algorithms."""
-
- algorithms = {
- "LRU": lcs.LRU,
- "LFU": lcs.LFU,
- "FIFO": lcs.FIFO,
- "Clock": lcs.Clock,
- "ARC": lcs.ARC,
- "S3FIFO": lcs.S3FIFO
- }
-
- results = {}
-
- for name, cache_class in algorithms.items():
- # Create fresh reader for each algorithm
- reader = lcs.SyntheticReader(
- num_of_req=10000,
- obj_size=1024,
- dist="zipf",
- alpha=1.0,
- seed=42 # Same seed for fair comparison
- )
-
- cache = cache_class(cache_size=cache_size)
- hits = 0
-
- for req in reader:
- if cache.get(req):
- hits += 1
-
- hit_ratio = hits / reader.get_num_of_req()
- results[name] = hit_ratio
- print(f"{name:8}: {hit_ratio:.4f}")
-
- return results
-
-# Compare with 64KB cache
-results = compare_algorithms("trace.csv", 64*1024)
-```
-
-## Working with Real Traces
-
-### Reading CSV Traces
-
-```python
-import libcachesim as lcs
-
-def simulate_csv_trace(csv_file):
- """Simulate cache behavior on CSV trace."""
-
- # Configure CSV reader
- reader_params = lcs.ReaderInitParam(
- has_header=True,
- delimiter=",",
- obj_id_is_num=True
- )
-
- # Set field mappings (1-indexed)
- reader_params.time_field = 1
- reader_params.obj_id_field = 2
- reader_params.obj_size_field = 3
- reader_params.op_field = 4
-
- reader = lcs.TraceReader(
- trace=csv_file,
- trace_type=lcs.TraceType.CSV_TRACE,
- reader_init_params=reader_params
- )
-
- print(f"Loaded trace with {reader.get_num_of_req()} requests")
-
- # Test different cache sizes
- cache_sizes = [1024*1024*i for i in [1, 2, 4, 8, 16]] # 1MB to 16MB
-
- for size in cache_sizes:
- cache = lcs.LRU(cache_size=size)
- reader.reset() # Reset to beginning
-
- hits = 0
- for req in reader:
- if cache.get(req):
- hits += 1
-
- hit_ratio = hits / reader.get_num_of_req()
- print(f"Cache size: {size//1024//1024}MB, Hit ratio: {hit_ratio:.4f}")
-
-# Usage
-simulate_csv_trace("workload.csv")
-```
-
-### Handling Large Traces with Sampling
-
-```python
-import libcachesim as lcs
-
-def analyze_large_trace(trace_file, sample_ratio=0.1):
- """Analyze large trace using sampling."""
-
- # Create sampler
- sampler = lcs.Sampler(
- sample_ratio=sample_ratio,
- type=lcs.SamplerType.SPATIAL_SAMPLER
- )
-
- reader_params = lcs.ReaderInitParam(
- has_header=True,
- delimiter=",",
- obj_id_is_num=True
- )
- reader_params.sampler = sampler
-
- reader = lcs.TraceReader(
- trace=trace_file,
- trace_type=lcs.TraceType.CSV_TRACE,
- reader_init_params=reader_params
- )
-
- print(f"Sampling {sample_ratio*100}% of trace")
- print(f"Sampled requests: {reader.get_num_of_req()}")
-
- # Run simulation on sampled trace
- cache = lcs.LRU(cache_size=10*1024*1024) # 10MB
- hits = 0
-
- for req in reader:
- if cache.get(req):
- hits += 1
-
- hit_ratio = hits / reader.get_num_of_req()
- print(f"Hit ratio on sampled trace: {hit_ratio:.4f}")
-
-# Sample 5% of a large trace
-analyze_large_trace("large_trace.csv", sample_ratio=0.05)
-```
-
-## Advanced Analysis
-
-### Comprehensive Trace Analysis
-
-```python
-import libcachesim as lcs
-import os
-
-def comprehensive_analysis(trace_file, output_dir="analysis_results"):
- """Run comprehensive trace analysis."""
-
- # Create output directory
- os.makedirs(output_dir, exist_ok=True)
-
- # Load trace
- reader = lcs.TraceReader(trace_file, lcs.TraceType.CSV_TRACE)
-
- # Run trace analysis
- analyzer = lcs.TraceAnalyzer(reader, f"{output_dir}/trace_analysis")
- print("Running trace analysis...")
- analyzer.run()
-
- print(f"Analysis complete. Results saved to {output_dir}/")
- print("Generated files:")
- for file in os.listdir(output_dir):
- print(f" - {file}")
-
-# Run analysis
-comprehensive_analysis("workload.csv")
-```
-
-### Hit Ratio Curves
-
-```python
-import libcachesim as lcs
-import matplotlib.pyplot as plt
-
-def plot_hit_ratio_curve(trace_file, algorithms=None):
- """Plot hit ratio curves for different algorithms."""
-
- if algorithms is None:
- algorithms = ["LRU", "LFU", "FIFO", "ARC"]
-
- # Cache sizes from 1MB to 100MB
- cache_sizes = [1024*1024*i for i in range(1, 101, 5)]
-
- plt.figure(figsize=(10, 6))
-
- for algo_name in algorithms:
- hit_ratios = []
-
- for cache_size in cache_sizes:
- reader = lcs.SyntheticReader(
- num_of_req=5000,
- obj_size=1024,
- dist="zipf",
- alpha=1.0,
- seed=42
- )
-
- cache = getattr(lcs, algo_name)(cache_size=cache_size)
- hits = 0
-
- for req in reader:
- if cache.get(req):
- hits += 1
-
- hit_ratio = hits / reader.get_num_of_req()
- hit_ratios.append(hit_ratio)
-
- # Convert to MB for plotting
- sizes_mb = [size // 1024 // 1024 for size in cache_sizes]
- plt.plot(sizes_mb, hit_ratios, label=algo_name, marker='o')
-
- plt.xlabel('Cache Size (MB)')
- plt.ylabel('Hit Ratio')
- plt.title('Hit Ratio vs Cache Size')
- plt.legend()
- plt.grid(True, alpha=0.3)
- plt.show()
-
-# Generate hit ratio curves
-plot_hit_ratio_curve("trace.csv")
-```
-
-## Custom Cache Policies
-
-### Implementing a Custom LRU with Python Hooks
-
-```python
-import libcachesim as lcs
-from collections import OrderedDict
-
-def create_python_lru(cache_size):
- """Create a custom LRU cache using Python hooks."""
-
- def init_hook(size):
- """Initialize cache data structure."""
- return {
- 'data': OrderedDict(),
- 'size': 0,
- 'capacity': size
- }
-
- def hit_hook(cache_dict, obj_id, obj_size):
- """Handle cache hit."""
- # Move to end (most recently used)
- cache_dict['data'].move_to_end(obj_id)
-
- def miss_hook(cache_dict, obj_id, obj_size):
- """Handle cache miss."""
- # Add new item
- cache_dict['data'][obj_id] = obj_size
- cache_dict['size'] += obj_size
-
- def eviction_hook(cache_dict, obj_id, obj_size):
- """Handle eviction when cache is full."""
- # Remove least recently used items
- while cache_dict['size'] + obj_size > cache_dict['capacity']:
- if not cache_dict['data']:
- break
- lru_id, lru_size = cache_dict['data'].popitem(last=False)
- cache_dict['size'] -= lru_size
-
- return lcs.PythonHookCache(
- cache_size=cache_size,
- init_hook=init_hook,
- hit_hook=hit_hook,
- miss_hook=miss_hook,
- eviction_hook=eviction_hook
- )
-
-# Test custom LRU
-custom_cache = create_python_lru(1024*1024)
-reader = lcs.SyntheticReader(num_of_req=1000, obj_size=1024)
-
-hits = 0
-for req in reader:
- if custom_cache.get(req):
- hits += 1
-
-print(f"Custom LRU hit ratio: {hits/1000:.4f}")
-```
-
-### Time-based Cache with TTL
-
-```python
-import libcachesim as lcs
-import time
-
-def create_ttl_cache(cache_size, ttl_seconds=300):
- """Create a cache with time-to-live (TTL) expiration."""
-
- def init_hook(size):
- return {
- 'data': {},
- 'timestamps': {},
- 'size': 0,
- 'capacity': size,
- 'ttl': ttl_seconds
- }
-
- def is_expired(cache_dict, obj_id):
- """Check if object has expired."""
- if obj_id not in cache_dict['timestamps']:
- return True
- return time.time() - cache_dict['timestamps'][obj_id] > cache_dict['ttl']
-
- def hit_hook(cache_dict, obj_id, obj_size):
- """Handle cache hit."""
- if is_expired(cache_dict, obj_id):
- # Expired, treat as miss
- if obj_id in cache_dict['data']:
- del cache_dict['data'][obj_id]
- del cache_dict['timestamps'][obj_id]
- cache_dict['size'] -= obj_size
- return False
- return True
-
- def miss_hook(cache_dict, obj_id, obj_size):
- """Handle cache miss."""
- current_time = time.time()
- cache_dict['data'][obj_id] = obj_size
- cache_dict['timestamps'][obj_id] = current_time
- cache_dict['size'] += obj_size
-
- def eviction_hook(cache_dict, obj_id, obj_size):
- """Handle eviction."""
- # First try to evict expired items
- current_time = time.time()
- expired_items = []
-
- for oid, timestamp in cache_dict['timestamps'].items():
- if current_time - timestamp > cache_dict['ttl']:
- expired_items.append(oid)
-
- for oid in expired_items:
- if oid in cache_dict['data']:
- cache_dict['size'] -= cache_dict['data'][oid]
- del cache_dict['data'][oid]
- del cache_dict['timestamps'][oid]
-
- # If still need space, evict oldest items
- while cache_dict['size'] + obj_size > cache_dict['capacity']:
- if not cache_dict['data']:
- break
- # Find oldest item
- oldest_id = min(cache_dict['timestamps'].keys(),
- key=lambda x: cache_dict['timestamps'][x])
- cache_dict['size'] -= cache_dict['data'][oldest_id]
- del cache_dict['data'][oldest_id]
- del cache_dict['timestamps'][oldest_id]
-
- return lcs.PythonHookCache(
- cache_size=cache_size,
- init_hook=init_hook,
- hit_hook=hit_hook,
- miss_hook=miss_hook,
- eviction_hook=eviction_hook
- )
-
-# Test TTL cache
-ttl_cache = create_ttl_cache(1024*1024, ttl_seconds=60)
-```
-
-## Performance Optimization
-
-### Batch Processing for Large Workloads
-
-```python
-import libcachesim as lcs
-
-def batch_simulation(trace_file, batch_size=10000):
- """Process large traces in batches to optimize memory usage."""
-
- reader = lcs.TraceReader(trace_file, lcs.TraceType.CSV_TRACE)
- cache = lcs.LRU(cache_size=10*1024*1024)
-
- total_requests = 0
- total_hits = 0
- batch_count = 0
-
- while True:
- batch_hits = 0
- batch_requests = 0
-
- # Process a batch of requests
- for _ in range(batch_size):
- try:
- req = reader.read_one_req()
- if req.valid:
- if cache.get(req):
- batch_hits += 1
- batch_requests += 1
- else:
- break # End of trace
- except:
- break
-
- if batch_requests == 0:
- break
-
- total_hits += batch_hits
- total_requests += batch_requests
- batch_count += 1
-
- # Print progress
- hit_ratio = batch_hits / batch_requests
- print(f"Batch {batch_count}: {batch_requests} requests, "
- f"hit ratio: {hit_ratio:.4f}")
-
- overall_hit_ratio = total_hits / total_requests
- print(f"Overall: {total_requests} requests, hit ratio: {overall_hit_ratio:.4f}")
-
-# Process in batches
-batch_simulation("large_trace.csv", batch_size=50000)
-```
-
-### Multi-threaded Analysis
-
-```python
-import libcachesim as lcs
-import concurrent.futures
-import threading
-
-def parallel_cache_comparison(trace_file, algorithms, cache_size):
- """Compare cache algorithms in parallel."""
-
- def simulate_algorithm(algo_name):
- """Simulate single algorithm."""
- reader = lcs.TraceReader(trace_file, lcs.TraceType.CSV_TRACE)
- cache = getattr(lcs, algo_name)(cache_size=cache_size)
-
- hits = 0
- total = 0
-
- for req in reader:
- if cache.get(req):
- hits += 1
- total += 1
-
- hit_ratio = hits / total if total > 0 else 0
- return algo_name, hit_ratio
-
- # Run simulations in parallel
- with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
- futures = {executor.submit(simulate_algorithm, algo): algo
- for algo in algorithms}
-
- results = {}
- for future in concurrent.futures.as_completed(futures):
- algo_name, hit_ratio = future.result()
- results[algo_name] = hit_ratio
- print(f"{algo_name}: {hit_ratio:.4f}")
-
- return results
-
-# Compare algorithms in parallel
-algorithms = ["LRU", "LFU", "FIFO", "ARC", "S3FIFO"]
-results = parallel_cache_comparison("trace.csv", algorithms, 1024*1024)
-```
-
-These examples demonstrate the versatility and power of libCacheSim Python bindings for cache simulation, analysis, and research. You can modify and extend these examples for your specific use cases.
diff --git a/docs/src/en/examples/analysis.md b/docs/src/en/examples/analysis.md
new file mode 100644
index 0000000..ccdcb6f
--- /dev/null
+++ b/docs/src/en/examples/analysis.md
@@ -0,0 +1,3 @@
+# Trace Analysis
+
+[TBD]
\ No newline at end of file
diff --git a/docs/src/en/plugin.md b/docs/src/en/examples/plugins.md
similarity index 100%
rename from docs/src/en/plugin.md
rename to docs/src/en/examples/plugins.md
diff --git a/docs/src/en/examples/simulation.md b/docs/src/en/examples/simulation.md
new file mode 100644
index 0000000..03d5e76
--- /dev/null
+++ b/docs/src/en/examples/simulation.md
@@ -0,0 +1,3 @@
+# Cache Simulation
+
+[TBD]
\ No newline at end of file
diff --git a/docs/src/en/faq.md b/docs/src/en/faq.md
new file mode 100644
index 0000000..dd82326
--- /dev/null
+++ b/docs/src/en/faq.md
@@ -0,0 +1,5 @@
+# Frequently Asked Questions
+
+1. What should I do if `pip install` fails?
+
+See [installation](https://cachemon.github.io/libCacheSim-python/getting_started/installation/).
\ No newline at end of file
diff --git a/docs/src/en/getting_started/installation.md b/docs/src/en/getting_started/installation.md
new file mode 100644
index 0000000..7e0f4ef
--- /dev/null
+++ b/docs/src/en/getting_started/installation.md
@@ -0,0 +1,3 @@
+# Installation
+
+[TBD]
\ No newline at end of file
diff --git a/docs/src/en/getting_started/quickstart.md b/docs/src/en/getting_started/quickstart.md
new file mode 100644
index 0000000..b913a9d
--- /dev/null
+++ b/docs/src/en/getting_started/quickstart.md
@@ -0,0 +1,205 @@
+# Quickstart
+
+This guide will help you get started with libCacheSim.
+
+## Prerequisites
+
+- OS: Linux / macOS
+- Python: 3.9 -- 3.13
+
+## Installation
+
+You can install libCacheSim using [pip](https://pypi.org/project/libcachesim/) directly.
+
+It's recommended to use [uv](https://docs.astral.sh/uv/), a very fast Python environment manager, to create and manage Python environments. Please follow the [documentation](https://docs.astral.sh/uv/#getting-started) to install `uv`. After installing `uv`, you can create a new Python environment and install libCacheSim using the following commands:
+
+```bash
+uv venv --python 3.12 --seed
+source .venv/bin/activate
+uv pip install libcachesim
+```
+
+For users who want to run LRB, ThreeLCache, and GLCache eviction algorithms:
+
+!!! important
+    If `uv` cannot find prebuilt wheels for your machine, the build system will skip these algorithms by default.
+
+To enable them, you need to install all third-party dependencies first.
+
+!!! note
+    To install all dependencies, you can use the provided scripts.
+ ```bash
+ git clone https://github.com/cacheMon/libCacheSim-python.git
+ cd libCacheSim-python
+ bash scripts/install_deps.sh
+
+ # If you cannot install software directly (e.g., no sudo access)
+ bash scripts/install_deps_user.sh
+ ```
+
+Then, you can reinstall libcachesim using the following commands:
+
+```bash
+# Enable LRB
+CMAKE_ARGS="-DENABLE_LRB=ON" uv pip install --reinstall --no-binary libcachesim libcachesim
+# Enable ThreeLCache
+CMAKE_ARGS="-DENABLE_3L_CACHE=ON" uv pip install --reinstall --no-binary libcachesim libcachesim
+# Enable GLCache
+CMAKE_ARGS="-DENABLE_GLCACHE=ON" uv pip install --reinstall --no-binary libcachesim libcachesim
+```
+
+## Cache Simulation
+
+With libcachesim installed, you can run cache simulations with various eviction algorithms and cache traces. See the example script:
+
+??? code
+ ```python
+ import libcachesim as lcs
+
+ # Step 1: Get one trace from S3 bucket
+ URI = "cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
+ dl = lcs.DataLoader()
+ dl.load(URI)
+
+ # Step 2: Open trace and process efficiently
+ reader = lcs.TraceReader(
+ trace = dl.get_cache_path(URI),
+ trace_type = lcs.TraceType.ORACLE_GENERAL_TRACE,
+ reader_init_params = lcs.ReaderInitParam(ignore_obj_size=False)
+ )
+
+ # Step 3: Initialize cache
+ cache = lcs.S3FIFO(cache_size=1024*1024)
+
+ # Step 4: Process entire trace efficiently (C++ backend)
+ obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
+ print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
+
+ # Step 4.1: Process with limited number of requests
+ cache = lcs.S3FIFO(cache_size=1024*1024)
+ obj_miss_ratio, byte_miss_ratio = cache.process_trace(
+ reader,
+ start_req=0,
+ max_req=1000
+ )
+ print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}")
+ ```
+
+The above example demonstrates the basic workflow of using `libcachesim` for cache simulation:
+
+1. Use `DataLoader` to download a cache trace file from an S3 bucket.
+2. Open and efficiently process the trace file with `TraceReader`.
+3. Initialize a cache object (here, `S3FIFO`) with a specified cache size (e.g., 1MB).
+4. Run the simulation on the entire trace using `process_trace` to obtain object and byte miss ratios.
+5. Optionally, process only a portion of the trace by specifying `start_req` and `max_req` for partial simulation.
+
+This workflow applies to most cache algorithms and trace types, making it easy to get started and customize your experiments.
+
+## Trace Analysis
+
+Here is an example demonstrating how to use `TraceAnalyzer`.
+
+??? code
+ ```python
+ import libcachesim as lcs
+
+ # Step 1: Get one trace from S3 bucket
+ URI = "cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
+ dl = lcs.DataLoader()
+ dl.load(URI)
+
+ reader = lcs.TraceReader(
+ trace = dl.get_cache_path(URI),
+ trace_type = lcs.TraceType.ORACLE_GENERAL_TRACE,
+ reader_init_params = lcs.ReaderInitParam(ignore_obj_size=False)
+ )
+
+ analysis_option = lcs.AnalysisOption(
+ req_rate=True, # Keep basic request rate analysis
+ access_pattern=False, # Disable access pattern analysis
+ size=True, # Keep size analysis
+ reuse=False, # Disable reuse analysis for small datasets
+ popularity=False, # Disable popularity analysis for small datasets (< 200 objects)
+ ttl=False, # Disable TTL analysis
+ popularity_decay=False, # Disable popularity decay analysis
+ lifetime=False, # Disable lifetime analysis
+ create_future_reuse_ccdf=False, # Disable experimental features
+ prob_at_age=False, # Disable experimental features
+ size_change=False, # Disable size change analysis
+ )
+
+ analysis_param = lcs.AnalysisParam()
+
+ analyzer = lcs.TraceAnalyzer(
+ reader, "example_analysis", analysis_option=analysis_option, analysis_param=analysis_param
+ )
+
+ analyzer.run()
+ ```
+
+The above code demonstrates how to perform trace analysis using `libcachesim`. The workflow is as follows:
+
+1. Download a trace file from an S3 bucket using `DataLoader`.
+2. Open the trace file with `TraceReader`, specifying the trace type and any reader initialization parameters.
+3. Configure the analysis options with `AnalysisOption` to enable or disable specific analyses (such as request rate, size, etc.).
+4. Optionally, set additional analysis parameters with `AnalysisParam`.
+5. Create a `TraceAnalyzer` object with the reader, output directory, and the chosen options and parameters.
+6. Run the analysis with `analyzer.run()`.
+
+After running, you can access the analysis results, such as summary statistics (`stat`) or detailed results (e.g., `example_analysis.size`).
+
+## Plugin System
+
+libCacheSim also allows users to develop their own cache eviction algorithms and test them via the plugin system.
+
+Here is an example of implementing `LRU` via the plugin system.
+
+??? code
+ ```python
+ from collections import OrderedDict
+ from typing import Any
+
+ from libcachesim import PluginCache, LRU, CommonCacheParams, Request
+
+ def init_hook(_: CommonCacheParams) -> Any:
+ return OrderedDict()
+
+ def hit_hook(data: Any, req: Request) -> None:
+ data.move_to_end(req.obj_id, last=True)
+
+ def miss_hook(data: Any, req: Request) -> None:
+        data[req.obj_id] = req.obj_size
+
+ def eviction_hook(data: Any, _: Request) -> int:
+ return data.popitem(last=False)[0]
+
+ def remove_hook(data: Any, obj_id: int) -> None:
+ data.pop(obj_id, None)
+
+ def free_hook(data: Any) -> None:
+ data.clear()
+
+
+ plugin_lru_cache = PluginCache(
+ cache_size=128,
+ cache_init_hook=init_hook,
+ cache_hit_hook=hit_hook,
+ cache_miss_hook=miss_hook,
+ cache_eviction_hook=eviction_hook,
+ cache_remove_hook=remove_hook,
+ cache_free_hook=free_hook,
+ cache_name="Plugin_LRU",
+ )
+    import libcachesim as lcs
+    reader = lcs.SyntheticReader(num_objects=1000, num_of_req=10000, obj_size=1)
+ req_miss_ratio, byte_miss_ratio = plugin_lru_cache.process_trace(reader)
+ ref_req_miss_ratio, ref_byte_miss_ratio = LRU(128).process_trace(reader)
+ print(f"plugin req miss ratio {req_miss_ratio}, ref req miss ratio {ref_req_miss_ratio}")
+ print(f"plugin byte miss ratio {byte_miss_ratio}, ref byte miss ratio {ref_byte_miss_ratio}")
+ ```
+
+By defining custom hook functions for cache initialization, hit, miss, eviction, removal, and cleanup, users can easily prototype and test their own cache eviction algorithms.
+
+
+
+
diff --git a/docs/src/en/index.md b/docs/src/en/index.md
index 2eba51f..fbf84ae 100644
--- a/docs/src/en/index.md
+++ b/docs/src/en/index.md
@@ -1,68 +1,35 @@
-# libCacheSim Python Bindings
+# Welcome to libCacheSim Python
-Welcome to libCacheSim Python bindings! This is a high-performance cache simulation library with Python interface.
+!!! note
+ For convenience, we refer to the *libCacheSim Python Package* (this repo) as *libCacheSim* and the *C library* as *libCacheSim lib* in the following documentation.
-## Overview
+
+A high-performance library for building and running cache simulations + +
-## Key Features + -- **High Performance**: Built on top of the optimized C++ libCacheSim library -- **Multiple Cache Algorithms**: Support for LRU, LFU, FIFO, ARC, Clock, S3FIFO, Sieve, and many more -- **Trace Support**: Read various trace formats (CSV, binary, OracleGeneral, etc.) -- **Synthetic Traces**: Generate synthetic workloads with Zipf and uniform distributions -- **Analysis Tools**: Built-in trace analysis and cache performance evaluation -- **Easy Integration**: Simple Python API for research and production use +libCacheSim is an easy-to-use python binding of [libCachesim lib](https://github.com/1a1a11a/libCacheSim) for building and running cache simulations. -## Quick Example +libCacheSim is fast with the features from [underlying libCacheSim lib](https://github.com/1a1a11a/libCacheSim): -```python -import libcachesim as lcs +- High performance - over 20M requests/sec for a realistic trace replay. +- High memory efficiency - predictable and small memory footprint. +- Parallelism out-of-the-box - uses the many CPU cores to speed up trace analysis and cache simulations. -# Create a cache -cache = lcs.LRU(cache_size=1024*1024) # 1MB cache +libCacheSim is flexible and easy to use with: -# Generate synthetic trace -reader = lcs.SyntheticReader( - num_of_req=10000, - obj_size=1024, - dist="zipf", - alpha=1.0 -) - -# Simulate cache behavior -hit_count = 0 -for req in reader: - if cache.get(req): - hit_count += 1 - -hit_ratio = hit_count / reader.get_num_of_req() -print(f"Hit ratio: {hit_ratio:.4f}") -``` - -## Installation - -```bash -pip install libcachesim -``` - -Or install from source: - -```bash -git clone https://github.com/cacheMon/libCacheSim-python.git -cd libCacheSim-python -pip install -e . -``` - -## Getting Started - -Check out our [Quick Start Guide](quickstart.md) to begin using libCacheSim Python bindings, or explore the [API Reference](api.md) for detailed documentation. - -## Contributing - -We welcome contributions! 
Please see our [GitHub repository](https://github.com/cacheMon/libCacheSim-python) for more information. - -## License - -This project is licensed under the GPL-3.0 License. +- Seamless integration with [open-source cache dataset](https://github.com/cacheMon/cache_dataset) consisting of thousands traces hosted on S3. +- High-throughput simulation with the [underlying libCacheSim lib](https://github.com/1a1a11a/libCacheSim) +- Detailed cache requests and other internal data control +- Customized plugin cache development without any compilation \ No newline at end of file diff --git a/docs/src/en/quickstart.md b/docs/src/en/quickstart.md deleted file mode 100644 index 2e32f4d..0000000 --- a/docs/src/en/quickstart.md +++ /dev/null @@ -1,183 +0,0 @@ -# Quick Start Guide - -This guide will help you get started with libCacheSim Python bindings. - -## Installation - -### From PyPI (Recommended) - -```bash -pip install libcachesim -``` - -### From Source - -```bash -git clone https://github.com/cacheMon/libCacheSim-python.git -cd libCacheSim-python -git submodule update --init --recursive -pip install -e . -``` - -## Basic Usage - -### 1. Creating a Cache - -```python -import libcachesim as lcs - -# Create different types of caches -lru_cache = lcs.LRU(cache_size=1024*1024) # 1MB LRU cache -lfu_cache = lcs.LFU(cache_size=1024*1024) # 1MB LFU cache -fifo_cache = lcs.FIFO(cache_size=1024*1024) # 1MB FIFO cache -``` - -### 2. Using Synthetic Traces - -```python -# Generate Zipf-distributed requests -reader = lcs.SyntheticReader( - num_of_req=10000, - obj_size=1024, - dist="zipf", - alpha=1.0, - num_objects=1000, - seed=42 -) - -# Simulate cache behavior -cache = lcs.LRU(cache_size=50*1024) -hit_count = 0 - -for req in reader: - if cache.get(req): - hit_count += 1 - -print(f"Hit ratio: {hit_count/reader.get_num_of_req():.4f}") -``` - -### 3. 
Reading Real Traces - -```python -# Read CSV trace -reader = lcs.TraceReader( - trace="path/to/trace.csv", - trace_type=lcs.TraceType.CSV_TRACE, - has_header=True, - delimiter=",", - obj_id_is_num=True -) - -# Process requests -cache = lcs.LRU(cache_size=1024*1024) -for req in reader: - result = cache.get(req) - # Process result... -``` - -### 4. Cache Performance Analysis - -```python -# Run comprehensive analysis -analyzer = lcs.TraceAnalyzer(reader, "output_prefix") -analyzer.run() - -# This generates various analysis files: -# - Hit ratio curves -# - Access pattern analysis -# - Temporal locality analysis -# - And more... -``` - -## Available Cache Algorithms - -libCacheSim supports numerous cache algorithms: - -### Basic Algorithms -- **LRU**: Least Recently Used -- **LFU**: Least Frequently Used -- **FIFO**: First In, First Out -- **Clock**: Clock algorithm -- **Random**: Random replacement - -### Advanced Algorithms -- **ARC**: Adaptive Replacement Cache -- **S3FIFO**: Simple, Fast, Fair FIFO -- **Sieve**: Sieve eviction algorithm -- **TinyLFU**: Tiny LFU with admission control -- **TwoQ**: Two-Queue algorithm -- **LRB**: Learning Relaxed Belady - -### Experimental Algorithms -- **3LCache**: Three-Level Cache -- **And many more...** - -## Trace Formats - -Supported trace formats include: - -- **CSV**: Comma-separated values -- **Binary**: Custom binary format -- **OracleGeneral**: Oracle general format -- **Vscsi**: VMware vSCSI format -- **And more...** - -## Advanced Features - -### Custom Cache Policies - -You can implement custom cache policies using Python hooks: - -```python -from collections import OrderedDict - -def create_custom_lru(): - def init_hook(cache_size): - return OrderedDict() - - def hit_hook(cache_dict, obj_id, obj_size): - cache_dict.move_to_end(obj_id) - - def miss_hook(cache_dict, obj_id, obj_size): - cache_dict[obj_id] = obj_size - - def eviction_hook(cache_dict, obj_id, obj_size): - if cache_dict: - cache_dict.popitem(last=False) - 
- return lcs.PythonHookCache( - cache_size=1024*1024, - init_hook=init_hook, - hit_hook=hit_hook, - miss_hook=miss_hook, - eviction_hook=eviction_hook - ) - -custom_cache = create_custom_lru() -``` - -### Trace Sampling - -```python -# Sample 10% of requests spatially -reader = lcs.TraceReader( - trace="large_trace.csv", - trace_type=lcs.TraceType.CSV_TRACE, - sampling_ratio=0.1, - sampling_type=lcs.SamplerType.SPATIAL_SAMPLER -) -``` - -### Multi-threaded Analysis - -```python -# Use multiple threads for analysis -analyzer = lcs.TraceAnalyzer(reader, "output", n_threads=4) -analyzer.run() -``` - -## Next Steps - -- Explore the [API Reference](api.md) for detailed documentation -- Check out [Examples](examples.md) for more complex use cases -- Visit our [GitHub repository](https://github.com/cacheMon/libCacheSim-python) for source code and issues diff --git a/examples/basic_usage.py b/examples/basic_usage.py index e8dd208..2a4bd60 100644 --- a/examples/basic_usage.py +++ b/examples/basic_usage.py @@ -7,23 +7,19 @@ # Step 2: Open trace and process efficiently reader = lcs.TraceReader( - trace = dl.get_cache_path(URI), - trace_type = lcs.TraceType.ORACLE_GENERAL_TRACE, - reader_init_params = lcs.ReaderInitParam(ignore_obj_size=False) + trace=dl.get_cache_path(URI), + trace_type=lcs.TraceType.ORACLE_GENERAL_TRACE, + reader_init_params=lcs.ReaderInitParam(ignore_obj_size=False), ) # Step 3: Initialize cache -cache = lcs.S3FIFO(cache_size=1024*1024) +cache = lcs.S3FIFO(cache_size=1024 * 1024) # Step 4: Process entire trace efficiently (C++ backend) obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader) print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}") # Step 4.1: Process with limited number of requests -cache = lcs.S3FIFO(cache_size=1024*1024) -obj_miss_ratio, byte_miss_ratio = cache.process_trace( - reader, - start_req=0, - max_req=1000 -) -print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: 
{byte_miss_ratio:.4f}") \ No newline at end of file +cache = lcs.S3FIFO(cache_size=1024 * 1024) +obj_miss_ratio, byte_miss_ratio = cache.process_trace(reader, start_req=0, max_req=1000) +print(f"Object miss ratio: {obj_miss_ratio:.4f}, Byte miss ratio: {byte_miss_ratio:.4f}") diff --git a/examples/plugin_cache/s3fifo.py b/examples/plugin_cache/s3fifo.py index 1207e23..aa1fcdf 100644 --- a/examples/plugin_cache/s3fifo.py +++ b/examples/plugin_cache/s3fifo.py @@ -8,13 +8,16 @@ from collections import deque from libcachesim import PluginCache, CommonCacheParams, Request, S3FIFO, FIFO, SyntheticReader + # NOTE(haocheng): we only support ignore object size for now class StandaloneS3FIFO: - def __init__(self, - small_size_ratio: float = 0.1, - ghost_size_ratio: float = 0.9, - move_to_main_threshold: int = 2, - cache_size: int = 1024): + def __init__( + self, + small_size_ratio: float = 0.1, + ghost_size_ratio: float = 0.9, + move_to_main_threshold: int = 2, + cache_size: int = 1024, + ): self.cache_size = cache_size small_fifo_size = int(small_size_ratio * cache_size) main_fifo_size = cache_size - small_fifo_size @@ -27,15 +30,15 @@ def __init__(self, self.small_fifo = FIFO(small_fifo_size) self.main_fifo = FIFO(main_fifo_size) self.ghost_fifo = FIFO(ghost_fifo_size) - + # Frequency tracking self.freq = {} - + # Other parameters self.max_freq = 3 self.move_to_main_threshold = move_to_main_threshold - self.has_evicted = False # Mark if we start to evict, only after full we will start eviction + self.has_evicted = False # Mark if we start to evict, only after full we will start eviction self.hit_on_ghost = False def cache_hit(self, req: Request): @@ -46,7 +49,7 @@ def cache_hit(self, req: Request): if self.main_fifo.find(req, update_cache=False): self.freq[req.obj_id] += 1 - + def cache_miss(self, req: Request): if not self.hit_on_ghost: obj = self.ghost_fifo.find(req, update_cache=False) @@ -56,14 +59,13 @@ def cache_miss(self, req: Request): 
self.ghost_fifo.remove(req.obj_id) self.ghost_set.remove(req.obj_id) - # NOTE(haocheng): first we need to know this miss object has record in ghost or not if not self.hit_on_ghost: if req.obj_size >= self.small_fifo.cache_size: # If object is too large, we do not process it return - # If is initialization state, we need to insert to small fifo, + # If is initialization state, we need to insert to small fifo, # then we can insert to main fifo if not self.has_evicted and self.small_fifo.get_occupied_byte() >= self.small_fifo.cache_size: obj = self.main_fifo.insert(req) @@ -76,7 +78,7 @@ def cache_miss(self, req: Request): self.main_set.add(req.obj_id) self.hit_on_ghost = False self.freq[obj.obj_id] = 0 - + def cache_evict_small(self, req: Request): has_evicted = False evicted_id = None @@ -100,7 +102,7 @@ def cache_evict_small(self, req: Request): self.small_set.remove(evicted_id) assert flag, "Should be able to remove" return real_evicted_id - + def cache_evict_main(self, req: Request): has_evicted = False evicted_id = None @@ -134,15 +136,15 @@ def cache_evict(self, req: Request): self.ghost_set.remove(req.obj_id) self.has_evicted = True - cond = (self.main_fifo.get_occupied_byte() > self.main_fifo.cache_size) - if (cond or (self.small_fifo.get_occupied_byte() == 0)): + cond = self.main_fifo.get_occupied_byte() > self.main_fifo.cache_size + if cond or (self.small_fifo.get_occupied_byte() == 0): obj_id = self.cache_evict_main(req) else: obj_id = self.cache_evict_small(req) if obj_id is not None: del self.freq[obj_id] - + return obj_id def cache_remove(self, obj_id): @@ -151,28 +153,35 @@ def cache_remove(self, obj_id): removed |= self.ghost_fifo.remove(obj_id) removed |= self.main_fifo.remove(obj_id) return removed - + + def cache_init_hook(common_cache_params: CommonCacheParams): return StandaloneS3FIFO(cache_size=common_cache_params.cache_size) + def cache_hit_hook(cache, request: Request): cache.cache_hit(request) + def cache_miss_hook(cache, request: Request): 
cache.cache_miss(request) + def cache_eviction_hook(cache, request: Request): evicted_id = None while evicted_id is None: evicted_id = cache.cache_evict(request) return evicted_id + def cache_remove_hook(cache, obj_id): cache.cache_remove(obj_id) + def cache_free_hook(cache): pass + cache = PluginCache( cache_size=1024, cache_init_hook=cache_init_hook, @@ -181,7 +190,8 @@ def cache_free_hook(cache): cache_eviction_hook=cache_eviction_hook, cache_remove_hook=cache_remove_hook, cache_free_hook=cache_free_hook, - cache_name="S3FIFO") + cache_name="S3FIFO", +) URI = "cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst" dl = lcs.DataLoader() @@ -189,9 +199,9 @@ def cache_free_hook(cache): # Step 2: Open trace and process efficiently reader = lcs.TraceReader( - trace = dl.get_cache_path(URI), - trace_type = lcs.TraceType.ORACLE_GENERAL_TRACE, - reader_init_params = lcs.ReaderInitParam(ignore_obj_size=True) + trace=dl.get_cache_path(URI), + trace_type=lcs.TraceType.ORACLE_GENERAL_TRACE, + reader_init_params=lcs.ReaderInitParam(ignore_obj_size=True), ) ref_s3fifo = S3FIFO(cache_size=1024, small_size_ratio=0.1, ghost_size_ratio=0.9, move_to_main_threshold=2) @@ -208,4 +218,4 @@ def cache_free_hook(cache): assert req_miss_ratio == ref_req_miss_ratio assert byte_miss_ratio == ref_byte_miss_ratio -print("All requests processed successfully. Plugin cache matches reference S3FIFO cache.") \ No newline at end of file +print("All requests processed successfully. 
Plugin cache matches reference S3FIFO cache.") diff --git a/examples/trace_analysis.py b/examples/trace_analysis.py new file mode 100644 index 0000000..0318171 --- /dev/null +++ b/examples/trace_analysis.py @@ -0,0 +1,32 @@ +import libcachesim as lcs + +# Step 1: Get one trace from S3 bucket +URI = "cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst" +dl = lcs.DataLoader() +dl.load(URI) + +reader = lcs.TraceReader( + trace=dl.get_cache_path(URI), + trace_type=lcs.TraceType.ORACLE_GENERAL_TRACE, + reader_init_params=lcs.ReaderInitParam(ignore_obj_size=False), +) + +analysis_option = lcs.AnalysisOption( + req_rate=True, # Keep basic request rate analysis + access_pattern=False, # Disable access pattern analysis + size=True, # Keep size analysis + reuse=False, # Disable reuse analysis for small datasets + popularity=False, # Disable popularity analysis for small datasets (< 200 objects) + ttl=False, # Disable TTL analysis + popularity_decay=False, # Disable popularity decay analysis + lifetime=False, # Disable lifetime analysis + create_future_reuse_ccdf=False, # Disable experimental features + prob_at_age=False, # Disable experimental features + size_change=False, # Disable size change analysis +) + +analysis_param = lcs.AnalysisParam() + +analyzer = lcs.TraceAnalyzer(reader, "example_analysis", analysis_option=analysis_option, analysis_param=analysis_param) + +analyzer.run() diff --git a/libcachesim/cache.py b/libcachesim/cache.py index b61a512..94087e9 100644 --- a/libcachesim/cache.py +++ b/libcachesim/cache.py @@ -284,6 +284,7 @@ def __init__( def insert(self, req: Request) -> Optional[CacheObject]: return super().insert(req) + class TwoQ(CacheBase): """2Q replacement algorithm @@ -454,18 +455,24 @@ def __init__( class LRUProb(CacheBase): """LRU with Probabilistic Replacement - + Special parameters: prob: probability of promoting an object to the head of the queue (default: 0.5) """ def __init__( - self, cache_size: int, default_ttl: int = 86400 * 
300, hashpower: int = 24, consider_obj_metadata: bool = False, + self, + cache_size: int, + default_ttl: int = 86400 * 300, + hashpower: int = 24, + consider_obj_metadata: bool = False, prob: float = 0.5, ): cache_specific_params = f"prob={prob}" super().__init__( - _cache=LRU_Prob_init(_create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params) + _cache=LRU_Prob_init( + _create_common_params(cache_size, default_ttl, hashpower, consider_obj_metadata), cache_specific_params + ) ) @@ -551,7 +558,9 @@ def __init__( try: from .libcachesim_python import ThreeLCache_init except ImportError: - raise ImportError("ThreeLCache is not installed. Please install it with `pip install libcachesim[all]`") + raise ImportError( + 'ThreeLCache is not installed. Please install it with `CMAKE_ARGS="-DENABLE_3L_CACHE=ON" pip install libcachesim --force-reinstall`' + ) cache_specific_params = f"objective={objective}" super().__init__( @@ -592,7 +601,9 @@ def __init__( try: from .libcachesim_python import GLCache_init except ImportError: - raise ImportError("GLCache is not installed. Please install it with `pip install libcachesim[all]`") + raise ImportError( + 'GLCache is not installed. Please install it with `CMAKE_ARGS="-DENABLE_GLCACHE=ON" pip install libcachesim --force-reinstall`' + ) cache_specific_params = f"segment-size={segment_size}, n-merge={n_merge}, type={type}, rank-intvl={rank_intvl}, merge-consecutive-segs={merge_consecutive_segs}, train-source-y={train_source_y}, retrain-intvl={retrain_intvl}" super().__init__( @@ -621,7 +632,9 @@ def __init__( try: from .libcachesim_python import LRB_init except ImportError: - raise ImportError("LRB is not installed. Please install it with `pip install libcachesim[all]`") + raise ImportError( + 'LRB is not installed. 
Please install it with `CMAKE_ARGS="-DENABLE_LRB=ON" pip install libcachesim --force-reinstall`' + ) cache_specific_params = f"objective={objective}" super().__init__( diff --git a/libcachesim/synthetic_reader.py b/libcachesim/synthetic_reader.py index b429242..936f29d 100644 --- a/libcachesim/synthetic_reader.py +++ b/libcachesim/synthetic_reader.py @@ -90,7 +90,7 @@ def read_one_req(self) -> Request: req = Request() if self.current_pos >= self.num_of_req: req.valid = False - return req # return invalid request + return req # return invalid request obj_id = self.obj_ids[self.current_pos] req.obj_id = obj_id diff --git a/libcachesim/trace_reader.py b/libcachesim/trace_reader.py index 20a2aba..d282a68 100644 --- a/libcachesim/trace_reader.py +++ b/libcachesim/trace_reader.py @@ -169,7 +169,7 @@ def get_num_of_req(self) -> int: def read_one_req(self) -> Request: req = Request() - ret = self._reader.read_one_req(req) # return 0 if success + ret = self._reader.read_one_req(req) # return 0 if success if ret != 0: raise RuntimeError("Failed to read one request") return req diff --git a/pyproject.toml b/pyproject.toml index 3618995..d71659c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ build-backend = "scikit_build_core.build" [project] name = "libcachesim" -version = "0.3.3" +version = "0.3.3.post2" description="Python bindings for libCacheSim" readme = "README.md" requires-python = ">=3.9" @@ -30,29 +30,14 @@ dependencies = [ "pytest>=8.4.1", ] + [project.optional-dependencies] test = ["pytest"] -dev = [ - "pytest", - "pre-commit", - "ruff>=0.7.0", - "mypy>=1.0.0", -] -all = [ - "xgboost", - "lightgbm" -] +dev = ["pytest", "pre-commit", "ruff>=0.7.0", "mypy>=1.0.0"] - -[tool.scikit-build] -wheel.expand-macos-universal-tags = true -build-dir = "build" -cmake.build-type = "Release" -cmake.args = ["-G", "Ninja"] -cmake.define = { CMAKE_OSX_DEPLOYMENT_TARGET = "14.0" } -cmake.version = ">=3.15" -cmake.source-dir = "." 
-install.strip = false +# ============================================================ +# pytest +# ============================================================ [tool.pytest.ini_options] minversion = "8.0" @@ -71,6 +56,23 @@ python_files = ["test.py", "test_*.py", "*_test.py"] python_classes = ["Test*"] python_functions = ["test_*"] +# ============================================================ +# scikit-build +# ============================================================ + +[tool.scikit-build] +build-dir = "build" + +[tool.scikit-build.cmake] +build-type = "Release" +args = ["-G", "Ninja"] +define = { CMAKE_OSX_DEPLOYMENT_TARGET = "14.0" } +version = ">=3.15" +source-dir = "." + +[tool.scikit-build.install] +strip = false + [tool.cibuildwheel] manylinux-x86_64-image = "quay.io/pypa/manylinux_2_34_x86_64" @@ -80,10 +82,11 @@ build = ["cp39-*", "cp310-*", "cp311-*", "cp312-*", "cp313-*"] skip = ["*-win32", "*-manylinux_i686", "*-musllinux*", "pp*"] # Set the environment variable for the wheel build step. -environment = { LCS_BUILD_DIR = "{project}/src/libCacheSim/build", MACOSX_DEPLOYMENT_TARGET = "14.0" } +# NOTE(haocheng): we enable all the optional features for the wheel build. 
+environment = { LCS_BUILD_DIR = "{project}/src/libCacheSim/build", MACOSX_DEPLOYMENT_TARGET = "14.0", CMAKE_ARGS = "-DENABLE_3L_CACHE=ON -DENABLE_GLCACHE=ON -DENABLE_LRB=ON" } # Test that the wheel can be imported -test-command = "python -c 'import libcachesim; print(\"Import successful\")'" +test-command = "python -c 'import libcachesim; print(\"Import successful\")'; cp -r {project}/tests .; python -m pytest tests/ -v -m 'not optional'; python -m pytest tests/ -v -m 'optional'" [tool.cibuildwheel.linux] before-all = "yum install -y yum-utils && yum-config-manager --set-enabled crb && yum install -y git && git submodule update --init --recursive && bash scripts/install_deps.sh" diff --git a/scripts/detect_deps.py b/scripts/detect_deps.py index ab66642..5ef26a7 100644 --- a/scripts/detect_deps.py +++ b/scripts/detect_deps.py @@ -9,11 +9,13 @@ import sys import subprocess + def fix_pybind11(): """Fix pybind11 installation""" print("Checking pybind11 installation...") try: import pybind11 + print("✓ pybind11 is installed") # Check CMake config try: @@ -29,6 +31,7 @@ def fix_pybind11(): subprocess.run([sys.executable, "-m", "pip", "install", "--force-reinstall", "pybind11"], check=True) print("✓ pybind11 reinstalled successfully") import pybind11 + cmake_dir = pybind11.get_cmake_dir() print(f"✓ pybind11 CMake directory: {cmake_dir}") return True @@ -36,25 +39,28 @@ def fix_pybind11(): print(f"✗ pybind11 installation failed: {e}") return False + def fix_xgboost(): """Fix xgboost installation""" print("Checking xgboost installation...") try: import xgboost + print("✓ xgboost is installed") # Try to find CMake directory (if available) - cmake_dir = getattr(xgboost, 'cmake_dir', None) + cmake_dir = getattr(xgboost, "cmake_dir", None) if cmake_dir: print(f"✓ xgboost CMake directory: {cmake_dir}") else: # Try common install locations import os + possible_dirs = [ - os.path.join(xgboost.__path__[0], 'cmake'), - os.path.join(xgboost.__path__[0], '..', 'cmake'), - 
'/usr/local/lib/cmake/xgboost', - '/usr/local/share/cmake/xgboost', - '/opt/homebrew/lib/cmake/xgboost', + os.path.join(xgboost.__path__[0], "cmake"), + os.path.join(xgboost.__path__[0], "..", "cmake"), + "/usr/local/lib/cmake/xgboost", + "/usr/local/share/cmake/xgboost", + "/opt/homebrew/lib/cmake/xgboost", ] found = False for d in possible_dirs: @@ -72,19 +78,21 @@ def fix_xgboost(): subprocess.run([sys.executable, "-m", "pip", "install", "--force-reinstall", "xgboost"], check=True) print("✓ xgboost reinstalled successfully") import xgboost + print("✓ xgboost is installed after reinstall") # Repeat CMake dir check after reinstall - cmake_dir = getattr(xgboost, 'cmake_dir', None) + cmake_dir = getattr(xgboost, "cmake_dir", None) if cmake_dir: print(f"✓ xgboost CMake directory: {cmake_dir}") else: import os + possible_dirs = [ - os.path.join(xgboost.__path__[0], 'cmake'), - os.path.join(xgboost.__path__[0], '..', 'cmake'), - '/usr/local/lib/cmake/xgboost', - '/usr/local/share/cmake/xgboost', - '/opt/homebrew/lib/cmake/xgboost', + os.path.join(xgboost.__path__[0], "cmake"), + os.path.join(xgboost.__path__[0], "..", "cmake"), + "/usr/local/lib/cmake/xgboost", + "/usr/local/share/cmake/xgboost", + "/opt/homebrew/lib/cmake/xgboost", ] found = False for d in possible_dirs: @@ -99,24 +107,27 @@ def fix_xgboost(): print(f"✗ xgboost installation failed: {e}") return False + def fix_lightgbm(): """Fix lightgbm installation""" print("Checking lightgbm installation...") try: import lightgbm + print("✓ lightgbm is installed") # Try to find CMake directory (if available) - cmake_dir = getattr(lightgbm, 'cmake_dir', None) + cmake_dir = getattr(lightgbm, "cmake_dir", None) if cmake_dir: print(f"✓ lightgbm CMake directory: {cmake_dir}") else: import os + possible_dirs = [ - os.path.join(lightgbm.__path__[0], 'cmake'), - os.path.join(lightgbm.__path__[0], '..', 'cmake'), - '/usr/local/lib/cmake/LightGBM', - '/usr/local/share/cmake/LightGBM', - '/opt/homebrew/lib/cmake/LightGBM', + 
os.path.join(lightgbm.__path__[0], "cmake"), + os.path.join(lightgbm.__path__[0], "..", "cmake"), + "/usr/local/lib/cmake/LightGBM", + "/usr/local/share/cmake/LightGBM", + "/opt/homebrew/lib/cmake/LightGBM", ] found = False for d in possible_dirs: @@ -134,19 +145,21 @@ def fix_lightgbm(): subprocess.run([sys.executable, "-m", "pip", "install", "--force-reinstall", "lightgbm"], check=True) print("✓ lightgbm reinstalled successfully") import lightgbm + print("✓ lightgbm is installed after reinstall") # Repeat CMake dir check after reinstall - cmake_dir = getattr(lightgbm, 'cmake_dir', None) + cmake_dir = getattr(lightgbm, "cmake_dir", None) if cmake_dir: print(f"✓ lightgbm CMake directory: {cmake_dir}") else: import os + possible_dirs = [ - os.path.join(lightgbm.__path__[0], 'cmake'), - os.path.join(lightgbm.__path__[0], '..', 'cmake'), - '/usr/local/lib/cmake/LightGBM', - '/usr/local/share/cmake/LightGBM', - '/opt/homebrew/lib/cmake/LightGBM', + os.path.join(lightgbm.__path__[0], "cmake"), + os.path.join(lightgbm.__path__[0], "..", "cmake"), + "/usr/local/lib/cmake/LightGBM", + "/usr/local/share/cmake/LightGBM", + "/opt/homebrew/lib/cmake/LightGBM", ] found = False for d in possible_dirs: @@ -161,6 +174,7 @@ def fix_lightgbm(): print(f"✗ lightgbm installation failed: {e}") return False + def detect_dependencies(): """Detect dependencies for the project""" print("Detecting dependencies...") @@ -170,5 +184,6 @@ def detect_dependencies(): fix_xgboost() fix_lightgbm() + if __name__ == "__main__": - detect_dependencies() \ No newline at end of file + detect_dependencies() diff --git a/scripts/smart_build.py b/scripts/smart_build.py index 0efb783..871845f 100644 --- a/scripts/smart_build.py +++ b/scripts/smart_build.py @@ -9,17 +9,17 @@ import os import platform + def get_macos_deployment_target(): """Get appropriate macOS deployment target""" if sys.platform != "darwin": return None - + try: - result = subprocess.run(["sw_vers", "-productVersion"], - capture_output=True, 
text=True, check=True) + result = subprocess.run(["sw_vers", "-productVersion"], capture_output=True, text=True, check=True) macos_version = result.stdout.strip() - major_version = macos_version.split('.')[0] - + major_version = macos_version.split(".")[0] + # Set deployment target to current version deployment_target = f"{major_version}.0" print(f"Detected macOS version: {macos_version}, set deployment target: {deployment_target}") @@ -28,6 +28,7 @@ def get_macos_deployment_target(): print(f"Failed to detect macOS version, using default: {e}") return "14.0" + def check_dependency(module_name): """Check if a Python module is installed""" try: @@ -36,77 +37,81 @@ def check_dependency(module_name): except ImportError: return False + def fix_pybind11(): """Fix pybind11 installation""" print("Checking pybind11...") subprocess.run([sys.executable, "scripts/fix_pybind11.py"], check=True) + def build_with_flags(): """Build according to dependencies""" # Fix pybind11 fix_pybind11() - + # Check ML dependencies xgboost_available = check_dependency("xgboost") lightgbm_available = check_dependency("lightgbm") - + print(f"XGBoost available: {xgboost_available}") print(f"LightGBM available: {lightgbm_available}") - + # Build CMake args cmake_args = ["-G", "Ninja"] - + # Add pybind11 path try: import pybind11 + pybind11_dir = pybind11.get_cmake_dir() cmake_args.extend([f"-Dpybind11_DIR={pybind11_dir}"]) print(f"Set pybind11 path: {pybind11_dir}") except Exception as e: print(f"Warning: failed to set pybind11 path: {e}") - + # Enable GLCache if XGBoost is available if xgboost_available: cmake_args.extend(["-DENABLE_GLCACHE=ON"]) print("Enable GLCache (requires XGBoost)") - + # Enable LRB and 3LCache if LightGBM is available if lightgbm_available: cmake_args.extend(["-DENABLE_LRB=ON", "-DENABLE_3L_CACHE=ON"]) print("Enable LRB and 3LCache (requires LightGBM)") - + # Set macOS deployment target deployment_target = get_macos_deployment_target() if deployment_target: 
cmake_args.extend([f"-DCMAKE_OSX_DEPLOYMENT_TARGET={deployment_target}"]) - + # Build commands build_dir = "src/libCacheSim/build" source_dir = "." - + # Clean build directory if os.path.exists(build_dir): print("Cleaning build directory...") subprocess.run(["rm", "-rf", build_dir], check=True) - + # Run CMake configure cmake_cmd = ["cmake", "-S", source_dir, "-B", build_dir] + cmake_args print(f"Running: {' '.join(cmake_cmd)}") subprocess.run(cmake_cmd, check=True) - + # Run build build_cmd = ["cmake", "--build", build_dir] print(f"Running: {' '.join(build_cmd)}") subprocess.run(build_cmd, check=True) - + print("✓ Build completed!") + def main(): print("=== libCacheSim Smart Build ===") print(f"Platform: {platform.platform()}") print(f"Python: {sys.version}") print() - + try: build_with_flags() except subprocess.CalledProcessError as e: @@ -116,5 +121,6 @@ def main(): print(f"✗ Build exception: {e}") sys.exit(1) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/tests/reference.csv b/tests/reference.csv deleted file mode 100644 index cb569d0..0000000 --- a/tests/reference.csv +++ /dev/null @@ -1,20 +0,0 @@ -FIFO,0.01,0.8368 -ARC,0.01,0.8222 -Clock,0.01,0.8328 -LRB,0.01,0.8339 -LRU,0.01,0.8339 -S3FIFO,0.01,0.8235 -Sieve,0.01,0.8231 -3LCache,0.01,0.8339 -TinyLFU,0.01,0.8262 -TwoQ,0.01,0.8276 -FIFO,0.1,0.8075 -ARC,0.1,0.7688 -Clock,0.1,0.8086 -LRB,0.1,0.8097 -LRU,0.1,0.8097 -S3FIFO,0.1,0.7542 -Sieve,0.1,0.7903 -3LCache,0.1,0.8097 -TinyLFU,0.1,0.7666 -TwoQ,0.1,0.7695 diff --git a/tests/test_cache.py b/tests/test_cache.py index 108e6fd..c339b91 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -9,40 +9,57 @@ import os from libcachesim import ( # Basic algorithms - LRU, FIFO, LFU, ARC, Clock, Random, + LRU, + FIFO, + LFU, + ARC, + Clock, + Random, # Advanced algorithms - S3FIFO, Sieve, LIRS, TwoQ, SLRU, WTinyLFU, + S3FIFO, + Sieve, + LIRS, + TwoQ, + SLRU, + WTinyLFU, # Request and other utilities - Request, ReqOp, 
SyntheticReader + Request, + ReqOp, + SyntheticReader, ) # Try to import optional algorithms that might not be available try: from libcachesim import LeCaR, LFUDA, ClockPro, Cacheus + OPTIONAL_ALGORITHMS = [LeCaR, LFUDA, ClockPro, Cacheus] except ImportError: OPTIONAL_ALGORITHMS = [] try: from libcachesim import Belady, BeladySize + OPTIMAL_ALGORITHMS = [Belady, BeladySize] except ImportError: OPTIMAL_ALGORITHMS = [] try: from libcachesim import LRUProb, FlashProb + PROBABILISTIC_ALGORITHMS = [LRUProb, FlashProb] except ImportError: PROBABILISTIC_ALGORITHMS = [] try: from libcachesim import Size, GDSF + SIZE_BASED_ALGORITHMS = [Size, GDSF] except ImportError: SIZE_BASED_ALGORITHMS = [] try: from libcachesim import Hyperbolic + HYPERBOLIC_ALGORITHMS = [Hyperbolic] except ImportError: HYPERBOLIC_ALGORITHMS = [] @@ -51,43 +68,63 @@ class TestCacheBasicFunctionality: """Test basic cache functionality across different algorithms""" - @pytest.mark.parametrize("cache_class", [ - LRU, FIFO, LFU, ARC, Clock, Random, - S3FIFO, Sieve, LIRS, TwoQ, SLRU, WTinyLFU, LeCaR, LFUDA, ClockPro, Cacheus, - LRUProb, FlashProb, Size, GDSF, Hyperbolic - ]) + @pytest.mark.parametrize( + "cache_class", + [ + LRU, + FIFO, + LFU, + ARC, + Clock, + Random, + S3FIFO, + Sieve, + LIRS, + TwoQ, + SLRU, + WTinyLFU, + LeCaR, + LFUDA, + ClockPro, + Cacheus, + LRUProb, + FlashProb, + Size, + GDSF, + Hyperbolic, + ], + ) def test_cache_initialization(self, cache_class): """Test that all cache types can be initialized with different sizes""" - cache_sizes = [1024, 1024*1024, 1024*1024*1024] # 1KB, 1MB, 1GB - + cache_sizes = [1024, 1024 * 1024, 1024 * 1024 * 1024] # 1KB, 1MB, 1GB + for size in cache_sizes: try: cache = cache_class(size) assert cache is not None - assert hasattr(cache, 'get') - assert hasattr(cache, 'insert') - assert hasattr(cache, 'find') + assert hasattr(cache, "get") + assert hasattr(cache, "insert") + assert hasattr(cache, "find") except Exception as e: pytest.skip(f"Cache 
{cache_class.__name__} failed to initialize: {e}") - @pytest.mark.parametrize("cache_class", [ - LRU, FIFO, LFU, ARC, Clock, Random, - S3FIFO, Sieve, LIRS, TwoQ, SLRU, WTinyLFU - ]) + @pytest.mark.parametrize( + "cache_class", [LRU, FIFO, LFU, ARC, Clock, Random, S3FIFO, Sieve, LIRS, TwoQ, SLRU, WTinyLFU] + ) def test_basic_get_and_insert(self, cache_class): """Test basic get and insert operations""" - cache = cache_class(1024*1024) # 1MB cache - + cache = cache_class(1024 * 1024) # 1MB cache + # Create a request req = Request() req.obj_id = 1 req.obj_size = 100 req.op = ReqOp.OP_GET - + # Initially, object should not be in cache hit = cache.get(req) assert hit == False - + # Insert the object if cache_class != LIRS: cache_obj = cache.insert(req) @@ -96,20 +133,41 @@ def test_basic_get_and_insert(self, cache_class): assert cache_obj.obj_size == 100 else: assert cache.insert(req) is None - + # Now it should be a hit hit = cache.get(req) assert hit == True - @pytest.mark.parametrize("cache_class", [ - LRU, FIFO, LFU, ARC, Clock, Random, - S3FIFO, Sieve, LIRS, TwoQ, SLRU, WTinyLFU, LeCaR, LFUDA, ClockPro, Cacheus, - LRUProb, FlashProb, Size, GDSF, Hyperbolic - ]) + @pytest.mark.parametrize( + "cache_class", + [ + LRU, + FIFO, + LFU, + ARC, + Clock, + Random, + S3FIFO, + Sieve, + LIRS, + TwoQ, + SLRU, + WTinyLFU, + LeCaR, + LFUDA, + ClockPro, + Cacheus, + LRUProb, + FlashProb, + Size, + GDSF, + Hyperbolic, + ], + ) def test_cache_eviction(self, cache_class): """Test that cache eviction works when cache is full""" - cache = cache_class(1024*1024) # 1MB cache - + cache = cache_class(1024 * 1024) # 1MB cache + if cache_class == GDSF: pytest.skip("GDSF should be used with find/get but not insert") @@ -120,9 +178,9 @@ def test_cache_eviction(self, cache_class): req.obj_size = 50 # Each object is 50 bytes req.op = ReqOp.OP_GET req.next_access_vtime = 100 + i - + cache.insert(req) - + # Try to insert one more object req = Request() req.obj_id = 999 @@ -131,59 +189,101 @@ def 
test_cache_eviction(self, cache_class): req.op = ReqOp.OP_GET cache.insert(req) - @pytest.mark.parametrize("cache_class", [ - LRU, FIFO, LFU, ARC, Clock, Random, - S3FIFO, Sieve, LIRS, TwoQ, SLRU, WTinyLFU, LeCaR, LFUDA, ClockPro, Cacheus, - LRUProb, FlashProb, Size, GDSF, Hyperbolic - ]) + @pytest.mark.parametrize( + "cache_class", + [ + LRU, + FIFO, + LFU, + ARC, + Clock, + Random, + S3FIFO, + Sieve, + LIRS, + TwoQ, + SLRU, + WTinyLFU, + LeCaR, + LFUDA, + ClockPro, + Cacheus, + LRUProb, + FlashProb, + Size, + GDSF, + Hyperbolic, + ], + ) def test_cache_find_method(self, cache_class): """Test the find method functionality""" cache = cache_class(1024) - + req = Request() req.obj_id = 1 req.obj_size = 100 req.op = ReqOp.OP_GET - + # Initially should not find the object cache_obj = cache.find(req, update_cache=False) assert cache_obj is None - + # Insert the object cache.insert(req) - + # Now should find it cache_obj = cache.find(req, update_cache=False) assert cache_obj is not None assert cache_obj.obj_id == 1 - @pytest.mark.parametrize("cache_class", [ - LRU, FIFO, LFU, ARC, Clock, Random, - S3FIFO, Sieve, LIRS, TwoQ, SLRU, WTinyLFU, LeCaR, LFUDA, ClockPro, Cacheus, - LRUProb, FlashProb, Size, GDSF, Hyperbolic - ]) + @pytest.mark.parametrize( + "cache_class", + [ + LRU, + FIFO, + LFU, + ARC, + Clock, + Random, + S3FIFO, + Sieve, + LIRS, + TwoQ, + SLRU, + WTinyLFU, + LeCaR, + LFUDA, + ClockPro, + Cacheus, + LRUProb, + FlashProb, + Size, + GDSF, + Hyperbolic, + ], + ) def test_cache_can_insert(self, cache_class): """Test can_insert method""" - cache = cache_class(1024*1024) - + cache = cache_class(1024 * 1024) + req = Request() req.obj_id = 1 req.obj_size = 100 req.op = ReqOp.OP_GET - + # Should be able to insert initially can_insert = cache.can_insert(req) assert can_insert == True - + # Insert the object cache.insert(req) - + # Try to insert a larger object that won't fit req2 = Request() req2.obj_id = 2 req2.obj_size = 150 # Too large for remaining space req2.op = 
ReqOp.OP_GET - + can_insert = cache.can_insert(req2) # Some algorithms might still return True if they can evict assert can_insert in [True, False] @@ -195,12 +295,12 @@ class TestCacheEdgeCases: def test_zero_size_cache(self): """Test cache with zero size""" cache = LRU(0) - + req = Request() req.obj_id = 1 req.obj_size = 100 req.op = ReqOp.OP_GET - + # Should not be able to insert can_insert = cache.can_insert(req) assert can_insert == False @@ -208,12 +308,12 @@ def test_zero_size_cache(self): def test_large_object(self): """Test inserting object larger than cache size""" cache = LRU(100) - + req = Request() req.obj_id = 1 req.obj_size = 200 # Larger than cache req.op = ReqOp.OP_GET - + # Should not be able to insert can_insert = cache.can_insert(req) assert can_insert == False @@ -227,12 +327,12 @@ def test_string_object_id(self): def test_zero_size_object(self): """Test with zero size object""" cache = LRU(1024) - + req = Request() req.obj_id = 1 req.obj_size = 0 req.op = ReqOp.OP_GET - + # Should work fine cache.insert(req) hit = cache.get(req) @@ -245,46 +345,33 @@ class TestCacheWithSyntheticTrace: def test_cache_with_zipf_trace(self): """Test cache performance with Zipf distribution""" # Create synthetic reader with Zipf distribution - reader = SyntheticReader( - num_of_req=1000, - obj_size=100, - alpha=1.0, - dist="zipf", - num_objects=100, - seed=42 - ) - + reader = SyntheticReader(num_of_req=1000, obj_size=100, alpha=1.0, dist="zipf", num_objects=100, seed=42) + # Test with different cache algorithms cache_algorithms = [LRU, FIFO, LFU, S3FIFO, Sieve] - + for cache_class in cache_algorithms: cache = cache_class(1024) # 1KB cache - + # Process the trace miss_ratio, _ = cache.process_trace(reader) - + # Basic sanity checks assert 0.0 <= miss_ratio <= 1.0 - + # Reset reader for next test reader.reset() def test_cache_with_uniform_trace(self): """Test cache performance with uniform distribution""" # Create synthetic reader with uniform distribution - reader 
= SyntheticReader( - num_of_req=500, - obj_size=50, - dist="uniform", - num_objects=50, - seed=123 - ) - + reader = SyntheticReader(num_of_req=500, obj_size=50, dist="uniform", num_objects=50, seed=123) + cache = LRU(512) # 512B cache - + # Process the trace miss_ratio, _ = cache.process_trace(reader) - + # Basic sanity checks assert 0.0 <= miss_ratio <= 1.0 @@ -295,18 +382,18 @@ class TestCacheStatistics: def test_cache_occupied_bytes(self): """Test get_occupied_byte method""" cache = LRU(1024) - + # Initially should be 0 occupied = cache.get_occupied_byte() assert occupied == 0 - + # Insert an object req = Request() req.obj_id = 1 req.obj_size = 100 req.op = ReqOp.OP_GET cache.insert(req) - + # Should reflect the inserted object size occupied = cache.get_occupied_byte() assert occupied >= 100 # May include metadata overhead @@ -314,11 +401,11 @@ def test_cache_occupied_bytes(self): def test_cache_object_count(self): """Test get_n_obj method""" cache = LRU(1024) - + # Initially should be 0 n_obj = cache.get_n_obj() assert n_obj == 0 - + # Insert objects for i in range(3): req = Request() @@ -326,7 +413,7 @@ def test_cache_object_count(self): req.obj_size = 100 req.op = ReqOp.OP_GET cache.insert(req) - + # Should have 3 objects n_obj = cache.get_n_obj() assert n_obj == 3 @@ -334,14 +421,14 @@ def test_cache_object_count(self): def test_cache_print(self): """Test print_cache method""" cache = LRU(1024) - + # Insert an object req = Request() req.obj_id = 1 req.obj_size = 100 req.op = ReqOp.OP_GET cache.insert(req) - + # Should return a string representation cache.print_cache() @@ -352,22 +439,22 @@ class TestCacheOperations: def test_cache_remove(self): """Test remove method""" cache = LRU(1024) - + # Insert an object req = Request() req.obj_id = 1 req.obj_size = 100 req.op = ReqOp.OP_GET cache.insert(req) - + # Verify it's in cache hit = cache.get(req) assert hit == True - + # Remove it removed = cache.remove(1) assert removed == True - + # Verify it's no longer in 
cache hit = cache.get(req) assert hit == False @@ -375,7 +462,7 @@ def test_cache_remove(self): def test_cache_need_eviction(self): """Test need_eviction method""" cache = LRU(200) - + # Insert objects until cache is nearly full for i in range(3): req = Request() @@ -383,13 +470,13 @@ def test_cache_need_eviction(self): req.obj_size = 50 req.op = ReqOp.OP_GET cache.insert(req) - + # Try to insert a larger object req = Request() req.obj_id = 999 req.obj_size = 100 req.op = ReqOp.OP_GET - + # Should need eviction need_eviction = cache.need_eviction(req) assert need_eviction == True @@ -397,7 +484,7 @@ def test_cache_need_eviction(self): def test_cache_to_evict(self): """Test to_evict method""" cache = LRU(200) - + # Insert objects for i in range(3): req = Request() @@ -405,14 +492,42 @@ def test_cache_to_evict(self): req.obj_size = 50 req.op = ReqOp.OP_GET cache.insert(req) - + # Try to insert a larger object req = Request() req.obj_id = 999 req.obj_size = 100 req.op = ReqOp.OP_GET - + # Should return an object to evict evict_obj = cache.to_evict(req) assert evict_obj is not None - assert hasattr(evict_obj, 'obj_id') \ No newline at end of file + assert hasattr(evict_obj, "obj_id") + + +class TestCacheOptionalAlgorithms: + """Test optional algorithms""" + + @pytest.mark.optional + def test_glcache(self): + """Test GLCache algorithm""" + from libcachesim import GLCache + + cache = GLCache(1024) + assert cache is not None + + @pytest.mark.optional + def test_lrb(self): + """Test LRB algorithm""" + from libcachesim import LRB + + cache = LRB(1024) + assert cache is not None + + @pytest.mark.optional + def test_3lcache(self): + """Test 3LCache algorithm""" + from libcachesim import ThreeLCache + + cache = ThreeLCache(1024) + assert cache is not None