File tree: 2 files changed, +7 -1 lines changed
Original file line number | Diff line number | Diff line change
9
9
from vllm .sequence import Sequence , SequenceGroup , SequenceStatus
10
10
from vllm .utils import Device
11
11
from vllm .core .evictor import Evictor , EvictionPolicy , make_evictor
12
+ from vllm .logger import init_logger
13
+
14
+ logger = init_logger (__name__ )
12
15
13
16
14
17
class BlockAllocatorBase (ABC ):
@@ -241,11 +244,13 @@ def __init__(
241
244
self .watermark_blocks = int (watermark * num_gpu_blocks )
242
245
243
246
if self .enable_caching :
247
+ logger .info ("enable automatic prefix caching" )
244
248
self .gpu_allocator = CachedBlockAllocator (Device .GPU , block_size ,
245
249
num_gpu_blocks )
246
250
self .cpu_allocator = CachedBlockAllocator (Device .CPU , block_size ,
247
251
num_cpu_blocks )
248
252
else :
253
+ logger .info ("disable automatic prefix caching" )
249
254
self .gpu_allocator = UncachedBlockAllocator (
250
255
Device .GPU , block_size , num_gpu_blocks )
251
256
self .cpu_allocator = UncachedBlockAllocator (
Original file line number | Diff line number | Diff line change
@@ -337,7 +337,8 @@ def create_engine_configs(
337
337
cache_config = CacheConfig (self .block_size ,
338
338
self .gpu_memory_utilization ,
339
339
self .swap_space , self .kv_cache_dtype ,
340
- model_config .get_sliding_window ())
340
+ model_config .get_sliding_window (),
341
+ self .enable_prefix_caching )
341
342
parallel_config = ParallelConfig (
342
343
self .pipeline_parallel_size , self .tensor_parallel_size ,
343
344
self .worker_use_ray , self .max_parallel_loading_workers ,
You can’t perform that action at this time.
0 commit comments