@@ -595,6 +595,21 @@ class WindowBlockManager
595595
596596 ~WindowBlockManager ();
597597
598+ [[nodiscard]] bool isEnableIndexerKCache () const
599+ {
600+ return mEnableIndexerKCache ;
601+ }
602+
603+ [[nodiscard]] SizeType32 getIndexerKCacheQuantBlockSize () const
604+ {
605+ return mIndexerKCacheQuantBlockSize ;
606+ }
607+
608+ [[nodiscard]] SizeType32 getIndexerKCacheIndexHeadDim () const
609+ {
610+ return mIndexerKCacheIndexHeadDim ;
611+ }
612+
598613 void allocatePools (bool useUvm);
599614
600615 void releasePools ();
@@ -809,6 +824,9 @@ class WindowBlockManager
809824 return mBufferManager ;
810825 }
811826
827+ //! \brief Sync the internal streams used by the transfer manager with the buffer manager stream.
828+ void syncTransferManagerWithBufferManager ();
829+
812830 // ! \brief Perform per-request bookkeeping
813831 void refreshBlocks ();
814832
@@ -1021,6 +1039,21 @@ class BlockManager
10211039 std::optional<kvc::BaseAgentConfig> agentConfig = std::nullopt , bool enableIndexerKCache = false ,
10221040 SizeType32 indexerKCacheQuantBlockSize = 128 , SizeType32 indexerKCacheIndexHeadDim = 0 );
10231041
1042+ [[nodiscard]] bool isEnableIndexerKCache () const
1043+ {
1044+ return mIsEnableIndexerKCache ;
1045+ }
1046+
1047+ [[nodiscard]] SizeType32 getIndexerKCacheQuantBlockSize () const
1048+ {
1049+ return mIndexerKCacheQuantBlockSize ;
1050+ }
1051+
1052+ [[nodiscard]] SizeType32 getIndexerKCacheIndexHeadDim () const
1053+ {
1054+ return mIndexerKCacheIndexHeadDim ;
1055+ }
1056+
10241057 BlockManager (BlockManager const &) = delete ;
10251058 BlockManager& operator =(BlockManager const &) = delete ;
10261059
@@ -1283,6 +1316,9 @@ class BlockManager
12831316 // ! \brief Store newest block for reuse
12841317 void storeNewBlock (GenerationRequest& sequence, OptionalRef<LlmRequest const > llmRequest);
12851318
1319+ //! \brief Sync the internal streams used by the transfer manager with the buffer manager stream.
1320+ void syncTransferManagerWithBufferManager ();
1321+
12861322 // ! \brief Perform per-request bookkeeping
12871323 void refreshBlocks ();
12881324
@@ -1398,6 +1434,10 @@ class BlockManager
13981434 std::vector<SizeType32> mAbsolutePoolToRelativePoolIndex ;
13991435 // Record what sequences are currently managed by the block manager
14001436 std::set<LlmRequest::RequestIdType> mManagedSequences ;
1437+
// Indexer K cache settings, returned by isEnableIndexerKCache(),
// getIndexerKCacheQuantBlockSize() and getIndexerKCacheIndexHeadDim() respectively.
// NOTE(review): a declaration in this class (presumably the constructor) defaults
// indexerKCacheQuantBlockSize to 128, while the in-class default below is 0 -
// presumably the constructor always overwrites these members; confirm.
1438+ bool mIsEnableIndexerKCache {false };
1439+ SizeType32 mIndexerKCacheQuantBlockSize {0 };
1440+ SizeType32 mIndexerKCacheIndexHeadDim {0 };
14011441};
14021442
14031443struct OffsetTableDimensions
@@ -1500,6 +1540,10 @@ class BaseKVCacheManager
15001540
15011541 [[nodiscard]] virtual bool isEnableBlockReuse () const = 0;
15021542
//! \brief Report whether the indexer K cache is enabled.
1543+ [[nodiscard]] virtual bool isEnableIndexerKCache () const = 0;
//! \brief Get the indexer K cache index head dimension.
1544+ [[nodiscard]] virtual SizeType32 getIndexerKCacheIndexHeadDim () const = 0;
//! \brief Get the indexer K cache quant block size.
1545+ [[nodiscard]] virtual SizeType32 getIndexerKCacheQuantBlockSize () const = 0;
1546+
15031547 // void removeToken(SizeType32 seqSlotIdx);
15041548 virtual void rewindKVCache (LlmRequest::RequestIdType requestId, SizeType32 rewindLengths) = 0;
15051549
@@ -1546,6 +1590,7 @@ class BaseKVCacheManager
15461590 [[nodiscard]] virtual runtime::ITensor::SharedPtr getIndexerKCachePool () const = 0;
15471591 [[nodiscard]] virtual SizeType32 getPoolLayerIdx (SizeType32 layer_idx) const = 0;
15481592
//! \brief Sync transfer manager streams with the buffer manager stream.
//! KVCacheManager implements this by forwarding to its block manager.
1593+ virtual void syncTransferManagerWithBufferManager () = 0;
15491594 virtual void refreshBlocks () = 0;
15501595 virtual void flushIterationEvents () = 0;
15511596 virtual void resetReuseState () = 0;
@@ -1834,6 +1879,21 @@ class KVCacheManager : public BaseKVCacheManager
18341879 return mEnableBlockReuse ;
18351880 }
18361881
1882+ [[nodiscard]] bool isEnableIndexerKCache () const override
1883+ {
1884+ return mBlockManager .isEnableIndexerKCache ();
1885+ }
1886+
1887+ [[nodiscard]] SizeType32 getIndexerKCacheIndexHeadDim () const override
1888+ {
1889+ return mBlockManager .getIndexerKCacheIndexHeadDim ();
1890+ }
1891+
1892+ [[nodiscard]] SizeType32 getIndexerKCacheQuantBlockSize () const override
1893+ {
1894+ return mBlockManager .getIndexerKCacheQuantBlockSize ();
1895+ }
1896+
18371897 void removeToken (LlmRequest::RequestIdType requestId);
18381898 void rewindKVCache (LlmRequest::RequestIdType requestId, SizeType32 rewindLengths) override ;
18391899
@@ -1912,6 +1972,11 @@ class KVCacheManager : public BaseKVCacheManager
19121972 return mBlockManager .getPoolLayerIdx (layer_idx);
19131973 }
19141974
1975+ void syncTransferManagerWithBufferManager () override
1976+ {
1977+ mBlockManager .syncTransferManagerWithBufferManager ();
1978+ }
1979+
19151980 // ! \brief Perform per-iteration bookkeeping
19161981 void refreshBlocks () override
19171982 {
0 commit comments