Skip to content

Commit a810c12

Browse files
committed
Fix failed tests
Signed-off-by: Iman Tabrizian <[email protected]>
1 parent a24a6a3 commit a810c12

File tree

3 files changed

+8
-3
lines changed

3 files changed

+8
-3
lines changed

cpp/tensorrt_llm/batch_manager/cacheFormatter.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ BlockRange getBlockRangeForSending(BaseKVCacheManager* cacheManager, LlmRequest
4747
{
4848
auto poolNum = cacheManager->getBlockManager().getNumPools(
4949
/*includeBlockScalePools=*/false, /*includeIndexerKCachePools=*/false);
50+
51+
// Note: When the recv side has CP, the requested seqLen is less than the seqLen on the sender side, as seqLen is
52+
// distributed among CP ranks. So, we transfer all blocks from the send side.
5053
if (poolNum > 1 || !cacheManager->isEnableBlockReuse() || lastBlockKey.uniqueTokens.size() == 0 || recvSideHasCP)
5154
{
5255
// disable reuse path, and VSWA doesn't support reuse.

cpp/tests/unit_tests/multi_gpu/cacheTransceiverTest.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,7 +1022,8 @@ class AsymmetricalCacheTest : public ::testing::TestWithParam<AsymmetricTestPara
10221022
}
10231023
int kvFactor = mCacheState->getAttentionConfig().mKvFactor;
10241024
int tokensPerBlock = mCacheState->getModelConfig().mTokensPerBlock;
1025-
int startTokenId = (blockId * mCpSize + mCpRank) * tokensPerBlock;
1025+
// We don't account for CP here because contextCP is always 1 currently.
1026+
int startTokenId = blockId * tokensPerBlock;
10261027
int sizePerHead;
10271028
if (isIndexerKCache)
10281029
{
@@ -1117,7 +1118,8 @@ class AsymmetricalCacheTest : public ::testing::TestWithParam<AsymmetricTestPara
11171118
}
11181119
int kvFactor = mCacheState->getAttentionConfig().mKvFactor;
11191120
int tokensPerBlock = mCacheState->getModelConfig().mTokensPerBlock;
1120-
int startTokenId = (blockId * mCpSize + mCpRank) * tokensPerBlock;
1121+
// We don't account for CP here because contextCP is always 1 currently.
1122+
int startTokenId = blockId * tokensPerBlock;
11211123
int sizePerHead;
11221124
if (isIndexerKCache)
11231125
{

tests/integration/defs/accuracy/test_disaggregated_serving.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def result(self):
4545
DuckLLM = namedtuple('DuckLLM', ['args', 'tokenizer', 'generate_async'])
4646

4747
DEFAULT_TEST_TIMEOUT = 1800
48-
DEFAULT_SERVER_WAITING_TIMEOUT = 1200
48+
DEFAULT_SERVER_WAITING_TIMEOUT = 3600
4949

5050

5151
class MyThreadPoolExecutor(ThreadPoolExecutor):

0 commit comments

Comments
 (0)