Skip to content

Commit f2d2406

Browse files
committed
feat: Add inputAudioTranscription support to Java ADK
1 parent 649480e commit f2d2406

File tree

1 file changed

+7
-8
lines changed

1 file changed

+7
-8
lines changed

core/src/main/java/com/google/adk/runner/Runner.java

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -310,25 +310,24 @@ public Flowable<Event> runAsync(Session session, Content newMessage, RunConfig r
310310
private InvocationContext newInvocationContextForLive(
311311
Session session, Optional<LiveRequestQueue> liveRequestQueue, RunConfig runConfig) {
312312
RunConfig.Builder runConfigBuilder = RunConfig.builder(runConfig);
313-
if (!CollectionUtils.isNullOrEmpty(runConfig.responseModalities())
314-
&& liveRequestQueue.isPresent()) {
313+
if (liveRequestQueue.isPresent()) {
315314
// Default to AUDIO modality if not specified.
316315
if (CollectionUtils.isNullOrEmpty(runConfig.responseModalities())) {
317316
runConfigBuilder.setResponseModalities(
318317
ImmutableList.of(new Modality(Modality.Known.AUDIO)));
319318
if (runConfig.outputAudioTranscription() == null) {
320319
runConfigBuilder.setOutputAudioTranscription(AudioTranscriptionConfig.builder().build());
321320
}
322-
if (runConfig.inputAudioTranscription() == null) {
323-
runConfigBuilder.setInputAudioTranscription(AudioTranscriptionConfig.builder().build());
324-
}
325321
} else if (!runConfig.responseModalities().contains(new Modality(Modality.Known.TEXT))) {
326322
if (runConfig.outputAudioTranscription() == null) {
327323
runConfigBuilder.setOutputAudioTranscription(AudioTranscriptionConfig.builder().build());
328324
}
329-
if (runConfig.inputAudioTranscription() == null) {
330-
runConfigBuilder.setInputAudioTranscription(AudioTranscriptionConfig.builder().build());
331-
}
325+
}
326+
// Parity with Python: only auto-enable input transcription for multi-agent live scenarios
327+
// so that text can be passed between agents. Otherwise leave it as-is.
328+
boolean isMultiAgent = !this.agent.subAgents().isEmpty();
329+
if (isMultiAgent && runConfig.inputAudioTranscription() == null) {
330+
runConfigBuilder.setInputAudioTranscription(AudioTranscriptionConfig.builder().build());
332331
}
333332
}
334333
return newInvocationContext(

0 commit comments

Comments
 (0)