@@ -310,25 +310,24 @@ public Flowable<Event> runAsync(Session session, Content newMessage, RunConfig r
310310 private InvocationContext newInvocationContextForLive (
311311 Session session , Optional <LiveRequestQueue > liveRequestQueue , RunConfig runConfig ) {
312312 RunConfig .Builder runConfigBuilder = RunConfig .builder (runConfig );
313- if (!CollectionUtils .isNullOrEmpty (runConfig .responseModalities ())
314- && liveRequestQueue .isPresent ()) {
313+ if (liveRequestQueue .isPresent ()) {
315314 // Default to AUDIO modality if not specified.
316315 if (CollectionUtils .isNullOrEmpty (runConfig .responseModalities ())) {
317316 runConfigBuilder .setResponseModalities (
318317 ImmutableList .of (new Modality (Modality .Known .AUDIO )));
319318 if (runConfig .outputAudioTranscription () == null ) {
320319 runConfigBuilder .setOutputAudioTranscription (AudioTranscriptionConfig .builder ().build ());
321320 }
322- if (runConfig .inputAudioTranscription () == null ) {
323- runConfigBuilder .setInputAudioTranscription (AudioTranscriptionConfig .builder ().build ());
324- }
325321 } else if (!runConfig .responseModalities ().contains (new Modality (Modality .Known .TEXT ))) {
326322 if (runConfig .outputAudioTranscription () == null ) {
327323 runConfigBuilder .setOutputAudioTranscription (AudioTranscriptionConfig .builder ().build ());
328324 }
329- if (runConfig .inputAudioTranscription () == null ) {
330- runConfigBuilder .setInputAudioTranscription (AudioTranscriptionConfig .builder ().build ());
331- }
325+ }
326+ // Parity with Python: only auto-enable input transcription for multi-agent live scenarios
327+ // so that text can be passed between agents. Otherwise leave it as-is.
328+ boolean isMultiAgent = !this .agent .subAgents ().isEmpty ();
329+ if (isMultiAgent && runConfig .inputAudioTranscription () == null ) {
330+ runConfigBuilder .setInputAudioTranscription (AudioTranscriptionConfig .builder ().build ());
332331 }
333332 }
334333 return newInvocationContext (
0 commit comments