@@ -138,15 +138,6 @@ public String generateResponse(String message, String systemMessage, int maxToke
138138 }
139139 }
140140
141     -    public void generateStreamingResponse(String message, String systemMessage, SseEmitter emitter) {
142     -        generateStreamingResponse(message, systemMessage, emitter, 150, 0.7, 0.9);
143     -    }
144-
145     -    public void generateStreamingResponse(String message, String systemMessage, SseEmitter emitter,
146     -            int maxTokens, double temperature, double topP) {
147     -        generateStreamingResponse(message, systemMessage, emitter, maxTokens, temperature, topP, null);
148     -    }
149-
150 141      public void generateStreamingResponse(String message, String systemMessage, SseEmitter emitter,
151 142              int maxTokens, double temperature, double topP, Long seed) {
152 143          CompletableFuture.runAsync(() -> {
@@ -170,11 +161,12 @@ public void generateStreamingResponse(String message, String systemMessage, SseE
170 161              promptTokens.addAll(chatFormat.encodeMessage(new ChatFormat.Message(ChatFormat.Role.USER, message)));
171 162              promptTokens.addAll(chatFormat.encodeHeader(new ChatFormat.Message(ChatFormat.Role.ASSISTANT, "")));
172163
173     -            // Handle reasoning tokens for streaming
    164 +            // Include reasoning for Deepseek-R1-Distill-Qwen
174 165              if (model.shouldIncludeReasoning()) {
175 166                  List<Integer> thinkStartTokens = model.tokenizer().encode("<think>\n", model.tokenizer().getSpecialTokens().keySet());
176 167                  promptTokens.addAll(thinkStartTokens);
177     -                emitter.send(SseEmitter.event().data("<think>\n")); // Output immediately
    168 +                // We are in streaming, immediately output the think start
    169 +                emitter.send(SseEmitter.event().data("<think>\n"));
178 170              }
179171
180 172              Set<Integer> stopTokens = chatFormat.getStopTokens();
0 commit comments