Skip to content

Commit 5b9e453

Browse files
yunfengzhou-hub authored and Sxnan committed
[FLINK-38549][model] Support limiting context window size
This closes #27139
1 parent 562883f commit 5b9e453

File tree

11 files changed

+474
-24
lines changed

11 files changed

+474
-24
lines changed

docs/content.zh/docs/connectors/models/openai.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,33 @@ FROM ML_PREDICT(
130130
<td>String</td>
131131
<td>模型名称,例如:<code>gpt-3.5-turbo</code>, <code>text-embedding-ada-002</code>。</td>
132132
</tr>
133+
<tr>
134+
<td>
135+
<h5>max-context-size</h5>
136+
</td>
137+
<td>可选</td>
138+
<td style="word-wrap: break-word;">(none)</td>
139+
<td>Integer</td>
140+
<td>单个请求的最大上下文长度,单位为Token数量。当长度超过该值时,将使用context-overflow-action指定的溢出行为。</td>
141+
</tr>
142+
<tr>
143+
<td>
144+
<h5>context-overflow-action</h5>
145+
</td>
146+
<td>可选</td>
147+
<td style="word-wrap: break-word;">(none)</td>
148+
<td>String</td>
149+
<td>处理上下文溢出的操作。支持的操作:
150+
<ul>
151+
<li><code>truncated-tail</code>(默认): 从上下文尾部截断超出的token。</li>
152+
<li><code>truncated-tail-log</code>: 从上下文尾部截断超出的token。记录截断日志。</li>
153+
<li><code>truncated-head</code>: 从上下文头部截断超出的token。</li>
154+
<li><code>truncated-head-log</code>: 从上下文头部截断超出的token。记录截断日志。</li>
155+
<li><code>skipped</code>: 跳过输入行。</li>
156+
<li><code>skipped-log</code>: 跳过输入行。记录跳过日志。</li>
157+
</ul>
158+
</td>
159+
</tr>
133160
</tbody>
134161
</table>
135162

docs/content/docs/connectors/models/openai.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,33 @@ FROM ML_PREDICT(
130130
<td>String</td>
131131
<td>Model name, e.g. <code>gpt-3.5-turbo</code>, <code>text-embedding-ada-002</code>.</td>
132132
</tr>
133+
<tr>
134+
<td>
135+
<h5>max-context-size</h5>
136+
</td>
137+
<td>optional</td>
138+
<td style="word-wrap: break-word;">(none)</td>
139+
<td>Integer</td>
140+
<td>Maximum number of tokens in the context. The action specified by <code>context-overflow-action</code> is triggered when this threshold is exceeded.</td>
141+
</tr>
142+
<tr>
143+
<td>
144+
<h5>context-overflow-action</h5>
145+
</td>
146+
<td>optional</td>
147+
<td style="word-wrap: break-word;">(none)</td>
148+
<td>String</td>
149+
<td>Action to handle context overflows. Supported actions:
150+
<ul>
151+
<li><code>truncated-tail</code> (default): Truncates exceeded tokens from the tail of the context.</li>
152+
<li><code>truncated-tail-log</code>: Truncates exceeded tokens from the tail of the context. Records the truncation log.</li>
153+
<li><code>truncated-head</code>: Truncates exceeded tokens from the head of the context.</li>
154+
<li><code>truncated-head-log</code>: Truncates exceeded tokens from the head of the context. Records the truncation log.</li>
155+
<li><code>skipped</code>: Skips the input row.</li>
156+
<li><code>skipped-log</code>: Skips the input row. Records the skipping log.</li>
157+
</ul>
158+
</td>
159+
</tr>
133160
</tbody>
134161
</table>
135162

flink-models/flink-model-openai/pom.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,12 @@ under the License.
7272
<optional>${flink.markBundledAsOptional}</optional>
7373
</dependency>
7474

75+
<dependency>
76+
<groupId>com.knuddels</groupId>
77+
<artifactId>jtokkit</artifactId>
78+
<version>1.1.0</version>
79+
</dependency>
80+
7581
<!-- Core dependencies -->
7682
<dependency>
7783
<groupId>org.apache.flink</groupId>

flink-models/flink-model-openai/src/main/java/org/apache/flink/model/openai/AbstractOpenAIModelFunction.java

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.apache.flink.table.api.config.ExecutionConfigOptions;
2626
import org.apache.flink.table.catalog.Column;
2727
import org.apache.flink.table.catalog.ResolvedSchema;
28+
import org.apache.flink.table.data.RowData;
2829
import org.apache.flink.table.factories.ModelProviderFactory;
2930
import org.apache.flink.table.functions.AsyncPredictFunction;
3031
import org.apache.flink.table.functions.FunctionContext;
@@ -35,7 +36,12 @@
3536
import org.slf4j.Logger;
3637
import org.slf4j.LoggerFactory;
3738

39+
import javax.annotation.Nullable;
40+
41+
import java.util.Collection;
42+
import java.util.Collections;
3843
import java.util.List;
44+
import java.util.concurrent.CompletableFuture;
3945
import java.util.stream.Collectors;
4046

4147
import static org.apache.flink.configuration.description.TextElement.code;
@@ -73,11 +79,32 @@ public abstract class AbstractOpenAIModelFunction extends AsyncPredictFunction {
7379
code("gpt-3.5-turbo"), code("text-embedding-ada-002"))
7480
.build());
7581

82+
public static final ConfigOption<Integer> MAX_CONTEXT_SIZE =
83+
ConfigOptions.key("max-context-size")
84+
.intType()
85+
.noDefaultValue()
86+
.withDescription(
87+
"Max number of tokens for context. context-overflow-action would be triggered if this threshold is exceeded.");
88+
89+
public static final ConfigOption<ContextOverflowAction> CONTEXT_OVERFLOW_ACTION =
90+
ConfigOptions.key("context-overflow-action")
91+
.enumType(ContextOverflowAction.class)
92+
.defaultValue(ContextOverflowAction.TRUNCATED_TAIL)
93+
.withDescription(
94+
Description.builder()
95+
.text("Action to handle context overflows. Supported actions:")
96+
.linebreak()
97+
.text(ContextOverflowAction.getAllValuesAndDescriptions())
98+
.build());
99+
76100
protected transient OpenAIClientAsync client;
77101

78102
private final int numRetry;
79103
private final String baseUrl;
80104
private final String apiKey;
105+
private final String model;
106+
@Nullable private final Integer maxContextSize;
107+
private final ContextOverflowAction contextOverflowAction;
81108

82109
public AbstractOpenAIModelFunction(
83110
ModelProviderFactory.Context factoryContext, ReadableConfig config) {
@@ -94,6 +121,9 @@ public AbstractOpenAIModelFunction(
94121
// resilience while maintaining throughput efficiency.
95122
this.numRetry =
96123
config.get(ExecutionConfigOptions.TABLE_EXEC_ASYNC_LOOKUP_BUFFER_CAPACITY) * 10;
124+
this.model = config.get(MODEL);
125+
this.maxContextSize = config.get(MAX_CONTEXT_SIZE);
126+
this.contextOverflowAction = config.get(CONTEXT_OVERFLOW_ACTION);
97127

98128
validateSingleColumnSchema(
99129
factoryContext.getCatalogModel().getResolvedInputSchema(),
@@ -106,6 +136,24 @@ public void open(FunctionContext context) throws Exception {
106136
super.open(context);
107137
LOG.debug("Creating an OpenAI client.");
108138
this.client = OpenAIUtils.createAsyncClient(baseUrl, apiKey, numRetry);
139+
this.contextOverflowAction.initializeEncodingForContextLimit(model, maxContextSize);
140+
}
141+
142+
@Override
143+
public CompletableFuture<Collection<RowData>> asyncPredict(RowData rowData) {
144+
if (rowData.isNullAt(0)) {
145+
LOG.warn("Input is null, skipping prediction.");
146+
return CompletableFuture.completedFuture(Collections.emptyList());
147+
}
148+
149+
String input =
150+
contextOverflowAction.processTokensWithLimit(
151+
model, rowData.getString(0).toString(), maxContextSize);
152+
if (input == null) {
153+
return CompletableFuture.completedFuture(Collections.emptyList());
154+
}
155+
156+
return asyncPredictInternal(input);
109157
}
110158

111159
@Override
@@ -120,6 +168,8 @@ public void close() throws Exception {
120168

121169
protected abstract String getEndpointSuffix();
122170

171+
protected abstract CompletableFuture<Collection<RowData>> asyncPredictInternal(String input);
172+
123173
protected void validateSingleColumnSchema(
124174
ResolvedSchema schema, LogicalType expectedType, String inputOrOutput) {
125175
List<Column> columns = schema.getColumns();

0 commit comments

Comments
 (0)