Text Generate REST API schema (kserve#18)

gavrissh · web-flow · commit 52528cf4c483 · 2024-02-05T19:44:48.000-05:00
* Create generate_rest.yaml

Propose generate rest api endpoints

Signed-off-by: Gavrish Prabhu <gavrish.prabhu@nutanix.com>

* Update generate_rest.yaml

Signed-off-by: Gavrish Prabhu <gavrish.prabhu@nutanix.com>

* Update generate_rest.yaml

Signed-off-by: Gavrish Prabhu <gavrish.prabhu@nutanix.com>

* Update generate_rest.yaml

Signed-off-by: Gavrish Prabhu <gavrish.prabhu@nutanix.com>

* Update generate_rest.yaml

Signed-off-by: Gavrish Prabhu <gavrish.prabhu@nutanix.com>

* Update generate_rest.yaml

Signed-off-by: Gavrish Prabhu <gavrish.prabhu@nutanix.com>

* Update generate_rest.yaml

Signed-off-by: Gavrish Prabhu <gavrish.prabhu@nutanix.com>

* Update generate_rest.yaml

Signed-off-by: Gavrish Prabhu <gavrish.prabhu@nutanix.com>

* Update generate_rest.yaml

Signed-off-by: Gavrish Prabhu <gavrish.prabhu@nutanix.com>

* Update generate_rest.yaml

Signed-off-by: Gavrish Prabhu <gavrish.prabhu@nutanix.com>

* Update generate_rest.yaml

Signed-off-by: Gavrish Prabhu <gavrish.prabhu@nutanix.com>

* Update generate_rest.yaml

Signed-off-by: Gavrish Prabhu <gavrish.prabhu@nutanix.com>

* Update generate_rest.yaml

Signed-off-by: Gavrish Prabhu <gavrish.prabhu@nutanix.com>

* Update generate_rest.yaml

Signed-off-by: Gavrish Prabhu <gavrish.prabhu@nutanix.com>

---------

Signed-off-by: Gavrish Prabhu <gavrish.prabhu@nutanix.com>
diff --git a/specification/protocol/generate_rest.yaml b/specification/protocol/generate_rest.yaml
@@ -0,0 +1,255 @@
+openapi: '3.1.0'  # OpenAPI Specification version this document is written against
+info:
+  version: '1.0.0'  # version of this API description; quoted to pin it as a string
+  title: Open Inference API for text generation
+  description: Open Inference API for text generation
+components:
+  schemas:
+    # Detail payload of a GenerateResponse: stop reason plus per-token logprobs.
+    Details:
+      type: object
+      required: [finish_reason, logprobs]
+      properties:
+        finish_reason:
+          $ref: '#/components/schemas/Finish_Reason'
+        logprobs:
+          $ref: '#/components/schemas/Logprobs'
+      # Empty schema: keys beyond the two declared above are accepted as-is.
+      additionalProperties: {}
+    Finish_Reason:
+      type: string
+      enum: [length, eos_token, stop_sequence]  # closed set; meanings are described below
+      description: >-
+        The reason the model stopped generating tokens. `length` if number of generated
+        tokens == `max_tokens`. `eos_token` if the model generated its end of sequence
+        token and `stop_sequence` if the model generated a text included in `stop` array
+    # Error envelope: an object carrying one mandatory string field, `error`.
+    GenerateErrorResponse:
+      type: object
+      required: [error]
+      properties:
+        error:
+          type: string
+    # Optional generation settings; none of the properties below is required.
+    GenerateParameters:
+      type: object
+      properties:
+        temperature:
+          description: What sampling temperature to use, higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+          type: number
+          format: float
+          minimum: 0
+          default: 1
+        top_p:
+          description: An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
+          type: number
+          format: float
+          minimum: 0
+          maximum: 1
+        max_tokens:
+          description: The maximum number of tokens to generate in the completion.
+          type: integer
+          format: int32
+          minimum: 1
+          default: 20
+        stop:
+          description: Sequences where the API will stop generating further tokens.
+          type: array
+          items: {type: string}
+        details:
+          description: Flag to request for detailed response body that would include finish_reason and logprobs.
+          type: boolean
+      additionalProperties: {}  # empty schema: undeclared parameters are accepted
+    # Request body shared by the generate and generate_stream operations.
+    GenerateRequest:
+      type: object
+      required: [text_input]
+      properties:
+        text_input:
+          type: string
+        parameters:
+          # Direct $ref, matching how the other schemas in this file link to a component.
+          $ref: '#/components/schemas/GenerateParameters'
+    # Body of a successful (200) reply from the non-streaming generate operation.
+    GenerateResponse:
+      type: object
+      # model_version and details are optional; the two names below are mandatory.
+      required: [text_output, model_name]
+      properties:
+        model_name:
+          type: string
+        model_version:
+          type: string
+        text_output:
+          type: string
+        details:
+          $ref: '#/components/schemas/Details'
+    # Schema of the 200 text/event-stream reply from the generate_stream operation.
+    GenerateStreamResponse:
+      type: object
+      # model_version and details are optional; the two names below are mandatory.
+      required: [text_output, model_name]
+      properties:
+        model_name:
+          type: string
+        model_version:
+          type: string
+        text_output:
+          type: string
+        details:
+          $ref: '#/components/schemas/StreamDetails'
+    Logprobs:
+      description: Log probability information for the tokens.
+      type: array
+      items:
+        $ref: '#/components/schemas/Token'  # each element is one Token object
+    # Detail payload of a GenerateStreamResponse: stop reason plus a single token.
+    StreamDetails:
+      type: object
+      required: [finish_reason, token]
+      properties:
+        finish_reason:
+          $ref: '#/components/schemas/Finish_Reason'
+        token:
+          $ref: '#/components/schemas/Token'
+      # Empty schema: keys beyond the two declared above are accepted as-is.
+      additionalProperties: {}
+    # One generated token: its id, text, log probability and special-token flag.
+    # Referenced by Logprobs (as array items) and by StreamDetails.token.
+    Token:
+      type: object
+      # All four properties are mandatory on every Token.
+      required: [id, text, logprob, special]
+      # Properties are listed in the same order as the `required` array.
+      properties:
+        id:
+          description: Id of the token.
+          type: integer
+          format: int32
+          minimum: 0
+        text:
+          description: The token text value.
+          type: string
+        logprob:
+          description: The log probability of this token.
+          type: number
+          format: float
+        special:
+          description: Describes if the token is a special token. Can be used to ignore tokens when concatenating
+          type: boolean
+paths:
+  # OpenAPI path templating is `{name}`; a leading `$` would be a literal URL character.
+  /v2/models/{MODEL_NAME}/versions/{MODEL_VERSION}/generate:
+    post:
+      parameters:
+        - name: MODEL_NAME
+          in: path
+          required: true
+          schema: {type: string}
+        - name: MODEL_VERSION
+          in: path
+          required: true
+          schema: {type: string}
+      requestBody:
+        required: true  # GenerateRequest.text_input is mandatory, so the body is too
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/GenerateRequest'
+      responses:
+        '200':
+          description: generated text
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/GenerateResponse'
+        '422':
+          description: Input validation error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/GenerateErrorResponse'
+              example:
+                error: Input validation error
+        '424':
+          description: Generation Error
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/GenerateErrorResponse'
+              example:
+                error: Request failed during generation
+        '429':
+          description: Model is overloaded
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/GenerateErrorResponse'
+              example:
+                error: Model is overloaded
+        '500':
+          description: Incomplete generation
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/GenerateErrorResponse'
+              example:
+                error: Incomplete generation
+
+  # OpenAPI path templating is `{name}`; a leading `$` would be a literal URL character.
+  /v2/models/{MODEL_NAME}/versions/{MODEL_VERSION}/generate_stream:
+    post:
+      parameters:
+        - name: MODEL_NAME
+          in: path
+          required: true
+          schema: {type: string}
+        - name: MODEL_VERSION
+          in: path
+          required: true
+          schema: {type: string}
+      requestBody:
+        required: true  # GenerateRequest.text_input is mandatory, so the body is too
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/GenerateRequest'
+      responses:
+        '200':
+          description: generated text stream
+          content:
+            text/event-stream:
+              schema:
+                $ref: '#/components/schemas/GenerateStreamResponse'
+        '422':
+          description: Input validation error
+          content:
+            text/event-stream:
+              schema:
+                $ref: '#/components/schemas/GenerateErrorResponse'
+              example:
+                error: Input validation error
+        '424':
+          description: Generation Error
+          content:
+            text/event-stream:
+              schema:
+                $ref: '#/components/schemas/GenerateErrorResponse'
+              example:
+                error: Request failed during generation
+        '429':
+          description: Model is overloaded
+          content:
+            text/event-stream:
+              schema:
+                $ref: '#/components/schemas/GenerateErrorResponse'
+              example:
+                error: Model is overloaded
+        '500':
+          description: Incomplete generation
+          content:
+            text/event-stream:
+              schema:
+                $ref: '#/components/schemas/GenerateErrorResponse'
+              example:
+                error: Incomplete generation