Skip to content

Commit 52528cf

Browse files
authored
Text Generate REST API schema (kserve#18)
* Create generate_rest.yaml Propose generate rest api endpoints Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> * Update generate_rest.yaml Signed-off-by: Gavrish Prabhu <[email protected]> --------- Signed-off-by: Gavrish Prabhu <[email protected]>
1 parent 853da9f commit 52528cf

File tree

1 file changed

+255
-0
lines changed

1 file changed

+255
-0
lines changed
Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
# OpenAPI document header: specification version plus API metadata.
# Nesting under `info` is restored here; the diff-gutter markers that the
# page extraction interleaved with the content are dropped.
openapi: 3.1.0
info:
  title: Open Inference API for text generation
  description: Open Inference API for text generation
  # Quoted so the version is always read as a string, never as a number.
  version: '1.0.0'
# Reusable schema definitions; the operations under `paths` and the schemas
# themselves point at these entries through `$ref`.
components:
  schemas:
    # Extra information carried in the `details` field of GenerateResponse.
    Details:
      type: object
      required:
        - finish_reason
        - logprobs
      additionalProperties: {}
      properties:
        finish_reason:
          $ref: '#/components/schemas/Finish_Reason'
        logprobs:
          $ref: '#/components/schemas/Logprobs'

    # Closed set of reasons for which generation can end.
    Finish_Reason:
      type: string
      enum:
        - length
        - eos_token
        - stop_sequence
      description: >-
        The reason the model stopped generating tokens. `length` if number of
        generated tokens == `max_tokens`. `eos_token` if the model generated
        its end of sequence token and `stop_sequence` if the model generated
        a text included in `stop` array

    # Body used by every non-200 response of both operations.
    GenerateErrorResponse:
      type: object
      required:
        - error
      properties:
        error:
          type: string

    # Optional generation settings; unknown extra keys are allowed
    # (`additionalProperties: {}`), and no property is required.
    GenerateParameters:
      type: object
      additionalProperties: {}
      properties:
        temperature:
          type: number
          format: float
          default: 1
          minimum: 0
          description: >-
            What sampling temperature to use, higher values like 0.8 will make
            the output more random, while lower values like 0.2 will make it
            more focused and deterministic.
        top_p:
          type: number
          format: float
          maximum: 1
          minimum: 0
          description: >-
            An alternative to sampling with temperature, called nucleus
            sampling, where the model considers the results of the tokens with
            top_p probability mass. So 0.1 means only the tokens comprising
            the top 10% probability mass are considered.
        max_tokens:
          type: integer
          format: int32
          default: 20
          minimum: 1
          description: The maximum number of tokens to generate in the completion.
        stop:
          type: array
          items:
            type: string
          description: Sequences where the API will stop generating further tokens.
        details:
          type: boolean
          description: >-
            Flag to request for detailed response body that would include
            finish_reason and logprobs.

    # Request body shared by the generate and generate_stream operations.
    GenerateRequest:
      type: object
      required:
        - text_input
      properties:
        text_input:
          type: string
        parameters:
          allOf:
            - $ref: '#/components/schemas/GenerateParameters'

    # Success body of the non-streaming operation.
    GenerateResponse:
      type: object
      required:
        - text_output
        - model_name
      properties:
        text_output:
          type: string
        model_name:
          type: string
        model_version:
          type: string
        details:
          $ref: '#/components/schemas/Details'

    # Success body of the streaming operation; identical to GenerateResponse
    # except that `details` refers to StreamDetails.
    GenerateStreamResponse:
      type: object
      required:
        - text_output
        - model_name
      properties:
        text_output:
          type: string
        model_name:
          type: string
        model_version:
          type: string
        details:
          $ref: '#/components/schemas/StreamDetails'

    Logprobs:
      type: array
      items:
        $ref: '#/components/schemas/Token'
      description: Log probability information for the tokens.

    # Streaming counterpart of Details: carries a single `token` instead of
    # a `logprobs` array.
    StreamDetails:
      type: object
      required:
        - finish_reason
        - token
      additionalProperties: {}
      properties:
        finish_reason:
          $ref: '#/components/schemas/Finish_Reason'
        token:
          $ref: '#/components/schemas/Token'

    # One generated token; all four properties are required.
    Token:
      type: object
      required:
        - id
        - text
        - logprob
        - special
      properties:
        id:
          type: integer
          format: int32
          minimum: 0
          description: Id of the token.
        logprob:
          type: number
          format: float
          description: The log probability of this token.
        special:
          type: boolean
          description: >-
            Describes if the token is a special token. Can be used to ignore
            tokens when concatenating
        text:
          type: string
          description: The token text value.
# Operations exposed by the API.
#
# Path templates use bare braces ({MODEL_NAME}); OpenAPI does not recognise
# shell-style `${...}`, so a leading `$` would be treated as a literal
# character of the URL. The template names match the `name` of the `in: path`
# parameters declared on each operation.
paths:
  # Non-streaming text generation: JSON request in, JSON response out.
  /v2/models/{MODEL_NAME}/versions/{MODEL_VERSION}/generate:
    post:
      parameters:
        - name: MODEL_NAME
          required: true
          in: path
          schema:
            type: string
        - name: MODEL_VERSION
          required: true
          in: path
          schema:
            type: string
      requestBody:
        # GenerateRequest requires `text_input`, so the body is mandatory.
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GenerateRequest'
      responses:
        '200':
          description: generated text
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GenerateResponse'
        '422':
          description: Input validation error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GenerateErrorResponse'
              example:
                error: Input validation error
        '424':
          description: Generation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GenerateErrorResponse'
              example:
                error: Request failed during generation
        '429':
          description: Model is overloaded
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GenerateErrorResponse'
              example:
                error: Model is overloaded
        '500':
          description: Incomplete generation
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/GenerateErrorResponse'
              example:
                error: Incomplete generation

  # Streaming text generation: same JSON request, but every response
  # (including the error responses) is declared as `text/event-stream`.
  /v2/models/{MODEL_NAME}/versions/{MODEL_VERSION}/generate_stream:
    post:
      parameters:
        - name: MODEL_NAME
          required: true
          in: path
          schema:
            type: string
        - name: MODEL_VERSION
          required: true
          in: path
          schema:
            type: string
      requestBody:
        # GenerateRequest requires `text_input`, so the body is mandatory.
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/GenerateRequest'
      responses:
        '200':
          description: generated text stream
          content:
            text/event-stream:
              schema:
                $ref: '#/components/schemas/GenerateStreamResponse'
        '422':
          description: Input validation error
          content:
            text/event-stream:
              schema:
                $ref: '#/components/schemas/GenerateErrorResponse'
              example:
                error: Input validation error
        '424':
          description: Generation Error
          content:
            text/event-stream:
              schema:
                $ref: '#/components/schemas/GenerateErrorResponse'
              example:
                error: Request failed during generation
        '429':
          description: Model is overloaded
          content:
            text/event-stream:
              schema:
                $ref: '#/components/schemas/GenerateErrorResponse'
              example:
                error: Model is overloaded
        '500':
          description: Incomplete generation
          content:
            text/event-stream:
              schema:
                $ref: '#/components/schemas/GenerateErrorResponse'
              example:
                error: Incomplete generation

0 commit comments

Comments
 (0)