KnowledgeUpdatePlayground/openaiAPI.py at main · Aochong-Li/KnowledgeUpdatePlayground · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
import json
import pandas as pd
from openai import OpenAI
from tqdm import tqdm
import os
import time

from concurrent.futures import ProcessPoolExecutor, as_completed


def init_client(model: str):
    """Initialize OpenAI/Together AI client based on model type."""
    if 'gpt' in model or 'o1' in model or 'o3' in model:
        client = OpenAI(
            api_key=os.environ['OPENAI_API_KEY'],
            organization=os.environ['OPENAI_ORG_ID'],
            project=os.environ['OPENAI_PROJECT_ID']
        )
    else:
        client = OpenAI(
            api_key=os.environ['TOGETHER_API_KEY'],
            base_url='https://api.together.xyz/v1'
        )
    return client

# default client is gpt
client = init_client('gpt')

def chat_completions(input_prompt: str, developer_message: str = 'You are a helpful assistant',
                     model: str = 'gpt-4o', temperature: float = 0.0,  max_tokens: int = 1024, n: int = 1,
                     top_p: float = 1.0, frequency_penalty: float = 0.0, presence_penalty: float = 0.0, stop: list[str] = None
                     ):
    """Generate chat completions using specified model."""
    client = init_client(model)

    messages = [
        {"role": "system" if 'o3' in model else "developer", "content": developer_message},
        {"role": "user", "content": input_prompt}
    ]

    try:
        if 'o3' in model:
            response = client.chat.completions.create(
                model=model,
                messages=messages,
                reasoning_effort="medium",
                max_completion_tokens=max_tokens
            )
        else:
            response = client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                n=n,
                top_p=top_p,
                frequency_penalty=frequency_penalty,
                presence_penalty=presence_penalty,
                stop=stop
            )
    except Exception:
        return None

    return response.choices[0].message.content if len(response.choices) == 1 else [choice.message.content for choice in response.choices]

def process_chunk_wrapper(args):
    """Process a chunk of inputs in parallel."""
    chunk, chunk_id = args
    results = []
    for input_object in tqdm(chunk, desc=f"Process-{chunk_id}", position=chunk_id):
        _id = int(input_object['custom_id'].replace('idx_', ''))
        input_prompt = input_object['body']['messages'][1]['content']
        developer_message = input_object['body']['messages'][0]['content']
        model = input_object['body']['model']
        temperature = input_object['body']['temperature']
        max_tokens = input_object['body']['max_tokens']
        n = input_object['body']['n']
        top_p = input_object['body']['top_p']
        frequency_penalty = input_object['body']['frequency_penalty']
        presence_penalty = input_object['body']['presence_penalty']
        stop = input_object['body']['stop']

        try:
            response = chat_completions(
                input_prompt=input_prompt,
                developer_message=developer_message,
                model=model,
                temperature=temperature,
                max_tokens=max_tokens,
                n=n,
                top_p=top_p,
                frequency_penalty=frequency_penalty,
                presence_penalty=presence_penalty,
                stop=stop
            )
        except:
            response = None
        results.append((_id, response))
        time.sleep(1)
    return results

def chat_completions_parallel(input_filepath: str,
                              cache_filepath: str,
                              num_processes: int = 20
                              ):
    """Execute chat completions in parallel using ProcessPoolExecutor."""
    with open(input_filepath, 'r') as f:
        batch_input = [json.loads(line) for line in f]

    chunk_size = max(1, len(batch_input) // num_processes)
    chunks = [batch_input[i:i + chunk_size] for i in range(0, len(batch_input), chunk_size)]
    args = [(chunk, i) for i, chunk in enumerate(chunks)]

    results = []
    with ProcessPoolExecutor(max_workers=num_processes) as executor:
        futures = [executor.submit(process_chunk_wrapper, arg) for arg in args]
        for future in as_completed(futures):
            results.extend(future.result())

    sorted_results = sorted(results, key=lambda x: x[0])
    output_df = pd.DataFrame({'response': [resp for _, resp in sorted_results]})
    output_df.to_pickle(cache_filepath)

def minibatch_stream_generate_response(input_filepath: str,
                                       batch_log_filepath: str = None,
                                       minibatch_filepath: str = '/home/al2644/research/openai_batch_io/minibatchinput.jsonl',
                                       batch_size: int = 10,
                                       completion_window: str = '24h',
                                       failed_batch_start: int = None,
                                       failed_batch_end: int = None,
                                       batch_rate_limit: int = None):
    batch_logs = {}
    with open(input_filepath, 'r') as f:
        batch_input = [json.loads(line) for line in f]
        client = init_client(batch_input[0]['body']['model'])

        if failed_batch_start is not None and failed_batch_end is not None:
            batch_input = batch_input[failed_batch_start: failed_batch_end]

    while len(batch_logs) * batch_size < len(batch_input):
        batch_idx = batch_size * len(batch_logs)

        with open(minibatch_filepath, 'w') as f:
            for item in batch_input[batch_idx : batch_idx + batch_size]:
                f.write(json.dumps(item) + '\n')

        # uplaod batch input files
        batch_input_file = client.files.create(
            file=open(minibatch_filepath, "rb"),
            purpose="batch"
        )

        # create batch
        batch_input_file_id = batch_input_file.id

        batch_log = client.batches.create(
            input_file_id=batch_input_file_id,
            endpoint="/v1/chat/completions",
            completion_window=completion_window,
            metadata={
            "description": f"minibatch_{batch_idx}"
            }
        )
        print(f'batch {batch_log.id} is created')

        batch_logs[batch_idx] = batch_log.id

        if batch_rate_limit is not None and len(batch_logs) % batch_rate_limit == 0:
            time.sleep(30)

        with open(batch_log_filepath, 'w') as f:
            json.dump(batch_logs, f)

def minibatch_retrieve_response(output_dict: dict = None):
    """Retrieve responses from minibatches."""
    model_outputs = {}
    for _, output_file_id in output_dict.items():
        try:
            file_response = client.files.content(output_file_id)
            print(f'Retrieving output {output_file_id}')

            text_responses = file_response.text.split('\n')[:-1]
            json_responses = [json.loads(x) for x in text_responses]

            for output in json_responses:
                custom_id = int(output['custom_id'].replace('idx_', ''))
                content = output['response']['body']['choices'][0]['message']['content']
                model_outputs[custom_id] = content
        except:
            continue

    return pd.DataFrame.from_dict(model_outputs, orient='index', columns=['response'])

def minibatch_stream_retry(batch_log_filepath: str, batch_rate_limit: int = None):
    """Retry failed minibatches."""
    failed_batch_logs = {}
    retry_batch_logs = {}

    with open(batch_log_filepath, 'r') as f:
        batch_logs = json.load(f)

    for batch_idx, batch_log_id in batch_logs.items():
        status = check_batch_status(batch_log_id)
        if status == 'failed':
            failed_batch_logs[batch_idx] = batch_log_id

    for batch_idx, batch_log_id in failed_batch_logs.items():
        print(f'Retrying batch {batch_idx}')

        batch_log = client.batches.retrieve(batch_log_id)
        batch_input_file_id = batch_log.input_file_id
        completion_window = batch_log.completion_window

        batch_log = client.batches.create(
            input_file_id=batch_input_file_id,
            endpoint="/v1/chat/completions",
            completion_window=completion_window,
            metadata={
            "description": f"minibatch_{batch_idx}"
            }
        )
        print(f'batch {batch_log.id} is created')

        retry_batch_logs[batch_idx] = batch_log.id

        if batch_rate_limit is not None and len(retry_batch_logs) % batch_rate_limit == 0:
            time.sleep(30)

        batch_logs.update(retry_batch_logs)

        with open(batch_log_filepath, 'w') as f:
            json.dump(batch_logs, f)

def batch_query_template(input_prompt: str, developer_message: str = 'You are a helpful assistant', model: str = 'gpt-4o', custom_id: str = None,
                         temperature: float = 0.0, max_tokens: int = 1024, n: int = 1, top_p: float = 1.0, frequency_penalty: float = 0.0,
                         presence_penalty: float = 0.0, stop: list[str] = None):
    """Create a template for batch query."""
    query_template = {
        "custom_id": custom_id,
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": model,
            "temperature": temperature,
            "messages": [
                {"role": "developer", "content": developer_message},
                {"role": "user", "content": input_prompt}
            ],
            "max_tokens": max_tokens,
            "n": n,
            "top_p": top_p,
            "frequency_penalty": frequency_penalty,
            "presence_penalty": presence_penalty,
            "stop": stop
        }
    }
    return query_template

def retrieve_batch_output_file_id(batch_log_id: str, model='gpt'):
    """Retrieve output file ID from batch log."""
    client = init_client(model)
    batch_log = client.batches.retrieve(batch_log_id)
    return batch_log.output_file_id

def check_batch_status(batch_log_id: str, model='gpt'):
    """Check status of a batch."""
    client = init_client(model)
    batch_log = client.batches.retrieve(batch_log_id)
    return batch_log.status

def check_batch_error(batch_log_id: str, model='gpt'):
    """Check for errors in a batch."""
    client = init_client(model)
    batch_log = client.batches.retrieve(batch_log_id)

    if batch_log.status == 'failed':
        print(f'Batch {batch_log_id} failed with error: {batch_log.errors}')
        return batch_log.errors
    return None

def cancel_batch(batch_log_id: str, model='gpt'):
    """Cancel a batch operation."""
    client = init_client(model)
    client.batches.cancel(batch_log_id)
    return f'Batch {batch_log_id} is cancelled'

def cache_batch_query(filepath: str, query: dict):
    """Cache a batch query to file."""
    with open(filepath, 'a') as f:
        f.write(json.dumps(query) + '\n')