import argparse
import asyncio
from openai import AsyncOpenAI
async def main(
    num_requests: int,
    input_tokens: int,
    max_output_tokens: int,
    model: str,
) -> None:
    """Create `num_requests` background responses, poll until all finish, print outputs.

    Args:
        num_requests: number of independent requests to create (0 is handled).
        input_tokens: approximate number of filler words appended to each
            prompt; values <= 0 disable the padding.
        max_output_tokens: per-request cap on generated tokens.
        model: model id passed to the Responses API.
    """
    client = AsyncOpenAI(
        base_url="https://api.sailresearch.com/v1",
        api_key="YOUR_KEY_HERE",  # NOTE(review): placeholder — load from env/secret store in real use
    )

    print("Supported Models:")
    supported_models = [m.id for m in (await client.models.list()).data]
    print(supported_models)

    # Padding is identical for every request, so build it once outside the prompts.
    filler = " " + ("word " * input_tokens) if input_tokens > 0 else ""

    def _prompt(i: int) -> str:
        """Build the prompt for request `i` (word count is independent of `i`)."""
        return (
            f"TASK {i}: What is a fun fact about the number {i}? Then, find the "
            f"word at index {i} in the following sequence of words: {filler}"
        )

    async def _create(i: int) -> str:
        """Create one background request and return its response id."""
        response = await client.responses.create(
            model=model,
            input=[{"role": "user", "content": _prompt(i)}],
            max_output_tokens=max_output_tokens,
            background=True,  # returns immediately with an ID to poll on; this lets requests process for many minutes without HTTP timeouts. Set it False if you prefer a blocking call.
        )
        return response.id

    # The requests are independent, so create them concurrently instead of
    # awaiting one at a time. Order of response_ids matches request index.
    response_ids = list(
        await asyncio.gather(*(_create(i) for i in range(num_requests)))
    )
    # Word count computed from a sample prompt (not the loop variable), so this
    # no longer raises NameError when num_requests == 0.
    print(
        f"Created {len(response_ids)} response IDs (input ~{len(_prompt(0).split())} words each)"
    )

    # Poll until every request reaches a terminal state. Treating failure
    # states as terminal prevents an infinite poll loop on a failed request.
    terminal_states = {"completed", "failed", "cancelled", "incomplete"}
    finished: dict[str, object] = {}
    pending = set(response_ids)
    while pending:
        print(f"\r{len(finished)}/{len(response_ids)} complete", end="", flush=True)
        for response_id in list(pending):
            response = await client.responses.retrieve(response_id)
            if response.status in terminal_states:
                finished[response_id] = response
                pending.discard(response_id)
        if pending:  # skip the final sleep once everything is done
            await asyncio.sleep(1)
    print(f"\n{len(finished)}/{len(response_ids)} complete")

    for response_id in response_ids:
        response = finished[response_id]
        if response.status == "completed":
            print(
                f"\n\n{response_id} completed with output:\n{response.output_text}"
            )
        else:
            print(f"\n\n{response_id} ended with status: {response.status}")
if __name__ == "__main__":
    # CLI entry point: declare the flags table-style, then hand the parsed
    # namespace straight to main() as keyword arguments.
    parser = argparse.ArgumentParser()
    for flag, value_type, default in (
        ("--num-requests", int, 10),
        ("--input-tokens", int, 50),
        ("--max-output-tokens", int, 4000),
        ("--model", str, "moonshotai/Kimi-K2.5"),
    ):
        parser.add_argument(flag, type=value_type, default=default)
    ns = parser.parse_args()
    # argparse converts --num-requests -> ns.num_requests etc., which matches
    # main()'s parameter names exactly, so vars(ns) can be splatted directly.
    asyncio.run(main(**vars(ns)))