Quickstart: OpenAI Responses API on Sail
Sail supports the OpenAI Responses API (/v1/responses). If you’re already using the OpenAI SDK, switching to Sail is mostly a base URL and API key change.

1. Get your API key

Sign up at the Sail dashboard and create an API key.

2. Make a request

Install the OpenAI SDK and point it at Sail. Create a response with background=True so the request returns a response_id immediately, then poll until completion:
import argparse
import asyncio

from openai import AsyncOpenAI


async def main(
    num_requests: int,
    input_tokens: int,
    max_output_tokens: int,
    model: str,
) -> None:
    """Submit background requests to the Sail Responses API and poll to completion.

    Args:
        num_requests: Number of responses to create.
        input_tokens: Approximate count of filler words appended to each prompt
            (used to pad the input size for load testing).
        max_output_tokens: Per-request cap on generated tokens.
        model: Model identifier to request (should appear in /v1/models).
    """
    client = AsyncOpenAI(
        base_url="https://api.sailresearch.com/v1",
        api_key="YOUR_KEY_HERE",
    )

    print("Supported Models:")
    supported_models = [m.id for m in (await client.models.list()).data]
    print(supported_models)

    response_ids = []
    # Initialize so the summary print below is safe even when num_requests == 0
    # (previously this raised NameError because `content` leaked from the loop).
    content = ""
    for i in range(num_requests):
        filler = ""
        if input_tokens > 0:
            filler = " " + ("word " * input_tokens)
        content = f"TASK {i}: What is a fun fact about the number {i}? Then, find the word at index {i} in the following sequence of words: {filler}"

        response = await client.responses.create(
            model=model,
            input=[{"role": "user", "content": content}],
            max_output_tokens=max_output_tokens,
            background=True, # returns immediately with an ID to poll on; this lets requests process for many minutes without HTTP timeouts. Set it False if you prefer a blocking call.
        )
        response_ids.append(response.id)
    print(
        f"Created {len(response_ids)} response IDs (input ~{len(content.split())} words each)"
    )

    # Poll until every request reaches a terminal state. Treating only
    # "completed" as terminal would loop forever on failed/cancelled requests.
    terminal_statuses = {"completed", "failed", "cancelled", "incomplete"}
    finished = {}
    pending = set(response_ids)
    while pending:
        print(f"\r{len(finished)}/{len(response_ids)} complete", end="", flush=True)
        for response_id in list(pending):
            response = await client.responses.retrieve(response_id)
            if response.status in terminal_statuses:
                finished[response_id] = response
                pending.discard(response_id)
        await asyncio.sleep(1)

    num_completed = sum(1 for r in finished.values() if r.status == "completed")
    print(f"\n{num_completed}/{len(response_ids)} complete")
    for response_id in response_ids:
        response = finished[response_id]
        if response.status == "completed":
            print(
                f"\n\n{response_id} completed with output:\n{response.output_text}"
            )
        else:
            # Surface non-success terminal states instead of crashing on a KeyError.
            print(f"\n\n{response_id} finished with status: {response.status}")


if __name__ == "__main__":
    # CLI entry point: every flag maps one-to-one onto a main() keyword argument.
    parser = argparse.ArgumentParser()
    parser.add_argument("--num-requests", type=int, default=10)
    parser.add_argument("--input-tokens", type=int, default=50)
    parser.add_argument("--max-output-tokens", type=int, default=4000)
    parser.add_argument("--model", type=str, default="moonshotai/Kimi-K2.5")

    # argparse converts --num-requests to the dest `num_requests`, etc., so the
    # parsed namespace can be forwarded directly as keyword arguments.
    cli_args = vars(parser.parse_args())
    asyncio.run(main(**cli_args))

cURL Example

# Create the response (returns immediately with an id)
RESPONSE_ID=$(curl -s https://api.sailresearch.com/v1/responses \
  -H "Authorization: Bearer YOUR_SAIL_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "openai/gpt-oss-20b",
    "input": "Explain the key ideas behind transformer architectures.",
    "background": true,
    "metadata": { "completion_window": "15m" }
  }' | jq -r '.id')

# Poll until completed. The id is quoted inside the URL so an empty or
# unexpected value cannot be word-split by the shell.
until [ "$(curl -s "https://api.sailresearch.com/v1/responses/$RESPONSE_ID" \
  -H "Authorization: Bearer YOUR_SAIL_API_KEY" | jq -r '.status')" = "completed" ]; do
  sleep 1
done

# Fetch and pretty-print the final response body
curl -s "https://api.sailresearch.com/v1/responses/$RESPONSE_ID" \
  -H "Authorization: Bearer YOUR_SAIL_API_KEY" | jq

3. Next steps