bailey Chat Completions API

bailey is a ready-to-deploy Health AI Assistant you can use in your mobile apps

bailey implements the OpenAI ChatCompletion API: https://developers.openai.com/api/docs/guides/completions/. Any code that can talk to OpenAI ChatCompletion API should be able to talk to bailey API without any change.

Note: Previous messages should be passed in the messages list so bailey has the full context of the conversation.

Note: While the samples below show accumulating the streaming responses, in production we recommend updating the UI with the streaming responses as you receive them so users have a fast experience.

Note: bailey uses the same token that is used by the other SDKs.

Note: bailey API is available on the public internet so you can call it directly from your mobile app or website. No need to route through a backend service.

Integration Options

1- Use openai pypi package

Simplest path is to use the openai client library (https://pypi.org/project/openai/):

from openai import AsyncOpenAI, AsyncStream
from openai.types.chat import ChatCompletionChunk

client = AsyncOpenAI(
  api_key="{client key}",
  base_url="https://api.prod.icanbwell.com/bailey/v1",
  default_headers={
    "Authorization": f"Bearer {user token}",
  },

)

stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
  messages=[        
        {
            "role": "user",
            "content": "{specify your prompt here}", 
        }
    ],
    model="Bailey AI",
    stream=True, # enables streaming
)

content: str = ""
chunk: ChatCompletionChunk
async for chunk in stream:
    delta_content = "\n".join(
        [choice.delta.content or "" for choice in chunk.choices]
    )
		print(delta_content)

2- Use python calls without using openai pypi package

If you don't want to use the openai pypi package, you can just write direct calls:

import httpx
import json
from typing import AsyncIterator
from collections.abc import AsyncIterator as AsyncIteratorABC


async def stream_chat_completion(
    user_token: str,
    client_key: str,
    prompt: str,
    model: str = "Bailey AI",
    base_url: str = "https://api.prod.icanbwell.com/bailey/v1"
) -> AsyncIterator[str]:
    """
    Stream chat completion responses from the Bailey AI API using httpx.

    Args:
        user_token: Bearer token for user authentication
        client_key: API key for client authentication
        prompt: The user's prompt/message
        model: The model name to use (default: "Bailey AI")
        base_url: The base URL for the API

    Yields:
        str: Delta content from each chunk of the streaming response

    Raises:
        httpx.HTTPError: If the request fails

    Note:
        Malformed JSON chunks are skipped with a warning rather than raised,
        so the stream keeps flowing past a single bad event.
    """
    url: str = f"{base_url}/chat/completions"

    headers: dict[str, str] = {
        "Authorization": f"Bearer {user_token}",
        "Content-Type": "application/json",
        "api-key": client_key,  # OpenAI-style API key header
    }

    payload: dict[str, object] = {
        "messages": [
            {
                "role": "user",
                "content": prompt,
            }
        ],
        "model": model,
        "stream": True,
    }

    async with httpx.AsyncClient(timeout=60.0) as client:
        async with client.stream(
            "POST",
            url,
            headers=headers,
            json=payload,
        ) as response:
            response.raise_for_status()

            # Process the Server-Sent Events (SSE) stream line by line.
            async for line in response.aiter_lines():
                # Skip blank keep-alive lines between events.
                if not line.strip():
                    continue

                # Each event line has the form: 'data: {json}'
                if not line.startswith("data: "):
                    continue

                data_str: str = line[len("data: "):]

                # "[DONE]" marks the end of the stream.
                if data_str.strip() == "[DONE]":
                    return

                try:
                    chunk: dict = json.loads(data_str)
                except json.JSONDecodeError as e:
                    # Skip malformed chunks rather than aborting the stream.
                    print(f"Warning: Failed to parse chunk: {e}")
                    continue

                # Extract non-empty delta content from every choice.
                delta_contents: list[str] = [
                    choice["delta"]["content"]
                    for choice in chunk.get("choices", [])
                    if choice.get("delta", {}).get("content")
                ]

                # Yield the combined delta content for this chunk.
                if delta_contents:
                    yield "\n".join(delta_contents)


# Example usage:
async def main() -> None:
    """Demonstrate stream_chat_completion with placeholder credentials."""
    user_token: str = "{user token}"
    client_key: str = "{client key}"
    prompt: str = "{specify your prompt here}"

    # Collect every delta so the full response can be shown at the end.
    pieces: list[str] = []

    async for delta_content in stream_chat_completion(
        user_token=user_token,
        client_key=client_key,
        prompt=prompt,
    ):
        # Echo each delta immediately for a responsive experience.
        print(delta_content, end="", flush=True)
        pieces.append(delta_content)

    content: str = "".join(pieces)
    print()  # Final newline
    print(f"\nFull response: {content}")


# To run:
# import asyncio
# asyncio.run(main())

3- curl commands

curl --no-buffer -X POST "https://api.prod.icanbwell.com/bailey/v1/chat/completions" \
  -H "Authorization: Bearer {user token}" \
  -H "api-key: {client key}" \
  -H "Content-Type: application/json" \
  -d '{
    "messages": [
      {
        "role": "user",
        "content": "{specify your prompt here}"
      }
    ],
    "model": "Bailey AI",
    "stream": true
  }'

4- Javascript using openai npm package

import OpenAI from 'openai';

/**
 * Stream chat completion responses from the Bailey AI API.
 * 
 * @param {string} userToken - Bearer token for user authentication
 * @param {string} clientKey - API key for client authentication
 * @param {string} prompt - The user's prompt/message
 * @param {string} [model='Bailey AI'] - The model name to use
 * @returns {Promise<string>} The complete response content
 */
async function streamChatCompletion(
  userToken,
  clientKey,
  prompt,
  model = 'Bailey AI'
) {
  // Point the OpenAI client at the Bailey endpoint; the user token rides
  // along as a default Authorization header on every request.
  const client = new OpenAI({
    apiKey: clientKey,
    baseURL: 'https://api.prod.icanbwell.com/bailey/v1',
    defaultHeaders: {
      'Authorization': `Bearer ${userToken}`,
    },
  });

  // Request a streaming completion for a single user message.
  const stream = await client.chat.completions.create({
    messages: [{ role: 'user', content: prompt }],
    model: model,
    stream: true,
  });

  let content = '';

  // Echo each delta as it arrives and accumulate the full response.
  for await (const chunk of stream) {
    const deltaContent = chunk.choices
      .map((choice) => choice.delta?.content || '')
      .join('\n');

    if (deltaContent) {
      process.stdout.write(deltaContent);
      content += deltaContent;
    }
  }

  console.log(); // Final newline
  return content;
}

// Example usage
async function main() {
  const userToken = '{user token}';
  const clientKey = '{client key}';
  const prompt = '{specify your prompt here}';

  try {
    console.log('Streaming response:');
    const fullResponse = await streamChatCompletion(userToken, clientKey, prompt);

    console.log('\n--- Complete Response ---');
    console.log(fullResponse);
  } catch (error) {
    // Surface the error plus any HTTP details the client attached.
    console.error('Error:', error.message);
    if (error.response) {
      console.error('Response status:', error.response.status);
      console.error('Response data:', error.response.data);
    }
  }
}

// Run the example
main();

5- Javascript without using openai npm package

/**
 * Stream chat completion responses from the Bailey AI API using native fetch.
 * 
 * @param {string} userToken - Bearer token for user authentication
 * @param {string} clientKey - API key for client authentication
 * @param {string} prompt - The user's prompt/message
 * @param {string} [model='Bailey AI'] - The model name to use
 * @param {string} [baseUrl='https://api.prod.icanbwell.com/bailey/v1'] - The base URL for the API
 * @returns {Promise<string>} The complete response content
 */
async function streamChatCompletion(
  userToken,
  clientKey,
  prompt,
  model = 'Bailey AI',
  baseUrl = 'https://api.prod.icanbwell.com/bailey/v1'
) {
  const url = `${baseUrl}/chat/completions`;

  const headers = {
    'Authorization': `Bearer ${userToken}`,
    'Content-Type': 'application/json',
    'api-key': clientKey,
  };

  const payload = {
    messages: [
      {
        role: 'user',
        content: prompt,
      },
    ],
    model: model,
    stream: true,
  };

  try {
    const response = await fetch(url, {
      method: 'POST',
      headers: headers,
      body: JSON.stringify(payload),
    });

    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    const reader = response.body.getReader();
    const decoder = new TextDecoder('utf-8');
    let content = '';
    let buffer = '';
    // Set once "[DONE]" is seen so the outer read loop also stops
    // (a bare `break` in the inner loop would keep reading the socket).
    let streamEnded = false;

    while (!streamEnded) {
      const { done, value } = await reader.read();

      if (done) {
        break;
      }

      // Decode the chunk and add to buffer
      buffer += decoder.decode(value, { stream: true });

      // Process complete lines from buffer
      const lines = buffer.split('\n');

      // Keep the last incomplete line in buffer
      buffer = lines.pop() || '';

      for (const line of lines) {
        const trimmedLine = line.trim();

        // Skip empty keep-alive lines between events
        if (!trimmedLine) {
          continue;
        }

        // Each SSE event line has the form: 'data: {json}'
        if (!trimmedLine.startsWith('data: ')) {
          continue;
        }

        const dataStr = trimmedLine.slice(6); // Remove "data: " prefix

        // "[DONE]" marks the end of the stream
        if (dataStr === '[DONE]') {
          streamEnded = true;
          break;
        }

        try {
          // Parse the JSON chunk
          const chunk = JSON.parse(dataStr);

          // Extract non-empty delta content from all choices
          if (chunk.choices && Array.isArray(chunk.choices)) {
            const deltaContents = chunk.choices
              .map(choice => choice.delta?.content || '')
              .filter(content => content);

            if (deltaContents.length > 0) {
              const deltaContent = deltaContents.join('\n');
              process.stdout.write(deltaContent); // For Node.js
              // For browser: document.getElementById('output').textContent += deltaContent;
              content += deltaContent;
            }
          }
        } catch (error) {
          // Skip malformed chunks rather than aborting the stream
          console.warn('Failed to parse chunk:', error.message);
          continue;
        }
      }
    }

    console.log(); // Final newline
    return content;

  } catch (error) {
    console.error('Error streaming chat completion:', error.message);
    throw error;
  }
}

// Example usage
async function main() {
  const userToken = '{user token}';
  const clientKey = '{client key}';
  const prompt = '{specify your prompt here}';

  try {
    console.log('Streaming response:');
    const fullResponse = await streamChatCompletion(userToken, clientKey, prompt);

    console.log('\n--- Complete Response ---');
    console.log(fullResponse);
  } catch (error) {
    console.error('Error:', error.message);
  }
}

// Run the example (Node.js)
main();