Initial commit: Email alerts application

2025-07-25 11:31:36 +01:00
commit adfb625ae9
6322 changed files with 2882826 additions and 0 deletions
@@ -0,0 +1,89 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .chat import (
+    Chat,
+    AsyncChat,
+    ChatWithRawResponse,
+    AsyncChatWithRawResponse,
+    ChatWithStreamingResponse,
+    AsyncChatWithStreamingResponse,
+)
+from .audio import (
+    Audio,
+    AsyncAudio,
+    AudioWithRawResponse,
+    AsyncAudioWithRawResponse,
+    AudioWithStreamingResponse,
+    AsyncAudioWithStreamingResponse,
+)
+from .files import (
+    Files,
+    AsyncFiles,
+    FilesWithRawResponse,
+    AsyncFilesWithRawResponse,
+    FilesWithStreamingResponse,
+    AsyncFilesWithStreamingResponse,
+)
+from .models import (
+    Models,
+    AsyncModels,
+    ModelsWithRawResponse,
+    AsyncModelsWithRawResponse,
+    ModelsWithStreamingResponse,
+    AsyncModelsWithStreamingResponse,
+)
+from .batches import (
+    Batches,
+    AsyncBatches,
+    BatchesWithRawResponse,
+    AsyncBatchesWithRawResponse,
+    BatchesWithStreamingResponse,
+    AsyncBatchesWithStreamingResponse,
+)
+from .embeddings import (
+    Embeddings,
+    AsyncEmbeddings,
+    EmbeddingsWithRawResponse,
+    AsyncEmbeddingsWithRawResponse,
+    EmbeddingsWithStreamingResponse,
+    AsyncEmbeddingsWithStreamingResponse,
+)
+
+__all__ = [
+    "Chat",
+    "AsyncChat",
+    "ChatWithRawResponse",
+    "AsyncChatWithRawResponse",
+    "ChatWithStreamingResponse",
+    "AsyncChatWithStreamingResponse",
+    "Embeddings",
+    "AsyncEmbeddings",
+    "EmbeddingsWithRawResponse",
+    "AsyncEmbeddingsWithRawResponse",
+    "EmbeddingsWithStreamingResponse",
+    "AsyncEmbeddingsWithStreamingResponse",
+    "Audio",
+    "AsyncAudio",
+    "AudioWithRawResponse",
+    "AsyncAudioWithRawResponse",
+    "AudioWithStreamingResponse",
+    "AsyncAudioWithStreamingResponse",
+    "Models",
+    "AsyncModels",
+    "ModelsWithRawResponse",
+    "AsyncModelsWithRawResponse",
+    "ModelsWithStreamingResponse",
+    "AsyncModelsWithStreamingResponse",
+    "Batches",
+    "AsyncBatches",
+    "BatchesWithRawResponse",
+    "AsyncBatchesWithRawResponse",
+    "BatchesWithStreamingResponse",
+    "AsyncBatchesWithStreamingResponse",
+    "Files",
+    "AsyncFiles",
+    "FilesWithRawResponse",
+    "AsyncFilesWithRawResponse",
+    "FilesWithStreamingResponse",
+    "AsyncFilesWithStreamingResponse",
+]
@@ -0,0 +1,61 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .audio import (
+    Audio,
+    AsyncAudio,
+    AudioWithRawResponse,
+    AsyncAudioWithRawResponse,
+    AudioWithStreamingResponse,
+    AsyncAudioWithStreamingResponse,
+)
+from .speech import (
+    Speech,
+    AsyncSpeech,
+    SpeechWithRawResponse,
+    AsyncSpeechWithRawResponse,
+    SpeechWithStreamingResponse,
+    AsyncSpeechWithStreamingResponse,
+)
+from .translations import (
+    Translations,
+    AsyncTranslations,
+    TranslationsWithRawResponse,
+    AsyncTranslationsWithRawResponse,
+    TranslationsWithStreamingResponse,
+    AsyncTranslationsWithStreamingResponse,
+)
+from .transcriptions import (
+    Transcriptions,
+    AsyncTranscriptions,
+    TranscriptionsWithRawResponse,
+    AsyncTranscriptionsWithRawResponse,
+    TranscriptionsWithStreamingResponse,
+    AsyncTranscriptionsWithStreamingResponse,
+)
+
+__all__ = [
+    "Speech",
+    "AsyncSpeech",
+    "SpeechWithRawResponse",
+    "AsyncSpeechWithRawResponse",
+    "SpeechWithStreamingResponse",
+    "AsyncSpeechWithStreamingResponse",
+    "Transcriptions",
+    "AsyncTranscriptions",
+    "TranscriptionsWithRawResponse",
+    "AsyncTranscriptionsWithRawResponse",
+    "TranscriptionsWithStreamingResponse",
+    "AsyncTranscriptionsWithStreamingResponse",
+    "Translations",
+    "AsyncTranslations",
+    "TranslationsWithRawResponse",
+    "AsyncTranslationsWithRawResponse",
+    "TranslationsWithStreamingResponse",
+    "AsyncTranslationsWithStreamingResponse",
+    "Audio",
+    "AsyncAudio",
+    "AudioWithRawResponse",
+    "AsyncAudioWithRawResponse",
+    "AudioWithStreamingResponse",
+    "AsyncAudioWithStreamingResponse",
+]
@@ -0,0 +1,166 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .speech import (
+    Speech,
+    AsyncSpeech,
+    SpeechWithRawResponse,
+    AsyncSpeechWithRawResponse,
+    SpeechWithStreamingResponse,
+    AsyncSpeechWithStreamingResponse,
+)
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from .translations import (
+    Translations,
+    AsyncTranslations,
+    TranslationsWithRawResponse,
+    AsyncTranslationsWithRawResponse,
+    TranslationsWithStreamingResponse,
+    AsyncTranslationsWithStreamingResponse,
+)
+from .transcriptions import (
+    Transcriptions,
+    AsyncTranscriptions,
+    TranscriptionsWithRawResponse,
+    AsyncTranscriptionsWithRawResponse,
+    TranscriptionsWithStreamingResponse,
+    AsyncTranscriptionsWithStreamingResponse,
+)
+
+__all__ = ["Audio", "AsyncAudio"]
+
+
+class Audio(SyncAPIResource):
+    @cached_property
+    def speech(self) -> Speech:
+        return Speech(self._client)
+
+    @cached_property
+    def transcriptions(self) -> Transcriptions:
+        return Transcriptions(self._client)
+
+    @cached_property
+    def translations(self) -> Translations:
+        return Translations(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AudioWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
+        return AudioWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AudioWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
+        return AudioWithStreamingResponse(self)
+
+
+class AsyncAudio(AsyncAPIResource):
+    @cached_property
+    def speech(self) -> AsyncSpeech:
+        return AsyncSpeech(self._client)
+
+    @cached_property
+    def transcriptions(self) -> AsyncTranscriptions:
+        return AsyncTranscriptions(self._client)
+
+    @cached_property
+    def translations(self) -> AsyncTranslations:
+        return AsyncTranslations(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncAudioWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncAudioWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncAudioWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
+        return AsyncAudioWithStreamingResponse(self)
+
+
+class AudioWithRawResponse:
+    def __init__(self, audio: Audio) -> None:
+        self._audio = audio
+
+    @cached_property
+    def speech(self) -> SpeechWithRawResponse:
+        return SpeechWithRawResponse(self._audio.speech)
+
+    @cached_property
+    def transcriptions(self) -> TranscriptionsWithRawResponse:
+        return TranscriptionsWithRawResponse(self._audio.transcriptions)
+
+    @cached_property
+    def translations(self) -> TranslationsWithRawResponse:
+        return TranslationsWithRawResponse(self._audio.translations)
+
+
+class AsyncAudioWithRawResponse:
+    def __init__(self, audio: AsyncAudio) -> None:
+        self._audio = audio
+
+    @cached_property
+    def speech(self) -> AsyncSpeechWithRawResponse:
+        return AsyncSpeechWithRawResponse(self._audio.speech)
+
+    @cached_property
+    def transcriptions(self) -> AsyncTranscriptionsWithRawResponse:
+        return AsyncTranscriptionsWithRawResponse(self._audio.transcriptions)
+
+    @cached_property
+    def translations(self) -> AsyncTranslationsWithRawResponse:
+        return AsyncTranslationsWithRawResponse(self._audio.translations)
+
+
+class AudioWithStreamingResponse:
+    def __init__(self, audio: Audio) -> None:
+        self._audio = audio
+
+    @cached_property
+    def speech(self) -> SpeechWithStreamingResponse:
+        return SpeechWithStreamingResponse(self._audio.speech)
+
+    @cached_property
+    def transcriptions(self) -> TranscriptionsWithStreamingResponse:
+        return TranscriptionsWithStreamingResponse(self._audio.transcriptions)
+
+    @cached_property
+    def translations(self) -> TranslationsWithStreamingResponse:
+        return TranslationsWithStreamingResponse(self._audio.translations)
+
+
+class AsyncAudioWithStreamingResponse:
+    def __init__(self, audio: AsyncAudio) -> None:
+        self._audio = audio
+
+    @cached_property
+    def speech(self) -> AsyncSpeechWithStreamingResponse:
+        return AsyncSpeechWithStreamingResponse(self._audio.speech)
+
+    @cached_property
+    def transcriptions(self) -> AsyncTranscriptionsWithStreamingResponse:
+        return AsyncTranscriptionsWithStreamingResponse(self._audio.transcriptions)
+
+    @cached_property
+    def translations(self) -> AsyncTranslationsWithStreamingResponse:
+        return AsyncTranslationsWithStreamingResponse(self._audio.translations)
@@ -0,0 +1,233 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal
+
+import httpx
+
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import maybe_transform, async_maybe_transform
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+    BinaryAPIResponse,
+    AsyncBinaryAPIResponse,
+    StreamedBinaryAPIResponse,
+    AsyncStreamedBinaryAPIResponse,
+    to_custom_raw_response_wrapper,
+    to_custom_streamed_response_wrapper,
+    async_to_custom_raw_response_wrapper,
+    async_to_custom_streamed_response_wrapper,
+)
+from ...types.audio import speech_create_params
+from ..._base_client import make_request_options
+
+__all__ = ["Speech", "AsyncSpeech"]
+
+
+class Speech(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> SpeechWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
+        return SpeechWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> SpeechWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
+        return SpeechWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        input: str,
+        model: Union[str, Literal["playai-tts", "playai-tts-arabic"]],
+        voice: str,
+        response_format: Literal["flac", "mp3", "mulaw", "ogg", "wav"] | NotGiven = NOT_GIVEN,
+        sample_rate: Literal[8000, 16000, 22050, 24000, 32000, 44100, 48000] | NotGiven = NOT_GIVEN,
+        speed: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BinaryAPIResponse:
+        """
+        Generates audio from the input text.
+
+        Args:
+          input: The text to generate audio for.
+
+          model: One of the [available TTS models](/docs/text-to-speech).
+
+          voice: The voice to use when generating the audio. List of voices can be found
+              [here](/docs/text-to-speech).
+
+          response_format: The format of the generated audio. Supported formats are
+              `flac, mp3, mulaw, ogg, wav`.
+
+          sample_rate: The sample rate for generated audio
+
+          speed: The speed of the generated audio.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"Accept": "audio/wav", **(extra_headers or {})}
+        return self._post(
+            "/openai/v1/audio/speech",
+            body=maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                    "voice": voice,
+                    "response_format": response_format,
+                    "sample_rate": sample_rate,
+                    "speed": speed,
+                },
+                speech_create_params.SpeechCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BinaryAPIResponse,
+        )
+
+
+class AsyncSpeech(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncSpeechWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncSpeechWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncSpeechWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
+        return AsyncSpeechWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        input: str,
+        model: Union[str, Literal["playai-tts", "playai-tts-arabic"]],
+        voice: str,
+        response_format: Literal["flac", "mp3", "mulaw", "ogg", "wav"] | NotGiven = NOT_GIVEN,
+        sample_rate: Literal[8000, 16000, 22050, 24000, 32000, 44100, 48000] | NotGiven = NOT_GIVEN,
+        speed: float | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncBinaryAPIResponse:
+        """
+        Generates audio from the input text.
+
+        Args:
+          input: The text to generate audio for.
+
+          model: One of the [available TTS models](/docs/text-to-speech).
+
+          voice: The voice to use when generating the audio. List of voices can be found
+              [here](/docs/text-to-speech).
+
+          response_format: The format of the generated audio. Supported formats are
+              `flac, mp3, mulaw, ogg, wav`.
+
+          sample_rate: The sample rate for generated audio
+
+          speed: The speed of the generated audio.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        extra_headers = {"Accept": "audio/wav", **(extra_headers or {})}
+        return await self._post(
+            "/openai/v1/audio/speech",
+            body=await async_maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                    "voice": voice,
+                    "response_format": response_format,
+                    "sample_rate": sample_rate,
+                    "speed": speed,
+                },
+                speech_create_params.SpeechCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=AsyncBinaryAPIResponse,
+        )
+
+
+class SpeechWithRawResponse:
+    def __init__(self, speech: Speech) -> None:
+        self._speech = speech
+
+        self.create = to_custom_raw_response_wrapper(
+            speech.create,
+            BinaryAPIResponse,
+        )
+
+
+class AsyncSpeechWithRawResponse:
+    def __init__(self, speech: AsyncSpeech) -> None:
+        self._speech = speech
+
+        self.create = async_to_custom_raw_response_wrapper(
+            speech.create,
+            AsyncBinaryAPIResponse,
+        )
+
+
+class SpeechWithStreamingResponse:
+    def __init__(self, speech: Speech) -> None:
+        self._speech = speech
+
+        self.create = to_custom_streamed_response_wrapper(
+            speech.create,
+            StreamedBinaryAPIResponse,
+        )
+
+
+class AsyncSpeechWithStreamingResponse:
+    def __init__(self, speech: AsyncSpeech) -> None:
+        self._speech = speech
+
+        self.create = async_to_custom_streamed_response_wrapper(
+            speech.create,
+            AsyncStreamedBinaryAPIResponse,
+        )
@@ -0,0 +1,494 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Mapping, cast
+from typing_extensions import Literal
+
+import httpx
+
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
+from ..._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+    to_raw_response_wrapper,
+    to_streamed_response_wrapper,
+    async_to_raw_response_wrapper,
+    async_to_streamed_response_wrapper,
+)
+from ...types.audio import transcription_create_params
+from ..._base_client import make_request_options
+from ...types.audio.transcription import Transcription
+
+__all__ = ["Transcriptions", "AsyncTranscriptions"]
+
+
+class Transcriptions(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> TranscriptionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
+        return TranscriptionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> TranscriptionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
+        return TranscriptionsWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        model: Union[str, Literal["whisper-large-v3", "whisper-large-v3-turbo"]],
+        file: FileTypes | NotGiven = NOT_GIVEN,
+        language: Union[
+            str,
+            Literal[
+                "en",
+                "zh",
+                "de",
+                "es",
+                "ru",
+                "ko",
+                "fr",
+                "ja",
+                "pt",
+                "tr",
+                "pl",
+                "ca",
+                "nl",
+                "ar",
+                "sv",
+                "it",
+                "id",
+                "hi",
+                "fi",
+                "vi",
+                "he",
+                "uk",
+                "el",
+                "ms",
+                "cs",
+                "ro",
+                "da",
+                "hu",
+                "ta",
+                "no",
+                "th",
+                "ur",
+                "hr",
+                "bg",
+                "lt",
+                "la",
+                "mi",
+                "ml",
+                "cy",
+                "sk",
+                "te",
+                "fa",
+                "lv",
+                "bn",
+                "sr",
+                "az",
+                "sl",
+                "kn",
+                "et",
+                "mk",
+                "br",
+                "eu",
+                "is",
+                "hy",
+                "ne",
+                "mn",
+                "bs",
+                "kk",
+                "sq",
+                "sw",
+                "gl",
+                "mr",
+                "pa",
+                "si",
+                "km",
+                "sn",
+                "yo",
+                "so",
+                "af",
+                "oc",
+                "ka",
+                "be",
+                "tg",
+                "sd",
+                "gu",
+                "am",
+                "yi",
+                "lo",
+                "uz",
+                "fo",
+                "ht",
+                "ps",
+                "tk",
+                "nn",
+                "mt",
+                "sa",
+                "lb",
+                "my",
+                "bo",
+                "tl",
+                "mg",
+                "as",
+                "tt",
+                "haw",
+                "ln",
+                "ha",
+                "ba",
+                "jv",
+                "su",
+                "yue",
+            ],
+        ]
+        | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        response_format: Literal["json", "text", "verbose_json"] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        url: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Transcription:
+        """
+        Transcribes audio into the input language.
+
+        Args:
+          model: ID of the model to use. `whisper-large-v3` and `whisper-large-v3-turbo` are
+              currently available.
+
+          file:
+              The audio file object (not file name) to transcribe, in one of these formats:
+              flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. Either a file or a URL must
+              be provided. Note that the file field is not supported in Batch API requests.
+
+          language: The language of the input audio. Supplying the input language in
+              [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will
+              improve accuracy and latency.
+
+          prompt: An optional text to guide the model's style or continue a previous audio
+              segment. The [prompt](/docs/speech-text) should match the audio language.
+
+          response_format: The format of the transcript output, in one of these options: `json`, `text`, or
+              `verbose_json`.
+
+          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+              output more random, while lower values like 0.2 will make it more focused and
+              deterministic. If set to 0, the model will use
+              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
+              automatically increase the temperature until certain thresholds are hit.
+
+          timestamp_granularities: The timestamp granularities to populate for this transcription.
+              `response_format` must be set `verbose_json` to use timestamp granularities.
+              Either or both of these options are supported: `word`, or `segment`. Note: There
+              is no additional latency for segment timestamps, but generating word timestamps
+              incurs additional latency.
+
+          url: The audio URL to translate/transcribe (supports Base64URL). Either a file or a
+              URL must be provided. For Batch API requests, the URL field is required since
+              the file field is not supported.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "model": model,
+                "file": file,
+                "language": language,
+                "prompt": prompt,
+                "response_format": response_format,
+                "temperature": temperature,
+                "timestamp_granularities": timestamp_granularities,
+                "url": url,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return self._post(
+            "/openai/v1/audio/transcriptions",
+            body=maybe_transform(body, transcription_create_params.TranscriptionCreateParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Transcription,
+        )
+
+
+class AsyncTranscriptions(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncTranscriptionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncTranscriptionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncTranscriptionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
+        return AsyncTranscriptionsWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        model: Union[str, Literal["whisper-large-v3", "whisper-large-v3-turbo"]],
+        file: FileTypes | NotGiven = NOT_GIVEN,
+        language: Union[
+            str,
+            Literal[
+                "en",
+                "zh",
+                "de",
+                "es",
+                "ru",
+                "ko",
+                "fr",
+                "ja",
+                "pt",
+                "tr",
+                "pl",
+                "ca",
+                "nl",
+                "ar",
+                "sv",
+                "it",
+                "id",
+                "hi",
+                "fi",
+                "vi",
+                "he",
+                "uk",
+                "el",
+                "ms",
+                "cs",
+                "ro",
+                "da",
+                "hu",
+                "ta",
+                "no",
+                "th",
+                "ur",
+                "hr",
+                "bg",
+                "lt",
+                "la",
+                "mi",
+                "ml",
+                "cy",
+                "sk",
+                "te",
+                "fa",
+                "lv",
+                "bn",
+                "sr",
+                "az",
+                "sl",
+                "kn",
+                "et",
+                "mk",
+                "br",
+                "eu",
+                "is",
+                "hy",
+                "ne",
+                "mn",
+                "bs",
+                "kk",
+                "sq",
+                "sw",
+                "gl",
+                "mr",
+                "pa",
+                "si",
+                "km",
+                "sn",
+                "yo",
+                "so",
+                "af",
+                "oc",
+                "ka",
+                "be",
+                "tg",
+                "sd",
+                "gu",
+                "am",
+                "yi",
+                "lo",
+                "uz",
+                "fo",
+                "ht",
+                "ps",
+                "tk",
+                "nn",
+                "mt",
+                "sa",
+                "lb",
+                "my",
+                "bo",
+                "tl",
+                "mg",
+                "as",
+                "tt",
+                "haw",
+                "ln",
+                "ha",
+                "ba",
+                "jv",
+                "su",
+                "yue",
+            ],
+        ]
+        | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        response_format: Literal["json", "text", "verbose_json"] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
+        url: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Transcription:
+        """
+        Transcribes audio into the input language.
+
+        Args:
+          model: ID of the model to use. `whisper-large-v3` and `whisper-large-v3-turbo` are
+              currently available.
+
+          file:
+              The audio file object (not file name) to transcribe, in one of these formats:
+              flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. Either a file or a URL must
+              be provided. Note that the file field is not supported in Batch API requests.
+
+          language: The language of the input audio. Supplying the input language in
+              [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will
+              improve accuracy and latency.
+
+          prompt: An optional text to guide the model's style or continue a previous audio
+              segment. The [prompt](/docs/speech-text) should match the audio language.
+
+          response_format: The format of the transcript output, in one of these options: `json`, `text`, or
+              `verbose_json`.
+
+          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+              output more random, while lower values like 0.2 will make it more focused and
+              deterministic. If set to 0, the model will use
+              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
+              automatically increase the temperature until certain thresholds are hit.
+
+          timestamp_granularities: The timestamp granularities to populate for this transcription.
+              `response_format` must be set `verbose_json` to use timestamp granularities.
+              Either or both of these options are supported: `word`, or `segment`. Note: There
+              is no additional latency for segment timestamps, but generating word timestamps
+              incurs additional latency.
+
+          url: The audio URL to translate/transcribe (supports Base64URL). Either a file or a
+              URL must be provided. For Batch API requests, the URL field is required since
+              the file field is not supported.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "model": model,
+                "file": file,
+                "language": language,
+                "prompt": prompt,
+                "response_format": response_format,
+                "temperature": temperature,
+                "timestamp_granularities": timestamp_granularities,
+                "url": url,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return await self._post(
+            "/openai/v1/audio/transcriptions",
+            body=await async_maybe_transform(body, transcription_create_params.TranscriptionCreateParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Transcription,
+        )
+
+
+class TranscriptionsWithRawResponse:
+    def __init__(self, transcriptions: Transcriptions) -> None:
+        self._transcriptions = transcriptions
+
+        self.create = to_raw_response_wrapper(
+            transcriptions.create,
+        )
+
+
+class AsyncTranscriptionsWithRawResponse:
+    def __init__(self, transcriptions: AsyncTranscriptions) -> None:
+        self._transcriptions = transcriptions
+
+        self.create = async_to_raw_response_wrapper(
+            transcriptions.create,
+        )
+
+
+class TranscriptionsWithStreamingResponse:
+    def __init__(self, transcriptions: Transcriptions) -> None:
+        self._transcriptions = transcriptions
+
+        self.create = to_streamed_response_wrapper(
+            transcriptions.create,
+        )
+
+
+class AsyncTranscriptionsWithStreamingResponse:
+    def __init__(self, transcriptions: AsyncTranscriptions) -> None:
+        self._transcriptions = transcriptions
+
+        self.create = async_to_streamed_response_wrapper(
+            transcriptions.create,
+        )
@@ -0,0 +1,254 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union, Mapping, cast
+from typing_extensions import Literal
+
+import httpx
+
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
+from ..._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+    to_raw_response_wrapper,
+    to_streamed_response_wrapper,
+    async_to_raw_response_wrapper,
+    async_to_streamed_response_wrapper,
+)
+from ...types.audio import translation_create_params
+from ..._base_client import make_request_options
+from ...types.audio.translation import Translation
+
+__all__ = ["Translations", "AsyncTranslations"]
+
+
+class Translations(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> TranslationsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
+        return TranslationsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> TranslationsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
+        return TranslationsWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        model: Union[str, Literal["whisper-large-v3", "whisper-large-v3-turbo"]],
+        file: FileTypes | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        response_format: Literal["json", "text", "verbose_json"] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        url: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Translation:
+        """Translates audio into English.
+
+        Args:
+          model: ID of the model to use.
+
+        `whisper-large-v3` and `whisper-large-v3-turbo` are
+              currently available.
+
+          file: The audio file object (not file name) translate, in one of these formats: flac,
+              mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+
+          prompt: An optional text to guide the model's style or continue a previous audio
+              segment. The [prompt](/docs/guides/speech-to-text/prompting) should be in
+              English.
+
+          response_format: The format of the transcript output, in one of these options: `json`, `text`, or
+              `verbose_json`.
+
+          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+              output more random, while lower values like 0.2 will make it more focused and
+              deterministic. If set to 0, the model will use
+              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
+              automatically increase the temperature until certain thresholds are hit.
+
+          url: The audio URL to translate/transcribe (supports Base64URL). Either file or url
+              must be provided. When using the Batch API only url is supported.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "model": model,
+                "file": file,
+                "prompt": prompt,
+                "response_format": response_format,
+                "temperature": temperature,
+                "url": url,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return self._post(
+            "/openai/v1/audio/translations",
+            body=maybe_transform(body, translation_create_params.TranslationCreateParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Translation,
+        )
+
+
+class AsyncTranslations(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncTranslationsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncTranslationsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncTranslationsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
+        return AsyncTranslationsWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        model: Union[str, Literal["whisper-large-v3", "whisper-large-v3-turbo"]],
+        file: FileTypes | NotGiven = NOT_GIVEN,
+        prompt: str | NotGiven = NOT_GIVEN,
+        response_format: Literal["json", "text", "verbose_json"] | NotGiven = NOT_GIVEN,
+        temperature: float | NotGiven = NOT_GIVEN,
+        url: str | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Translation:
+        """Translates audio into English.
+
+        Args:
+          model: ID of the model to use.
+
+        `whisper-large-v3` and `whisper-large-v3-turbo` are
+              currently available.
+
+          file: The audio file object (not file name) translate, in one of these formats: flac,
+              mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+
+          prompt: An optional text to guide the model's style or continue a previous audio
+              segment. The [prompt](/docs/guides/speech-to-text/prompting) should be in
+              English.
+
+          response_format: The format of the transcript output, in one of these options: `json`, `text`, or
+              `verbose_json`.
+
+          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
+              output more random, while lower values like 0.2 will make it more focused and
+              deterministic. If set to 0, the model will use
+              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
+              automatically increase the temperature until certain thresholds are hit.
+
+          url: The audio URL to translate/transcribe (supports Base64URL). Either file or url
+              must be provided. When using the Batch API only url is supported.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "model": model,
+                "file": file,
+                "prompt": prompt,
+                "response_format": response_format,
+                "temperature": temperature,
+                "url": url,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return await self._post(
+            "/openai/v1/audio/translations",
+            body=await async_maybe_transform(body, translation_create_params.TranslationCreateParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Translation,
+        )
+
+
+class TranslationsWithRawResponse:
+    def __init__(self, translations: Translations) -> None:
+        self._translations = translations
+
+        self.create = to_raw_response_wrapper(
+            translations.create,
+        )
+
+
+class AsyncTranslationsWithRawResponse:
+    def __init__(self, translations: AsyncTranslations) -> None:
+        self._translations = translations
+
+        self.create = async_to_raw_response_wrapper(
+            translations.create,
+        )
+
+
+class TranslationsWithStreamingResponse:
+    def __init__(self, translations: Translations) -> None:
+        self._translations = translations
+
+        self.create = to_streamed_response_wrapper(
+            translations.create,
+        )
+
+
+class AsyncTranslationsWithStreamingResponse:
+    def __init__(self, translations: AsyncTranslations) -> None:
+        self._translations = translations
+
+        self.create = async_to_streamed_response_wrapper(
+            translations.create,
+        )
@@ -0,0 +1,429 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Optional
+from typing_extensions import Literal
+
+import httpx
+
+from ..types import batch_create_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import maybe_transform, async_maybe_transform
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import (
+    to_raw_response_wrapper,
+    to_streamed_response_wrapper,
+    async_to_raw_response_wrapper,
+    async_to_streamed_response_wrapper,
+)
+from .._base_client import make_request_options
+from ..types.batch_list_response import BatchListResponse
+from ..types.batch_cancel_response import BatchCancelResponse
+from ..types.batch_create_response import BatchCreateResponse
+from ..types.batch_retrieve_response import BatchRetrieveResponse
+
+__all__ = ["Batches", "AsyncBatches"]
+
+
+class Batches(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> BatchesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
+        return BatchesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> BatchesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
+        return BatchesWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        completion_window: str,
+        endpoint: Literal["/v1/chat/completions"],
+        input_file_id: str,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BatchCreateResponse:
+        """
+        Creates and executes a batch from an uploaded file of requests.
+        [Learn more](/docs/batch).
+
+        Args:
+          completion_window: The time frame within which the batch should be processed. Durations from `24h`
+              to `7d` are supported.
+
+          endpoint: The endpoint to be used for all requests in the batch. Currently
+              `/v1/chat/completions` is supported.
+
+          input_file_id: The ID of an uploaded file that contains requests for the new batch.
+
+              See [upload file](/docs/api-reference#files-upload) for how to upload a file.
+
+              Your input file must be formatted as a [JSONL file](/docs/batch), and must be
+              uploaded with the purpose `batch`. The file can be up to 100 MB in size.
+
+          metadata: Optional custom metadata for the batch.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/openai/v1/batches",
+            body=maybe_transform(
+                {
+                    "completion_window": completion_window,
+                    "endpoint": endpoint,
+                    "input_file_id": input_file_id,
+                    "metadata": metadata,
+                },
+                batch_create_params.BatchCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BatchCreateResponse,
+        )
+
+    def retrieve(
+        self,
+        batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BatchRetrieveResponse:
+        """
+        Retrieves a batch.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        return self._get(
+            f"/openai/v1/batches/{batch_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BatchRetrieveResponse,
+        )
+
+    def list(
+        self,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BatchListResponse:
+        """List your organization's batches."""
+        return self._get(
+            "/openai/v1/batches",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BatchListResponse,
+        )
+
+    def cancel(
+        self,
+        batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BatchCancelResponse:
+        """
+        Cancels a batch.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        return self._post(
+            f"/openai/v1/batches/{batch_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BatchCancelResponse,
+        )
+
+
+class AsyncBatches(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncBatchesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncBatchesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncBatchesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
+        return AsyncBatchesWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        completion_window: str,
+        endpoint: Literal["/v1/chat/completions"],
+        input_file_id: str,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BatchCreateResponse:
+        """
+        Creates and executes a batch from an uploaded file of requests.
+        [Learn more](/docs/batch).
+
+        Args:
+          completion_window: The time frame within which the batch should be processed. Durations from `24h`
+              to `7d` are supported.
+
+          endpoint: The endpoint to be used for all requests in the batch. Currently
+              `/v1/chat/completions` is supported.
+
+          input_file_id: The ID of an uploaded file that contains requests for the new batch.
+
+              See [upload file](/docs/api-reference#files-upload) for how to upload a file.
+
+              Your input file must be formatted as a [JSONL file](/docs/batch), and must be
+              uploaded with the purpose `batch`. The file can be up to 100 MB in size.
+
+          metadata: Optional custom metadata for the batch.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/openai/v1/batches",
+            body=await async_maybe_transform(
+                {
+                    "completion_window": completion_window,
+                    "endpoint": endpoint,
+                    "input_file_id": input_file_id,
+                    "metadata": metadata,
+                },
+                batch_create_params.BatchCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BatchCreateResponse,
+        )
+
+    async def retrieve(
+        self,
+        batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BatchRetrieveResponse:
+        """
+        Retrieves a batch.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        return await self._get(
+            f"/openai/v1/batches/{batch_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BatchRetrieveResponse,
+        )
+
+    async def list(
+        self,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BatchListResponse:
+        """List your organization's batches."""
+        return await self._get(
+            "/openai/v1/batches",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BatchListResponse,
+        )
+
+    async def cancel(
+        self,
+        batch_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BatchCancelResponse:
+        """
+        Cancels a batch.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not batch_id:
+            raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
+        return await self._post(
+            f"/openai/v1/batches/{batch_id}/cancel",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BatchCancelResponse,
+        )
+
+
+class BatchesWithRawResponse:
+    def __init__(self, batches: Batches) -> None:
+        self._batches = batches
+
+        self.create = to_raw_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = to_raw_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = to_raw_response_wrapper(
+            batches.list,
+        )
+        self.cancel = to_raw_response_wrapper(
+            batches.cancel,
+        )
+
+
+class AsyncBatchesWithRawResponse:
+    def __init__(self, batches: AsyncBatches) -> None:
+        self._batches = batches
+
+        self.create = async_to_raw_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = async_to_raw_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = async_to_raw_response_wrapper(
+            batches.list,
+        )
+        self.cancel = async_to_raw_response_wrapper(
+            batches.cancel,
+        )
+
+
+class BatchesWithStreamingResponse:
+    def __init__(self, batches: Batches) -> None:
+        self._batches = batches
+
+        self.create = to_streamed_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            batches.list,
+        )
+        self.cancel = to_streamed_response_wrapper(
+            batches.cancel,
+        )
+
+
+class AsyncBatchesWithStreamingResponse:
+    def __init__(self, batches: AsyncBatches) -> None:
+        self._batches = batches
+
+        self.create = async_to_streamed_response_wrapper(
+            batches.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            batches.retrieve,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            batches.list,
+        )
+        self.cancel = async_to_streamed_response_wrapper(
+            batches.cancel,
+        )
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .chat import (
+    Chat,
+    AsyncChat,
+    ChatWithRawResponse,
+    AsyncChatWithRawResponse,
+    ChatWithStreamingResponse,
+    AsyncChatWithStreamingResponse,
+)
+from .completions import (
+    Completions,
+    AsyncCompletions,
+    CompletionsWithRawResponse,
+    AsyncCompletionsWithRawResponse,
+    CompletionsWithStreamingResponse,
+    AsyncCompletionsWithStreamingResponse,
+)
+
+__all__ = [
+    "Completions",
+    "AsyncCompletions",
+    "CompletionsWithRawResponse",
+    "AsyncCompletionsWithRawResponse",
+    "CompletionsWithStreamingResponse",
+    "AsyncCompletionsWithStreamingResponse",
+    "Chat",
+    "AsyncChat",
+    "ChatWithRawResponse",
+    "AsyncChatWithRawResponse",
+    "ChatWithStreamingResponse",
+    "AsyncChatWithStreamingResponse",
+]
@@ -0,0 +1,102 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from .completions import (
+    Completions,
+    AsyncCompletions,
+    CompletionsWithRawResponse,
+    AsyncCompletionsWithRawResponse,
+    CompletionsWithStreamingResponse,
+    AsyncCompletionsWithStreamingResponse,
+)
+
+__all__ = ["Chat", "AsyncChat"]
+
+
+class Chat(SyncAPIResource):
+    @cached_property
+    def completions(self) -> Completions:
+        return Completions(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> ChatWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
+        return ChatWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> ChatWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
+        return ChatWithStreamingResponse(self)
+
+
+class AsyncChat(AsyncAPIResource):
+    @cached_property
+    def completions(self) -> AsyncCompletions:
+        return AsyncCompletions(self._client)
+
+    @cached_property
+    def with_raw_response(self) -> AsyncChatWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncChatWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncChatWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
+        return AsyncChatWithStreamingResponse(self)
+
+
+class ChatWithRawResponse:
+    def __init__(self, chat: Chat) -> None:
+        self._chat = chat
+
+    @cached_property
+    def completions(self) -> CompletionsWithRawResponse:
+        return CompletionsWithRawResponse(self._chat.completions)
+
+
+class AsyncChatWithRawResponse:
+    def __init__(self, chat: AsyncChat) -> None:
+        self._chat = chat
+
+    @cached_property
+    def completions(self) -> AsyncCompletionsWithRawResponse:
+        return AsyncCompletionsWithRawResponse(self._chat.completions)
+
+
+class ChatWithStreamingResponse:
+    def __init__(self, chat: Chat) -> None:
+        self._chat = chat
+
+    @cached_property
+    def completions(self) -> CompletionsWithStreamingResponse:
+        return CompletionsWithStreamingResponse(self._chat.completions)
+
+
+class AsyncChatWithStreamingResponse:
+    def __init__(self, chat: AsyncChat) -> None:
+        self._chat = chat
+
+    @cached_property
+    def completions(self) -> AsyncCompletionsWithStreamingResponse:
+        return AsyncCompletionsWithStreamingResponse(self._chat.completions)
@@ -0,0 +1,829 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, List, Union, Iterable, Optional, overload
+from typing_extensions import Literal
+
+import httpx
+
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import maybe_transform, async_maybe_transform
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import (
+    to_raw_response_wrapper,
+    to_streamed_response_wrapper,
+    async_to_raw_response_wrapper,
+    async_to_streamed_response_wrapper,
+)
+from ..._streaming import Stream, AsyncStream
+from ...types.chat import completion_create_params
+from ..._base_client import make_request_options
+from ...types.chat.chat_completion import ChatCompletion
+from ...types.chat.chat_completion_chunk import ChatCompletionChunk
+from ...types.chat.chat_completion_tool_param import ChatCompletionToolParam
+from ...types.chat.chat_completion_message_param import ChatCompletionMessageParam
+from ...types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam
+
+__all__ = ["Completions", "AsyncCompletions"]
+
+
+class Completions(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> CompletionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
+        return CompletionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> CompletionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
+        return CompletionsWithStreamingResponse(self)
+
+    @overload
+    def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: str,
+        exclude_domains: Optional[List[str]] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN,
+        functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN,
+        include_domains: Optional[List[str]] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[Literal["none", "default"]] | NotGiven = NOT_GIVEN,
+        reasoning_format: Optional[Literal["hidden", "raw", "parsed"]] | NotGiven = NOT_GIVEN,
+        response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN,
+        search_settings: Optional[completion_create_params.SearchSettings] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "on_demand", "flex", "performance"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[ChatCompletionToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[ChatCompletionToolParam]] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion:
+        ...
+
+    @overload
+    def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: str,
+        exclude_domains: Optional[List[str]] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN,
+        functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN,
+        include_domains: Optional[List[str]] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[Literal["none", "default"]] | NotGiven = NOT_GIVEN,
+        reasoning_format: Optional[Literal["hidden", "raw", "parsed"]] | NotGiven = NOT_GIVEN,
+        response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN,
+        search_settings: Optional[completion_create_params.SearchSettings] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "on_demand", "flex", "performance"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Literal[True],
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[ChatCompletionToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[ChatCompletionToolParam]] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Stream[ChatCompletionChunk]:
+        ...
+
+    @overload
+    def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: str,
+        exclude_domains: Optional[List[str]] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN,
+        functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN,
+        include_domains: Optional[List[str]] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[Literal["none", "default"]] | NotGiven = NOT_GIVEN,
+        reasoning_format: Optional[Literal["hidden", "raw", "parsed"]] | NotGiven = NOT_GIVEN,
+        response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN,
+        search_settings: Optional[completion_create_params.SearchSettings] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "on_demand", "flex", "performance"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: bool,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[ChatCompletionToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[ChatCompletionToolParam]] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion | Stream[ChatCompletionChunk]:
+        ...
+
+    def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: Union[
+            str,
+            Literal[
+                "gemma2-9b-it",
+                "llama-3.3-70b-versatile",
+                "llama-3.1-8b-instant",
+                "llama-guard-3-8b",
+                "llama3-70b-8192",
+                "llama3-8b-8192",
+            ],
+        ],
+        exclude_domains: Optional[List[str]] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN,
+        functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN,
+        include_domains: Optional[List[str]] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[Literal["none", "default"]] | NotGiven = NOT_GIVEN,
+        reasoning_format: Optional[Literal["hidden", "raw", "parsed"]] | NotGiven = NOT_GIVEN,
+        response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN,
+        search_settings: Optional[completion_create_params.SearchSettings] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "on_demand", "flex", "performance"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[ChatCompletionToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[ChatCompletionToolParam]] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion | Stream[ChatCompletionChunk]:
+        """
+        Creates a model response for the given chat conversation.
+
+        Args:
+          messages: A list of messages comprising the conversation so far.
+
+          model: ID of the model to use. For details on which models are compatible with the Chat
+              API, see available [models](https://console.groq.com/docs/models)
+
+          exclude_domains: Deprecated: Use search_settings.exclude_domains instead. A list of domains to
+              exclude from the search results when the model uses a web search tool.
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+          function_call: Deprecated in favor of `tool_choice`.
+
+              Controls which (if any) function is called by the model. `none` means the model
+              will not call a function and instead generates a message. `auto` means the model
+              can pick between generating a message or calling a function. Specifying a
+              particular function via `{"name": "my_function"}` forces the model to call that
+              function.
+
+              `none` is the default when no functions are present. `auto` is the default if
+              functions are present.
+
+          functions: Deprecated in favor of `tools`.
+
+              A list of functions the model may generate JSON inputs for.
+
+          include_domains: Deprecated: Use search_settings.include_domains instead. A list of domains to
+              include in the search results when the model uses a web search tool.
+
+          logit_bias: This is not yet supported by any of our models. Modify the likelihood of
+              specified tokens appearing in the completion.
+
+          logprobs: This is not yet supported by any of our models. Whether to return log
+              probabilities of the output tokens or not. If true, returns the log
+              probabilities of each output token returned in the `content` of `message`.
+
+          max_completion_tokens: The maximum number of tokens that can be generated in the chat completion. The
+              total length of input tokens and generated tokens is limited by the model's
+              context length.
+
+          max_tokens: Deprecated in favor of `max_completion_tokens`. The maximum number of tokens
+              that can be generated in the chat completion. The total length of input tokens
+              and generated tokens is limited by the model's context length.
+
+          metadata: This parameter is not currently supported.
+
+          n: How many chat completion choices to generate for each input message. Note that
+              the current moment, only n=1 is supported. Other values will result in a 400
+              response.
+
+          parallel_tool_calls: Whether to enable parallel function calling during tool use.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+          reasoning_effort: this field is only available for qwen3 models. Set to 'none' to disable
+              reasoning. Set to 'default' or null to let Qwen reason.
+
+          reasoning_format: Specifies how to output reasoning tokens
+
+          response_format: An object specifying the format that the model must output. Setting to
+              `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs
+              which ensures the model will match your supplied JSON schema. json_schema
+              response format is only supported on llama 4 models. Setting to
+              `{ "type": "json_object" }` enables the older JSON mode, which ensures the
+              message the model generates is valid JSON. Using `json_schema` is preferred for
+              models that support it.
+
+          search_settings: Settings for web search functionality when the model uses a web search tool.
+
+          seed: If specified, our system will make a best effort to sample deterministically,
+              such that repeated requests with the same `seed` and parameters should return
+              the same result. Determinism is not guaranteed, and you should refer to the
+              `system_fingerprint` response parameter to monitor changes in the backend.
+
+          service_tier: The service tier to use for the request. Defaults to `on_demand`.
+
+              - `auto` will automatically select the highest tier available within the rate
+                limits of your organization.
+              - `flex` uses the flex tier, which will succeed or fail quickly.
+
+          stop: Up to 4 sequences where the API will stop generating further tokens. The
+              returned text will not contain the stop sequence.
+
+          store: This parameter is not currently supported.
+
+          stream: If set, partial message deltas will be sent. Tokens will be sent as data-only
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
+              as they become available, with the stream terminated by a `data: [DONE]`
+              message. [Example code](/docs/text-chat#streaming-a-chat-completion).
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or top_p but not
+              both.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tool and instead generates a message. `auto` means the model can
+              pick between generating a message or calling one or more tools. `required` means
+              the model must call one or more tools. Specifying a particular tool via
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+              `none` is the default when no tools are present. `auto` is the default if tools
+              are present.
+
+          tools: A list of tools the model may call. Currently, only functions are supported as a
+              tool. Use this to provide a list of functions the model may generate JSON inputs
+              for. A max of 128 functions are supported.
+
+          top_logprobs: This is not yet supported by any of our models. An integer between 0 and 20
+              specifying the number of most likely tokens to return at each token position,
+              each with an associated log probability. `logprobs` must be set to `true` if
+              this parameter is used.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered. We
+              generally recommend altering this or temperature but not both.
+
+          user: A unique identifier representing your end-user, which can help us monitor and
+              detect abuse.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/openai/v1/chat/completions",
+            body=maybe_transform(
+                {
+                    "messages": messages,
+                    "model": model,
+                    "exclude_domains": exclude_domains,
+                    "frequency_penalty": frequency_penalty,
+                    "function_call": function_call,
+                    "functions": functions,
+                    "include_domains": include_domains,
+                    "logit_bias": logit_bias,
+                    "logprobs": logprobs,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_tokens": max_tokens,
+                    "metadata": metadata,
+                    "n": n,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "presence_penalty": presence_penalty,
+                    "reasoning_effort": reasoning_effort,
+                    "reasoning_format": reasoning_format,
+                    "response_format": response_format,
+                    "search_settings": search_settings,
+                    "seed": seed,
+                    "service_tier": service_tier,
+                    "stop": stop,
+                    "store": store,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_logprobs": top_logprobs,
+                    "top_p": top_p,
+                    "user": user,
+                },
+                completion_create_params.CompletionCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ChatCompletion,
+            stream=stream or False,
+            stream_cls=Stream[ChatCompletionChunk],
+        )
+
+
+class AsyncCompletions(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncCompletionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
+        return AsyncCompletionsWithStreamingResponse(self)
+
+    @overload
+    async def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: str,
+        exclude_domains: Optional[List[str]] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN,
+        functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN,
+        include_domains: Optional[List[str]] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[Literal["none", "default"]] | NotGiven = NOT_GIVEN,
+        reasoning_format: Optional[Literal["hidden", "raw", "parsed"]] | NotGiven = NOT_GIVEN,
+        response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN,
+        search_settings: Optional[completion_create_params.SearchSettings] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "on_demand", "flex", "performance"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[ChatCompletionToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[ChatCompletionToolParam]] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion:
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: str,
+        exclude_domains: Optional[List[str]] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN,
+        functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN,
+        include_domains: Optional[List[str]] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[Literal["none", "default"]] | NotGiven = NOT_GIVEN,
+        reasoning_format: Optional[Literal["hidden", "raw", "parsed"]] | NotGiven = NOT_GIVEN,
+        response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN,
+        search_settings: Optional[completion_create_params.SearchSettings] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "on_demand", "flex", "performance"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Literal[True],
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[ChatCompletionToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[ChatCompletionToolParam]] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncStream[ChatCompletionChunk]:
+        ...
+
+    @overload
+    async def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: str,
+        exclude_domains: Optional[List[str]] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN,
+        functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN,
+        include_domains: Optional[List[str]] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[Literal["none", "default"]] | NotGiven = NOT_GIVEN,
+        reasoning_format: Optional[Literal["hidden", "raw", "parsed"]] | NotGiven = NOT_GIVEN,
+        response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN,
+        search_settings: Optional[completion_create_params.SearchSettings] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "on_demand", "flex", "performance"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: bool,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[ChatCompletionToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[ChatCompletionToolParam]] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
+        ...
+
+    async def create(
+        self,
+        *,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: Union[
+            str,
+            Literal[
+                "gemma2-9b-it",
+                "llama-3.3-70b-versatile",
+                "llama-3.1-8b-instant",
+                "llama-guard-3-8b",
+                "llama3-70b-8192",
+                "llama3-8b-8192",
+            ],
+        ],
+        exclude_domains: Optional[List[str]] | NotGiven = NOT_GIVEN,
+        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        function_call: Optional[completion_create_params.FunctionCall] | NotGiven = NOT_GIVEN,
+        functions: Optional[Iterable[completion_create_params.Function]] | NotGiven = NOT_GIVEN,
+        include_domains: Optional[List[str]] | NotGiven = NOT_GIVEN,
+        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
+        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
+        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
+        metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN,
+        n: Optional[int] | NotGiven = NOT_GIVEN,
+        parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
+        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
+        reasoning_effort: Optional[Literal["none", "default"]] | NotGiven = NOT_GIVEN,
+        reasoning_format: Optional[Literal["hidden", "raw", "parsed"]] | NotGiven = NOT_GIVEN,
+        response_format: Optional[completion_create_params.ResponseFormat] | NotGiven = NOT_GIVEN,
+        search_settings: Optional[completion_create_params.SearchSettings] | NotGiven = NOT_GIVEN,
+        seed: Optional[int] | NotGiven = NOT_GIVEN,
+        service_tier: Optional[Literal["auto", "on_demand", "flex", "performance"]] | NotGiven = NOT_GIVEN,
+        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
+        store: Optional[bool] | NotGiven = NOT_GIVEN,
+        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
+        temperature: Optional[float] | NotGiven = NOT_GIVEN,
+        tool_choice: Optional[ChatCompletionToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
+        tools: Optional[Iterable[ChatCompletionToolParam]] | NotGiven = NOT_GIVEN,
+        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
+        top_p: Optional[float] | NotGiven = NOT_GIVEN,
+        user: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
+        """
+        Creates a model response for the given chat conversation.
+
+        Args:
+          messages: A list of messages comprising the conversation so far.
+
+          model: ID of the model to use. For details on which models are compatible with the Chat
+              API, see available [models](https://console.groq.com/docs/models)
+
+          exclude_domains: Deprecated: Use search_settings.exclude_domains instead. A list of domains to
+              exclude from the search results when the model uses a web search tool.
+
+          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
+              existing frequency in the text so far, decreasing the model's likelihood to
+              repeat the same line verbatim.
+
+          function_call: Deprecated in favor of `tool_choice`.
+
+              Controls which (if any) function is called by the model. `none` means the model
+              will not call a function and instead generates a message. `auto` means the model
+              can pick between generating a message or calling a function. Specifying a
+              particular function via `{"name": "my_function"}` forces the model to call that
+              function.
+
+              `none` is the default when no functions are present. `auto` is the default if
+              functions are present.
+
+          functions: Deprecated in favor of `tools`.
+
+              A list of functions the model may generate JSON inputs for.
+
+          include_domains: Deprecated: Use search_settings.include_domains instead. A list of domains to
+              include in the search results when the model uses a web search tool.
+
+          logit_bias: This is not yet supported by any of our models. Modify the likelihood of
+              specified tokens appearing in the completion.
+
+          logprobs: This is not yet supported by any of our models. Whether to return log
+              probabilities of the output tokens or not. If true, returns the log
+              probabilities of each output token returned in the `content` of `message`.
+
+          max_completion_tokens: The maximum number of tokens that can be generated in the chat completion. The
+              total length of input tokens and generated tokens is limited by the model's
+              context length.
+
+          max_tokens: Deprecated in favor of `max_completion_tokens`. The maximum number of tokens
+              that can be generated in the chat completion. The total length of input tokens
+              and generated tokens is limited by the model's context length.
+
+          metadata: This parameter is not currently supported.
+
+          n: How many chat completion choices to generate for each input message. Note that
+              the current moment, only n=1 is supported. Other values will result in a 400
+              response.
+
+          parallel_tool_calls: Whether to enable parallel function calling during tool use.
+
+          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
+              whether they appear in the text so far, increasing the model's likelihood to
+              talk about new topics.
+
+          reasoning_effort: this field is only available for qwen3 models. Set to 'none' to disable
+              reasoning. Set to 'default' or null to let Qwen reason.
+
+          reasoning_format: Specifies how to output reasoning tokens
+
+          response_format: An object specifying the format that the model must output. Setting to
+              `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs
+              which ensures the model will match your supplied JSON schema. json_schema
+              response format is only supported on llama 4 models. Setting to
+              `{ "type": "json_object" }` enables the older JSON mode, which ensures the
+              message the model generates is valid JSON. Using `json_schema` is preferred for
+              models that support it.
+
+          search_settings: Settings for web search functionality when the model uses a web search tool.
+
+          seed: If specified, our system will make a best effort to sample deterministically,
+              such that repeated requests with the same `seed` and parameters should return
+              the same result. Determinism is not guaranteed, and you should refer to the
+              `system_fingerprint` response parameter to monitor changes in the backend.
+
+          service_tier: The service tier to use for the request. Defaults to `on_demand`.
+
+              - `auto` will automatically select the highest tier available within the rate
+                limits of your organization.
+              - `flex` uses the flex tier, which will succeed or fail quickly.
+
+          stop: Up to 4 sequences where the API will stop generating further tokens. The
+              returned text will not contain the stop sequence.
+
+          store: This parameter is not currently supported.
+
+          stream: If set, partial message deltas will be sent. Tokens will be sent as data-only
+              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
+              as they become available, with the stream terminated by a `data: [DONE]`
+              message. [Example code](/docs/text-chat#streaming-a-chat-completion).
+
+          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
+              make the output more random, while lower values like 0.2 will make it more
+              focused and deterministic. We generally recommend altering this or top_p but not
+              both.
+
+          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
+              not call any tool and instead generates a message. `auto` means the model can
+              pick between generating a message or calling one or more tools. `required` means
+              the model must call one or more tools. Specifying a particular tool via
+              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
+              call that tool.
+
+              `none` is the default when no tools are present. `auto` is the default if tools
+              are present.
+
+          tools: A list of tools the model may call. Currently, only functions are supported as a
+              tool. Use this to provide a list of functions the model may generate JSON inputs
+              for. A max of 128 functions are supported.
+
+          top_logprobs: This is not yet supported by any of our models. An integer between 0 and 20
+              specifying the number of most likely tokens to return at each token position,
+              each with an associated log probability. `logprobs` must be set to `true` if
+              this parameter is used.
+
+          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
+              model considers the results of the tokens with top_p probability mass. So 0.1
+              means only the tokens comprising the top 10% probability mass are considered. We
+              generally recommend altering this or temperature but not both.
+
+          user: A unique identifier representing your end-user, which can help us monitor and
+              detect abuse.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/openai/v1/chat/completions",
+            body=await async_maybe_transform(
+                {
+                    "messages": messages,
+                    "model": model,
+                    "exclude_domains": exclude_domains,
+                    "frequency_penalty": frequency_penalty,
+                    "function_call": function_call,
+                    "functions": functions,
+                    "include_domains": include_domains,
+                    "logit_bias": logit_bias,
+                    "logprobs": logprobs,
+                    "max_completion_tokens": max_completion_tokens,
+                    "max_tokens": max_tokens,
+                    "metadata": metadata,
+                    "n": n,
+                    "parallel_tool_calls": parallel_tool_calls,
+                    "presence_penalty": presence_penalty,
+                    "reasoning_effort": reasoning_effort,
+                    "reasoning_format": reasoning_format,
+                    "response_format": response_format,
+                    "search_settings": search_settings,
+                    "seed": seed,
+                    "service_tier": service_tier,
+                    "stop": stop,
+                    "store": store,
+                    "stream": stream,
+                    "temperature": temperature,
+                    "tool_choice": tool_choice,
+                    "tools": tools,
+                    "top_logprobs": top_logprobs,
+                    "top_p": top_p,
+                    "user": user,
+                },
+                completion_create_params.CompletionCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ChatCompletion,
+            stream=stream or False,
+            stream_cls=AsyncStream[ChatCompletionChunk],
+        )
+
+
+class CompletionsWithRawResponse:
+    def __init__(self, completions: Completions) -> None:
+        self._completions = completions
+
+        self.create = to_raw_response_wrapper(
+            completions.create,
+        )
+
+
+class AsyncCompletionsWithRawResponse:
+    def __init__(self, completions: AsyncCompletions) -> None:
+        self._completions = completions
+
+        self.create = async_to_raw_response_wrapper(
+            completions.create,
+        )
+
+
+class CompletionsWithStreamingResponse:
+    def __init__(self, completions: Completions) -> None:
+        self._completions = completions
+
+        self.create = to_streamed_response_wrapper(
+            completions.create,
+        )
+
+
+class AsyncCompletionsWithStreamingResponse:
+    def __init__(self, completions: AsyncCompletions) -> None:
+        self._completions = completions
+
+        self.create = async_to_streamed_response_wrapper(
+            completions.create,
+        )
@@ -0,0 +1,212 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List, Union, Optional
+from typing_extensions import Literal
+
+import httpx
+
+from ..types import embedding_create_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._utils import maybe_transform, async_maybe_transform
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import (
+    to_raw_response_wrapper,
+    to_streamed_response_wrapper,
+    async_to_raw_response_wrapper,
+    async_to_streamed_response_wrapper,
+)
+from .._base_client import make_request_options
+from ..types.create_embedding_response import CreateEmbeddingResponse
+
+__all__ = ["Embeddings", "AsyncEmbeddings"]
+
+
+class Embeddings(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> EmbeddingsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
+        return EmbeddingsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> EmbeddingsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
+        return EmbeddingsWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        input: Union[str, List[str]],
+        model: Union[str, Literal["nomic-embed-text-v1_5"]],
+        encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN,
+        user: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> CreateEmbeddingResponse:
+        """
+        Creates an embedding vector representing the input text.
+
+        Args:
+          input: Input text to embed, encoded as a string or array of tokens. To embed multiple
+              inputs in a single request, pass an array of strings or array of token arrays.
+              The input must not exceed the max input tokens for the model, cannot be an empty
+              string, and any array must be 2048 dimensions or less.
+
+          model: ID of the model to use.
+
+          encoding_format: The format to return the embeddings in. Can only be `float` or `base64`.
+
+          user: A unique identifier representing your end-user, which can help us monitor and
+              detect abuse.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/openai/v1/embeddings",
+            body=maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                    "encoding_format": encoding_format,
+                    "user": user,
+                },
+                embedding_create_params.EmbeddingCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=CreateEmbeddingResponse,
+        )
+
+
+class AsyncEmbeddings(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncEmbeddingsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncEmbeddingsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncEmbeddingsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
+        return AsyncEmbeddingsWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        input: Union[str, List[str]],
+        model: Union[str, Literal["nomic-embed-text-v1_5"]],
+        encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN,
+        user: Optional[str] | NotGiven = NOT_GIVEN,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> CreateEmbeddingResponse:
+        """
+        Creates an embedding vector representing the input text.
+
+        Args:
+          input: Input text to embed, encoded as a string or array of tokens. To embed multiple
+              inputs in a single request, pass an array of strings or array of token arrays.
+              The input must not exceed the max input tokens for the model, cannot be an empty
+              string, and any array must be 2048 dimensions or less.
+
+          model: ID of the model to use.
+
+          encoding_format: The format to return the embeddings in. Can only be `float` or `base64`.
+
+          user: A unique identifier representing your end-user, which can help us monitor and
+              detect abuse.
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/openai/v1/embeddings",
+            body=await async_maybe_transform(
+                {
+                    "input": input,
+                    "model": model,
+                    "encoding_format": encoding_format,
+                    "user": user,
+                },
+                embedding_create_params.EmbeddingCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=CreateEmbeddingResponse,
+        )
+
+
+class EmbeddingsWithRawResponse:
+    def __init__(self, embeddings: Embeddings) -> None:
+        self._embeddings = embeddings
+
+        self.create = to_raw_response_wrapper(
+            embeddings.create,
+        )
+
+
+class AsyncEmbeddingsWithRawResponse:
+    def __init__(self, embeddings: AsyncEmbeddings) -> None:
+        self._embeddings = embeddings
+
+        self.create = async_to_raw_response_wrapper(
+            embeddings.create,
+        )
+
+
+class EmbeddingsWithStreamingResponse:
+    def __init__(self, embeddings: Embeddings) -> None:
+        self._embeddings = embeddings
+
+        self.create = to_streamed_response_wrapper(
+            embeddings.create,
+        )
+
+
+class AsyncEmbeddingsWithStreamingResponse:
+    def __init__(self, embeddings: AsyncEmbeddings) -> None:
+        self._embeddings = embeddings
+
+        self.create = async_to_streamed_response_wrapper(
+            embeddings.create,
+        )
@@ -0,0 +1,513 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Mapping, cast
+from typing_extensions import Literal
+
+import httpx
+
+from ..types import file_create_params
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
+from .._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import (
+    BinaryAPIResponse,
+    AsyncBinaryAPIResponse,
+    StreamedBinaryAPIResponse,
+    AsyncStreamedBinaryAPIResponse,
+    to_raw_response_wrapper,
+    to_streamed_response_wrapper,
+    async_to_raw_response_wrapper,
+    to_custom_raw_response_wrapper,
+    async_to_streamed_response_wrapper,
+    to_custom_streamed_response_wrapper,
+    async_to_custom_raw_response_wrapper,
+    async_to_custom_streamed_response_wrapper,
+)
+from .._base_client import make_request_options
+from ..types.file_info_response import FileInfoResponse
+from ..types.file_list_response import FileListResponse
+from ..types.file_create_response import FileCreateResponse
+from ..types.file_delete_response import FileDeleteResponse
+
+__all__ = ["Files", "AsyncFiles"]
+
+
+class Files(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> FilesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
+        return FilesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> FilesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
+        return FilesWithStreamingResponse(self)
+
+    def create(
+        self,
+        *,
+        file: FileTypes,
+        purpose: Literal["batch"],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FileCreateResponse:
+        """
+        Upload a file that can be used across various endpoints.
+
+        The Batch API only supports `.jsonl` files up to 100 MB in size. The input also
+        has a specific required [format](/docs/batch).
+
+        Please contact us if you need to increase these storage limits.
+
+        Args:
+          file: The File object (not file name) to be uploaded.
+
+          purpose: The intended purpose of the uploaded file. Use "batch" for
+              [Batch API](/docs/api-reference#batches).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "file": file,
+                "purpose": purpose,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return self._post(
+            "/openai/v1/files",
+            body=maybe_transform(body, file_create_params.FileCreateParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FileCreateResponse,
+        )
+
+    def list(
+        self,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FileListResponse:
+        """Returns a list of files."""
+        return self._get(
+            "/openai/v1/files",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FileListResponse,
+        )
+
+    def delete(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FileDeleteResponse:
+        """
+        Delete a file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        return self._delete(
+            f"/openai/v1/files/{file_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FileDeleteResponse,
+        )
+
+    def content(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> BinaryAPIResponse:
+        """
+        Returns the contents of the specified file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})}
+        return self._get(
+            f"/openai/v1/files/{file_id}/content",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=BinaryAPIResponse,
+        )
+
+    def info(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FileInfoResponse:
+        """
+        Returns information about a file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        return self._get(
+            f"/openai/v1/files/{file_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FileInfoResponse,
+        )
+
+
+class AsyncFiles(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncFilesWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncFilesWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncFilesWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
+        return AsyncFilesWithStreamingResponse(self)
+
+    async def create(
+        self,
+        *,
+        file: FileTypes,
+        purpose: Literal["batch"],
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FileCreateResponse:
+        """
+        Upload a file that can be used across various endpoints.
+
+        The Batch API only supports `.jsonl` files up to 100 MB in size. The input also
+        has a specific required [format](/docs/batch).
+
+        Please contact us if you need to increase these storage limits.
+
+        Args:
+          file: The File object (not file name) to be uploaded.
+
+          purpose: The intended purpose of the uploaded file. Use "batch" for
+              [Batch API](/docs/api-reference#batches).
+
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        body = deepcopy_minimal(
+            {
+                "file": file,
+                "purpose": purpose,
+            }
+        )
+        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
+        # It should be noted that the actual Content-Type header that will be
+        # sent to the server will contain a `boundary` parameter, e.g.
+        # multipart/form-data; boundary=---abc--
+        extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
+        return await self._post(
+            "/openai/v1/files",
+            body=await async_maybe_transform(body, file_create_params.FileCreateParams),
+            files=files,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FileCreateResponse,
+        )
+
+    async def list(
+        self,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FileListResponse:
+        """Returns a list of files."""
+        return await self._get(
+            "/openai/v1/files",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FileListResponse,
+        )
+
+    async def delete(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FileDeleteResponse:
+        """
+        Delete a file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        return await self._delete(
+            f"/openai/v1/files/{file_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FileDeleteResponse,
+        )
+
+    async def content(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> AsyncBinaryAPIResponse:
+        """
+        Returns the contents of the specified file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})}
+        return await self._get(
+            f"/openai/v1/files/{file_id}/content",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=AsyncBinaryAPIResponse,
+        )
+
+    async def info(
+        self,
+        file_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> FileInfoResponse:
+        """
+        Returns information about a file.
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not file_id:
+            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
+        return await self._get(
+            f"/openai/v1/files/{file_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=FileInfoResponse,
+        )
+
+
+class FilesWithRawResponse:
+    def __init__(self, files: Files) -> None:
+        self._files = files
+
+        self.create = to_raw_response_wrapper(
+            files.create,
+        )
+        self.list = to_raw_response_wrapper(
+            files.list,
+        )
+        self.delete = to_raw_response_wrapper(
+            files.delete,
+        )
+        self.content = to_custom_raw_response_wrapper(
+            files.content,
+            BinaryAPIResponse,
+        )
+        self.info = to_raw_response_wrapper(
+            files.info,
+        )
+
+
+class AsyncFilesWithRawResponse:
+    def __init__(self, files: AsyncFiles) -> None:
+        self._files = files
+
+        self.create = async_to_raw_response_wrapper(
+            files.create,
+        )
+        self.list = async_to_raw_response_wrapper(
+            files.list,
+        )
+        self.delete = async_to_raw_response_wrapper(
+            files.delete,
+        )
+        self.content = async_to_custom_raw_response_wrapper(
+            files.content,
+            AsyncBinaryAPIResponse,
+        )
+        self.info = async_to_raw_response_wrapper(
+            files.info,
+        )
+
+
+class FilesWithStreamingResponse:
+    def __init__(self, files: Files) -> None:
+        self._files = files
+
+        self.create = to_streamed_response_wrapper(
+            files.create,
+        )
+        self.list = to_streamed_response_wrapper(
+            files.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            files.delete,
+        )
+        self.content = to_custom_streamed_response_wrapper(
+            files.content,
+            StreamedBinaryAPIResponse,
+        )
+        self.info = to_streamed_response_wrapper(
+            files.info,
+        )
+
+
+class AsyncFilesWithStreamingResponse:
+    def __init__(self, files: AsyncFiles) -> None:
+        self._files = files
+
+        self.create = async_to_streamed_response_wrapper(
+            files.create,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            files.list,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            files.delete,
+        )
+        self.content = async_to_custom_streamed_response_wrapper(
+            files.content,
+            AsyncStreamedBinaryAPIResponse,
+        )
+        self.info = async_to_streamed_response_wrapper(
+            files.info,
+        )
@@ -0,0 +1,293 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import httpx
+
+from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import (
+    to_raw_response_wrapper,
+    to_streamed_response_wrapper,
+    async_to_raw_response_wrapper,
+    async_to_streamed_response_wrapper,
+)
+from ..types.model import Model
+from .._base_client import make_request_options
+from ..types.model_deleted import ModelDeleted
+from ..types.model_list_response import ModelListResponse
+
+__all__ = ["Models", "AsyncModels"]
+
+
+class Models(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> ModelsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
+        return ModelsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> ModelsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
+        return ModelsWithStreamingResponse(self)
+
+    def retrieve(
+        self,
+        model: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Model:
+        """
+        Get a specific model
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not model:
+            raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
+        return self._get(
+            f"/openai/v1/models/{model}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Model,
+        )
+
+    def list(
+        self,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ModelListResponse:
+        """get all available models"""
+        return self._get(
+            "/openai/v1/models",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ModelListResponse,
+        )
+
+    def delete(
+        self,
+        model: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ModelDeleted:
+        """
+        Delete a model
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not model:
+            raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
+        return self._delete(
+            f"/openai/v1/models/{model}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ModelDeleted,
+        )
+
+
+class AsyncModels(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncModelsWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+
+        For more information, see https://www.github.com/groq/groq-python#accessing-raw-response-data-eg-headers
+        """
+        return AsyncModelsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncModelsWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+        For more information, see https://www.github.com/groq/groq-python#with_streaming_response
+        """
+        return AsyncModelsWithStreamingResponse(self)
+
+    async def retrieve(
+        self,
+        model: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> Model:
+        """
+        Get a specific model
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not model:
+            raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
+        return await self._get(
+            f"/openai/v1/models/{model}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=Model,
+        )
+
+    async def list(
+        self,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ModelListResponse:
+        """get all available models"""
+        return await self._get(
+            "/openai/v1/models",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ModelListResponse,
+        )
+
+    async def delete(
+        self,
+        model: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+    ) -> ModelDeleted:
+        """
+        Delete a model
+
+        Args:
+          extra_headers: Send extra headers
+
+          extra_query: Add additional query parameters to the request
+
+          extra_body: Add additional JSON properties to the request
+
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not model:
+            raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
+        return await self._delete(
+            f"/openai/v1/models/{model}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=ModelDeleted,
+        )
+
+
+class ModelsWithRawResponse:
+    def __init__(self, models: Models) -> None:
+        self._models = models
+
+        self.retrieve = to_raw_response_wrapper(
+            models.retrieve,
+        )
+        self.list = to_raw_response_wrapper(
+            models.list,
+        )
+        self.delete = to_raw_response_wrapper(
+            models.delete,
+        )
+
+
+class AsyncModelsWithRawResponse:
+    def __init__(self, models: AsyncModels) -> None:
+        self._models = models
+
+        self.retrieve = async_to_raw_response_wrapper(
+            models.retrieve,
+        )
+        self.list = async_to_raw_response_wrapper(
+            models.list,
+        )
+        self.delete = async_to_raw_response_wrapper(
+            models.delete,
+        )
+
+
+class ModelsWithStreamingResponse:
+    def __init__(self, models: Models) -> None:
+        self._models = models
+
+        self.retrieve = to_streamed_response_wrapper(
+            models.retrieve,
+        )
+        self.list = to_streamed_response_wrapper(
+            models.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            models.delete,
+        )
+
+
+class AsyncModelsWithStreamingResponse:
+    def __init__(self, models: AsyncModels) -> None:
+        self._models = models
+
+        self.retrieve = async_to_streamed_response_wrapper(
+            models.retrieve,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            models.list,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            models.delete,
+        )