> ## Documentation Index
> Fetch the complete documentation index at: https://heygen-1fa696a7.mintlify.app/llms.txt
> Use this file to discover all available pages before exploring further.

# Generate Speech

> Synthesize speech audio from text using a specified voice. The voice must support the starfish engine — use GET /v3/voices?engine=starfish to find compatible voices. Supports plain text and SSML. Speed range: 0.5–2.0x. Returns a URL to the generated audio file along with duration and optional word-level timestamps.



## OpenAPI

````yaml /openapi/external-api.json post /v3/voices/speech
# Document header: spec version, API metadata, server list, default
# authentication, and the tag catalogue used to group operations.
openapi: '3.1.0'
info:
  title: HeyGen External API
  version: '1.0.0'
  contact:
    name: HeyGen Product Infra
    url: https://heygen.com
  description: >-
    HeyGen's external API for programmatic AI video creation.
    See https://docs.heygen.com for full documentation.
servers:
  - description: Production
    url: https://api.heygen.com
# Default security requirement; the two entries are alternatives, so a
# request may present either credential.
security:
  - ApiKeyAuth: []
  - BearerAuth: []
# Tag catalogue. Sequence order is kept as-is because renderers may use it
# to order navigation groups.
tags:
  - name: Video Agent
    description: Create videos from text prompts using AI
  - name: Videos
    description: Create, list, retrieve, and delete videos
  - name: Voices
    description: Text-to-speech and voice management
  - name: Audio
    description: Search the background-music and sound-effects catalog
  - name: Video Translate
    description: Translate videos into other languages
  - name: User
    description: Account information and billing
  - name: Avatars
    description: List and manage avatars and looks
  - name: Assets
    description: Upload files for use in video creation
  - name: Webhooks
    description: Manage webhook endpoints and events
  - name: Lipsync
    description: Dub or replace audio on existing videos
  - name: Brand
    description: >-
      Brand-related resources — brand kits (colors, fonts, logos) and
      brand glossaries (custom term translations)
paths:
  /v3/voices/speech:
    # Text-to-speech operation: takes a TextToSpeechRequest JSON body and
    # returns the result wrapped in a `data` envelope; failures are wrapped
    # in an `error` envelope around StandardAPIError.
    post:
      tags:
        - Voices
      summary: Generate Speech
      description: >-
        Synthesize speech audio from text using a specified voice. The voice
        must support the starfish engine — use GET /v3/voices?engine=starfish to
        find compatible voices. Supports plain text and SSML. Speed range:
        0.5–2.0x. Returns a URL to the generated audio file along with duration
        and optional word-level timestamps.
      operationId: createSpeechV3
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/TextToSpeechRequest'
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                type: object
                properties:
                  data:
                    $ref: '#/components/schemas/TextToSpeechResponseData'
              # Illustrative success payload, assembled from the per-field
              # examples declared on TextToSpeechResponseData and
              # WordWithTimestamp, so the success case is documented the
              # same way as the error cases below.
              example:
                data:
                  audio_url: https://files.heygen.ai/audio/tts_abc123.mp3
                  duration: 4.5
                  request_id: req_abc123
                  word_timestamps:
                    - word: Hello
                      start: 0
                      end: 0.35
        '400':
          description: Invalid request parameters
          content:
            application/json:
              schema:
                type: object
                properties:
                  error:
                    $ref: '#/components/schemas/StandardAPIError'
              example:
                error:
                  code: invalid_parameter
                  message: '''voice_id'' is required.'
                  param: voice_id
                  doc_url: null
        '401':
          description: Authentication failed
          content:
            application/json:
              schema:
                type: object
                properties:
                  error:
                    $ref: '#/components/schemas/StandardAPIError'
              example:
                error:
                  code: authentication_failed
                  message: Invalid or expired API key. Verify your x-api-key header.
                  param: null
                  doc_url: null
        '429':
          description: Rate limit exceeded
          content:
            application/json:
              schema:
                type: object
                properties:
                  error:
                    $ref: '#/components/schemas/StandardAPIError'
              example:
                error:
                  code: rate_limit_exceeded
                  message: >-
                    Too many requests. Retry after the duration specified in the
                    Retry-After header.
                  param: null
                  doc_url: null
          # Back-off hint returned alongside the 429 body.
          headers:
            Retry-After:
              description: Seconds to wait before retrying
              schema:
                type: integer
      # Restates the document-level default explicitly for this operation:
      # either credential is accepted.
      security:
        - ApiKeyAuth: []
        - BearerAuth: []
components:
  schemas:
    # Request body for POST /v3/voices/speech. Only `text` and `voice_id` are
    # required; unknown properties are rejected (additionalProperties: false).
    TextToSpeechRequest:
      additionalProperties: false
      description: Request body for text-to-speech generation.
      properties:
        text:
          description: Text to synthesize (1-5000 characters).
          maxLength: 5000
          minLength: 1
          title: Text
          type: string
        voice_id:
          description: >-
            Voice ID to use. The voice must support the starfish engine. Filter
            compatible voices by passing engine=starfish to the voice listing
            endpoint.
          title: Voice Id
          type: string
        input_type:
          default: text
          description: >-
            Type of the input: 'text' for plain text, 'ssml' for SSML markup.
            Defaults to 'text'.
          # Machine-readable form of the two values named in the description,
          # so validators and generated clients can reject anything else.
          enum:
            - text
            - ssml
          title: Input Type
          type: string
        speed:
          default: 1
          description: Speed multiplier (0.5-2.0).
          maximum: 2
          minimum: 0.5
          title: Speed
          type: number
        language:
          anyOf:
            - type: string
            - type: 'null'
          default: null
          description: >-
            Base language code (e.g. 'en', 'pt', 'zh'). Optional — auto-detected
            from text when omitted.
          title: Language
        locale:
          anyOf:
            - type: string
            - type: 'null'
          default: null
          description: >-
            BCP-47 locale tag (e.g. 'en-US', 'pt-BR'). When set, language is
            inferred from locale.
          title: Locale
      required:
        - text
        - voice_id
      title: TextToSpeechRequest
      type: object
    # Payload carried in the `data` envelope of a successful speech response.
    TextToSpeechResponseData:
      type: object
      title: TextToSpeechResponseData
      description: Response payload for text-to-speech generation.
      # Only the audio location and its length are always present; the other
      # fields are nullable and default to null.
      required:
        - audio_url
        - duration
      properties:
        audio_url:
          type: string
          title: Audio Url
          description: URL of the generated audio file.
          examples:
            - https://files.heygen.ai/audio/tts_abc123.mp3
        duration:
          type: number
          title: Duration
          description: Duration of the audio in seconds.
          examples:
            - 4.5
        request_id:
          title: Request Id
          description: Unique identifier for this generation request.
          default: null
          anyOf:
            - type: string
            - type: 'null'
          examples:
            - req_abc123
        word_timestamps:
          title: Word Timestamps
          description: Word-level timing data.
          default: null
          # Either a list of per-word timing entries or null.
          anyOf:
            - type: array
              items:
                $ref: '#/components/schemas/WordWithTimestamp'
            - type: 'null'
    # Error object carried in the `error` envelope of non-2xx responses.
    # `code` and `message` are always present; `param` and `doc_url` may be
    # null.
    StandardAPIError:
      type: object
      properties:
        code:
          type: string
          description: Machine-readable error code
          # OpenAPI 3.1 deprecates the singular Schema Object `example`
          # keyword; use the JSON Schema `examples` array instead.
          examples:
            - invalid_parameter
        message:
          type: string
          description: Human-readable error message
          examples:
            - Video not found
        param:
          type:
            - string
            - 'null'
          description: Which request field caused the error
        doc_url:
          type:
            - string
            - 'null'
          description: Link to error documentation
      required:
        - code
        - message
    # One timing entry: a word plus its start and end offsets in seconds.
    # All three fields are required.
    WordWithTimestamp:
      type: object
      title: WordWithTimestamp
      description: Word-level timing data from TTS generation.
      required:
        - word
        - start
        - end
      properties:
        word:
          type: string
          title: Word
          description: The word.
          examples:
            - Hello
        start:
          type: number
          title: Start
          description: Start time in seconds.
          examples:
            - 0
        end:
          type: number
          title: End
          description: End time in seconds.
          examples:
            - 0.35
  # Credential mechanisms referenced by the `security` requirements.
  securitySchemes:
    # API key sent in the `x-api-key` request header.
    ApiKeyAuth:
      description: HeyGen API key. Obtain from your HeyGen dashboard.
      type: apiKey
      name: x-api-key
      in: header
    # HTTP bearer-token authentication.
    BearerAuth:
      description: OAuth2 bearer token.
      type: http
      scheme: bearer

````