{"$schema":"https://json-schema.org/draft/2020-12/schema","type":"object","properties":{"audio_url":{"description":"The URL of the audio file to transcribe. Can be a publicly accessible URL or a data URI (data:audio/...;base64,...). For data URIs, the audio will be uploaded to AssemblyAI automatically.","type":"string"},"language_code":{"description":"The language code for the audio file (e.g., \"en\", \"es\", \"fr\"). Defaults to automatic language detection.","type":"string"},"language_detection":{"description":"Enable automatic language detection. When enabled with speech_models, the system will automatically select the best model for the detected language.","type":"boolean"},"prompt":{"description":"A custom prompt to guide transcription style, formatting, and output characteristics. Maximum 1,500 words.","type":"string"},"keyterms_prompt":{"description":"An array of up to 1,000 words or phrases (max 6 words per phrase) to improve transcription accuracy. Cannot be used with the prompt parameter.","type":"array","items":{"type":"string"}},"temperature":{"description":"Controls randomness in model output (0.0-1.0). Lower values make output more deterministic. Default is 0.0.","type":"number","minimum":0,"maximum":1},"speaker_labels":{"description":"Enable speaker diarization to identify different speakers in the audio.","type":"boolean"},"speakers_expected":{"description":"Expected number of speakers for speaker diarization.","type":"integer","minimum":1,"maximum":9007199254740991},"auto_chapters":{"description":"Enable automatic chapter detection.","type":"boolean"},"entity_detection":{"description":"Enable detection of entities like names, organizations, and locations.","type":"boolean"},"sentiment_analysis":{"description":"Enable sentiment analysis for each sentence.","type":"boolean"},"auto_highlights":{"description":"Enable automatic extraction of key phrases and highlights.","type":"boolean"},"content_safety":{"description":"Enable content safety detection for sensitive content.","type":"boolean"},"iab_categories":{"description":"Enable IAB (Interactive Advertising Bureau) content taxonomy classification.","type":"boolean"},"custom_spelling":{"description":"Custom spelling rules to replace specific words or phrases in the transcription output.","type":"array","items":{"type":"object","properties":{"from":{"type":"array","items":{"type":"string"}},"to":{"type":"string"}},"required":["from","to"],"additionalProperties":false}},"disfluencies":{"description":"Include filler words like \"um\", \"uh\", etc. in the transcript.","type":"boolean"},"multichannel":{"description":"Process each audio channel separately for multi-channel audio files.","type":"boolean"},"dual_channel":{"description":"Process audio as dual-channel (stereo) for better accuracy.","type":"boolean"},"webhook_url":{"description":"URL to receive webhook notifications when transcription is complete.","type":"string","format":"uri"},"audio_start_from":{"description":"Timestamp (in milliseconds) to start transcription from.","type":"integer","minimum":0,"maximum":9007199254740991},"audio_end_at":{"description":"Timestamp (in milliseconds) to end transcription at.","type":"integer","minimum":0,"maximum":9007199254740991},"word_boost":{"description":"Array of words to boost recognition accuracy (legacy - use keyterms_prompt instead).","type":"array","items":{"type":"string"}},"boost_param":{"description":"How much to boost the words in word_boost.","type":"string","enum":["low","default","high"]},"filter_profanity":{"description":"Filter profanity from the transcription.","type":"boolean"},"redact_pii":{"description":"Redact personally identifiable information.","type":"boolean"},"redact_pii_audio":{"description":"Generate a redacted audio file with PII removed.","type":"boolean"},"redact_pii_policies":{"description":"Specific PII policies to apply for redaction.","type":"array","items":{"type":"string"}},"redact_pii_sub":{"description":"Strategy for substituting redacted PII.","type":"string","enum":["entity_name","hash"]},"speech_threshold":{"description":"Confidence threshold for speech detection.","type":"number","minimum":0,"maximum":1},"domain":{"description":"Domain-specific transcription mode. \"medical-v1\" enables medical terminology optimization.","type":"string","enum":["medical-v1"]}},"required":["audio_url"],"additionalProperties":false}