{"type":"object","properties":{"audio":{"anyOf":[{"type":"string","description":"Base64-encoded value of the audio data."},{"type":"object","properties":{"body":{"type":"object"},"contentType":{"type":"string"}}}]},"task":{"type":"string","default":"transcribe","description":"Supported tasks are 'translate' and 'transcribe'."},"language":{"type":"string","description":"The language of the audio being transcribed or translated."},"vad_filter":{"type":"boolean","default":false,"description":"Preprocess the audio with a voice activity detection model."},"initial_prompt":{"type":"string","description":"A text prompt to help provide context to the model on the contents of the audio."},"prefix":{"type":"string","description":"A prefix prepended to the output of the transcription; it can guide the transcription result."},"beam_size":{"type":"integer","default":5,"description":"The number of beams to use in beam search decoding. Higher values may improve accuracy at the cost of speed."},"condition_on_previous_text":{"type":"boolean","default":true,"description":"Whether to condition on previous text during transcription. Setting to false may help prevent hallucination loops."},"no_speech_threshold":{"type":"number","default":0.6,"description":"Threshold for detecting no-speech segments. Segments with no-speech probability above this value are skipped."},"compression_ratio_threshold":{"type":"number","default":2.4,"description":"Threshold for filtering out segments with a high compression ratio, which often indicates repetitive or hallucinated text."},"log_prob_threshold":{"type":"number","default":-1,"description":"Threshold for filtering out segments with low average log probability, indicating low confidence."},"hallucination_silence_threshold":{"type":"number","description":"Optional threshold (in seconds) to skip silent periods that may cause hallucinations."}},"required":["audio"]}