{"type":"object","contentType":"application/json","properties":{"transcription_info":{"type":"object","properties":{"language":{"type":"string","description":"The language of the audio being transcribed or translated."},"language_probability":{"type":"number","description":"The confidence level or probability of the detected language being accurate, represented as a decimal between 0 and 1."},"duration":{"type":"number","description":"The total duration of the original audio file, in seconds."},"duration_after_vad":{"type":"number","description":"The duration of the audio after applying Voice Activity Detection (VAD) to remove silent or irrelevant sections, in seconds."}}},"text":{"type":"string","description":"The complete transcription of the audio."},"word_count":{"type":"number","description":"The total number of words in the transcription."},"segments":{"type":"array","items":{"type":"object","properties":{"start":{"type":"number","description":"The starting time of the segment within the audio, in seconds."},"end":{"type":"number","description":"The ending time of the segment within the audio, in seconds."},"text":{"type":"string","description":"The transcription of the segment."},"temperature":{"type":"number","description":"The temperature used in the decoding process, controlling randomness in predictions. Lower values result in more deterministic outputs."},"avg_logprob":{"type":"number","description":"The average log probability of the predictions for the words in this segment, indicating overall confidence."},"compression_ratio":{"type":"number","description":"The compression ratio of the input to the output, measuring how much the text was compressed during the transcription process."},"no_speech_prob":{"type":"number","description":"The probability that the segment contains no speech, represented as a decimal between 0 and 1."},"words":{"type":"array","items":{"type":"object","properties":{"word":{"type":"string","description":"The individual word transcribed from the audio."},"start":{"type":"number","description":"The starting time of the word within the audio, in seconds."},"end":{"type":"number","description":"The ending time of the word within the audio, in seconds."}}}}}}},"vtt":{"type":"string","description":"The transcription in WebVTT format, which includes timing and text information for use in subtitles."}},"required":["text"]}