{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "description": "Parameters for a text-to-speech synthesis request.",
  "type": "object",
  "properties": {
    "text": {
      "description": "The text to convert to speech. Maximum 10,000 characters.",
      "type": "string",
      "maxLength": 10000
    },
    "voice_id": {
      "description": "The voice ID to use for synthesis",
      "default": "English_expressive_narrator",
      "type": "string"
    },
    "speed": {
      "description": "Speech speed (0.5 to 2)",
      "default": 1,
      "type": "number",
      "minimum": 0.5,
      "maximum": 2
    },
    "volume": {
      "description": "Speech volume (0 to 10)",
      "default": 1,
      "type": "number",
      "minimum": 0,
      "maximum": 10
    },
    "pitch": {
      "description": "Pitch adjustment (-12 to 12)",
      "default": 0,
      "type": "integer",
      "minimum": -12,
      "maximum": 12
    },
    "emotion": {
      "description": "Emotion control for synthesized speech",
      "type": "string",
      "enum": [
        "happy",
        "sad",
        "angry",
        "fearful",
        "disgusted",
        "surprised",
        "calm",
        "fluent"
      ]
    },
    "format": {
      "description": "Output audio format",
      "default": "mp3",
      "type": "string",
      "enum": ["mp3", "flac", "wav"]
    },
    "sample_rate": {
      "description": "Audio sample rate in Hz",
      "type": "integer",
      "enum": [8000, 16000, 22050, 24000, 32000, 44100]
    }
  },
  "required": ["text"],
  "additionalProperties": false
}