Documentation
¶
Index ¶
- Constants
- type ASRChunk
- type ASRServiceV2
- type ASRV2Config
- type ASRV2Result
- type ASRV2Session
- type ASRV2Utterance
- type ASRV2Word
- type ASTAudioConfig
- type ASTTargetAudioConfig
- type ASTTranslateBillingItem
- type ASTTranslateConfig
- type ASTTranslateCorpus
- type ASTTranslateEvent
- type ASTTranslateEventType
- type ASTTranslateMode
- type ASTTranslateService
- type ASTTranslateSession
- func (s *ASTTranslateSession) Close() error
- func (s *ASTTranslateSession) Finish(ctx context.Context) error
- func (s *ASTTranslateSession) Recv() iter.Seq2[*ASTTranslateEvent, error]
- func (s *ASTTranslateSession) RecvEvent(ctx context.Context) (*ASTTranslateEvent, error)
- func (s *ASTTranslateSession) SendAudio(ctx context.Context, audio []byte) error
- func (s *ASTTranslateSession) SessionID() string
- func (s *ASTTranslateSession) UpdateConfig(ctx context.Context, update ASTTranslateUpdate) error
- type ASTTranslateUpdate
- type ASTTranslateUsage
- type ASTUser
- type AudioFormat
- type Client
- type Error
- type Language
- type MultipartFile
- type Option
- func WithAPIKey(apiKey string) Option
- func WithBaseURL(url string) Option
- func WithBearerToken(token string) Option
- func WithCluster(cluster string) Option
- func WithHTTPClient(client *http.Client) Option
- func WithHTTPTransport(doer transport.HTTPDoer) Option
- func WithRealtimeAPIKey(accessKey, appKey string) Option
- func WithResourceID(resourceID string) Option
- func WithTimeout(timeout time.Duration) Option
- func WithUserID(userID string) Option
- func WithV2APIKey(accessKey, appKey string) Option
- func WithWebSocketURL(url string) Option
- type RealtimeASRConfig
- type RealtimeASRResult
- type RealtimeAudioConfig
- type RealtimeConfig
- type RealtimeConnection
- type RealtimeConversationMessage
- type RealtimeDialogConfig
- type RealtimeEvent
- type RealtimeEventType
- type RealtimeGenerationProps
- type RealtimeInputMode
- type RealtimeModelVersion
- type RealtimePromptConfig
- type RealtimeService
- type RealtimeSession
- func (s *RealtimeSession) Close() error
- func (s *RealtimeSession) EndASR(ctx context.Context) error
- func (s *RealtimeSession) FinishSession(ctx context.Context) error
- func (s *RealtimeSession) Interrupt(ctx context.Context) error
- func (s *RealtimeSession) Recv() iter.Seq2[*RealtimeEvent, error]
- func (s *RealtimeSession) RecvEvent(ctx context.Context) (*RealtimeEvent, error)
- func (s *RealtimeSession) ReplaceHistory(index int, message RealtimeConversationMessage) error
- func (s *RealtimeSession) SayHello(ctx context.Context, content string) error
- func (s *RealtimeSession) SendAudio(ctx context.Context, audio []byte) error
- func (s *RealtimeSession) SendTTSText(ctx context.Context, text string) error
- func (s *RealtimeSession) SendText(ctx context.Context, text string) error
- func (s *RealtimeSession) SendUserMessage(ctx context.Context, text string) error
- func (s *RealtimeSession) SessionID() string
- func (s *RealtimeSession) UpdateHistory(history []RealtimeConversationMessage)
- func (s *RealtimeSession) UpdatePrompt(prompt RealtimePromptConfig)
- func (s *RealtimeSession) UpdateProps(props RealtimeGenerationProps)
- type RealtimeTTSConfig
- type RealtimeUsage
- type SampleRate
- type StreamASRConfig
- type TTSServiceV2
- type TTSV2Chunk
- type TTSV2MixSpeaker
- type TTSV2MixSpeakerSource
- type TTSV2Request
- type TTSV2WSChunk
- type TTSV2WSConfig
- type TTSV2WSSession
- func (s *TTSV2WSSession) CancelSession(ctx context.Context) error
- func (s *TTSV2WSSession) Close() error
- func (s *TTSV2WSSession) Recv() iter.Seq2[*TTSV2WSChunk, error]
- func (s *TTSV2WSSession) SendText(ctx context.Context, text string, isLast bool) error
- func (s *TTSV2WSSession) StartNextSession(ctx context.Context) error
- type Task
- type TaskFailureMapper
- type TaskPoller
- type TaskStatus
- type TaskStatusMapper
- type Utterance
- type VoiceCloneRequest
- type VoiceCloneService
- func (s *VoiceCloneService) Activate(ctx context.Context, voiceID string) error
- func (s *VoiceCloneService) GetStatus(ctx context.Context, speakerOrVoiceID string) (*VoiceCloneStatus, error)
- func (s *VoiceCloneService) Submit(ctx context.Context, req *VoiceCloneRequest) (*Task[VoiceCloneStatus], error)
- func (s *VoiceCloneService) Upload(ctx context.Context, req *VoiceCloneRequest) (*Task[VoiceCloneStatus], error)
- type VoiceCloneStatus
- type Word
Constants ¶
const ( AppKeyRealtime = "PlgvMymc7f3tQnJ6" AppKeyPodcast = "aGjiRDfUWi" )
V2/V3 fixed app keys (official constants, not user credentials).
const ( ResourceTTSV1 = "seed-tts-1.0" ResourceTTSV1Concurr = "seed-tts-1.0-concurr" ResourceTTSV2 = "seed-tts-2.0" ResourceTTSV2Concurr = "seed-tts-2.0-concurr" ResourceVoiceCloneV1 = "seed-icl-1.0" ResourceVoiceCloneV2 = "seed-icl-2.0" ResourceASRStream = "volc.bigasr.sauc.duration" ResourceASRStreamV2 = "volc.seedasr.sauc.duration" ResourceASRFile = "volc.bigasr.auc.duration" ResourceRealtime = "volc.speech.dialog" ResourcePodcast = "volc.service_type.10050" ResourceTranslation = "volc.megatts.simt" ResourceASTTranslate = "volc.service_type.10053" )
V2/V3 Resource IDs.
const ( CodeSuccess = 3000 CodeParamError = 3001 CodeAuthError = 3002 CodeRateLimit = 3003 CodeQuotaExceed = 3004 CodeServerError = 3005 CodeASRSuccess = 1000 )
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ASRChunk ¶
type ASRChunk = ASRV2Result
type ASRServiceV2 ¶
type ASRServiceV2 struct {
// contains filtered or unexported fields
}
ASRServiceV2 provides SAUC WebSocket streaming recognition.
func (*ASRServiceV2) OpenStreamSession ¶
func (s *ASRServiceV2) OpenStreamSession(ctx context.Context, cfg *ASRV2Config) (*ASRV2Session, error)
OpenStreamSession opens a SAUC V2 WebSocket session.
type ASRV2Config ¶
type ASRV2Config struct {
Format AudioFormat `json:"format" yaml:"format"`
SampleRate SampleRate `json:"sample_rate" yaml:"sample_rate"`
Channel int `json:"channel,omitempty" yaml:"channel,omitempty"`
Channels int `json:"channels,omitempty" yaml:"channels,omitempty"` // Backward-compatible alias field.
Bits int `json:"bits,omitempty" yaml:"bits,omitempty"`
Language Language `json:"language,omitempty" yaml:"language,omitempty"`
EnableITN bool `json:"enable_itn,omitempty" yaml:"enable_itn,omitempty"`
EnablePunc bool `json:"enable_punc,omitempty" yaml:"enable_punc,omitempty"`
EnableDiarization bool `json:"enable_diarization,omitempty" yaml:"enable_diarization,omitempty"`
SpeakerNum int `json:"speaker_num,omitempty" yaml:"speaker_num,omitempty"`
Hotwords []string `json:"hotwords,omitempty" yaml:"hotwords,omitempty"`
ResultType string `json:"result_type,omitempty" yaml:"result_type,omitempty"` // single/full
ResourceID string `json:"resource_id,omitempty" yaml:"resource_id,omitempty"`
}
ASRV2Config is the config for a SAUC V2 streaming session.
type ASRV2Result ¶
type ASRV2Result struct {
Text string `json:"text"`
Utterances []ASRV2Utterance `json:"utterances,omitempty"`
IsFinal bool `json:"is_final"`
Duration int `json:"duration,omitempty"`
ReqID string `json:"reqid,omitempty"`
TraceID string `json:"trace_id,omitempty"`
LogID string `json:"log_id,omitempty"`
ConnectID string `json:"connect_id,omitempty"`
}
ASRV2Result is one parsed server response.
type ASRV2Session ¶
type ASRV2Session struct {
// contains filtered or unexported fields
}
ASRV2Session represents one streaming recognition session.
func (*ASRV2Session) Recv ¶
func (s *ASRV2Session) Recv() iter.Seq2[*ASRV2Result, error]
Recv yields recognition results as a stream.
type ASRV2Utterance ¶
type ASRV2Utterance struct {
Text string `json:"text"`
StartTime int `json:"start_time"`
EndTime int `json:"end_time"`
Definite bool `json:"definite"`
SpeakerID string `json:"speaker_id,omitempty"`
Words []ASRV2Word `json:"words,omitempty"`
Confidence float64 `json:"confidence,omitempty"`
}
ASRV2Utterance contains utterance-level info.
type ASRV2Word ¶
type ASRV2Word struct {
Text string `json:"text"`
StartTime int `json:"start_time"`
EndTime int `json:"end_time"`
Conf float64 `json:"conf,omitempty"`
}
ASRV2Word contains word-level timing info.
type ASTAudioConfig ¶
type ASTAudioConfig struct {
Format AudioFormat `json:"format" yaml:"format"`
Codec string `json:"codec,omitempty" yaml:"codec,omitempty"`
Rate SampleRate `json:"rate" yaml:"rate"`
Bits int `json:"bits" yaml:"bits"`
Channel int `json:"channel" yaml:"channel"`
}
type ASTTargetAudioConfig ¶
type ASTTargetAudioConfig struct {
Format AudioFormat `json:"format,omitempty" yaml:"format,omitempty"`
Rate SampleRate `json:"rate,omitempty" yaml:"rate,omitempty"`
Bits int `json:"bits,omitempty" yaml:"bits,omitempty"`
Channel int `json:"channel,omitempty" yaml:"channel,omitempty"`
}
type ASTTranslateBillingItem ¶
type ASTTranslateConfig ¶
type ASTTranslateConfig struct {
SessionID string `json:"session_id,omitempty" yaml:"session_id,omitempty"`
ResourceID string `json:"resource_id,omitempty" yaml:"resource_id,omitempty"`
Mode ASTTranslateMode `json:"mode" yaml:"mode"`
SourceLanguage string `json:"source_language" yaml:"source_language"`
TargetLanguage string `json:"target_language" yaml:"target_language"`
SourceAudio ASTAudioConfig `json:"source_audio" yaml:"source_audio"`
TargetAudio ASTTargetAudioConfig `json:"target_audio,omitempty" yaml:"target_audio,omitempty"`
SpeakerID string `json:"speaker_id,omitempty" yaml:"speaker_id,omitempty"`
IsCustomSpeaker bool `json:"is_custom_speaker,omitempty" yaml:"is_custom_speaker,omitempty"`
TTSResourceID string `json:"tts_resource_id,omitempty" yaml:"tts_resource_id,omitempty"`
SpeechRate int `json:"speech_rate,omitempty" yaml:"speech_rate,omitempty"`
EnableSourceLanguageDetect bool `json:"enable_source_language_detect,omitempty" yaml:"enable_source_language_detect,omitempty"`
Denoise *bool `json:"denoise,omitempty" yaml:"denoise,omitempty"`
Corpus *ASTTranslateCorpus `json:"corpus,omitempty" yaml:"corpus,omitempty"`
User ASTUser `json:"user,omitempty" yaml:"user,omitempty"`
EventBuffer int `json:"-" yaml:"-"`
BackpressureTimeout time.Duration `json:"-" yaml:"-"`
}
func DefaultASTTranslateConfig ¶
func DefaultASTTranslateConfig() ASTTranslateConfig
type ASTTranslateCorpus ¶
type ASTTranslateCorpus struct {
HotWords []string `json:"hot_words_list,omitempty" yaml:"hot_words_list,omitempty"`
BoostingTableID string `json:"boosting_table_id,omitempty" yaml:"boosting_table_id,omitempty"`
BoostingTableName string `json:"boosting_table_name,omitempty" yaml:"boosting_table_name,omitempty"`
CorrectWords map[string]string `json:"correct_words,omitempty" yaml:"correct_words,omitempty"`
RegexCorrectTableID string `json:"regex_correct_table_id,omitempty" yaml:"regex_correct_table_id,omitempty"`
RegexCorrectTableName string `json:"regex_correct_table_name,omitempty" yaml:"regex_correct_table_name,omitempty"`
Glossary map[string]string `json:"glossary_list,omitempty" yaml:"glossary_list,omitempty"`
GlossaryTableID string `json:"glossary_table_id,omitempty" yaml:"glossary_table_id,omitempty"`
GlossaryTableName string `json:"glossary_table_name,omitempty" yaml:"glossary_table_name,omitempty"`
}
type ASTTranslateEvent ¶
type ASTTranslateEvent struct {
Type ASTTranslateEventType `json:"type"`
SessionID string `json:"session_id,omitempty"`
Text string `json:"text,omitempty"`
Audio []byte `json:"-"`
StartTimeMS int `json:"start_time_ms,omitempty"`
EndTimeMS int `json:"end_time_ms,omitempty"`
SpeakerChanged bool `json:"speaker_changed,omitempty"`
DetectedLanguage string `json:"detected_language,omitempty"`
MutedDurationMS int `json:"muted_duration_ms,omitempty"`
Usage *ASTTranslateUsage `json:"usage,omitempty"`
Error *Error `json:"error,omitempty"`
IsFinal bool `json:"is_final,omitempty"`
Payload []byte `json:"payload,omitempty"`
ReqID string `json:"reqid,omitempty"`
TraceID string `json:"trace_id,omitempty"`
LogID string `json:"log_id,omitempty"`
}
type ASTTranslateEventType ¶
type ASTTranslateEventType int32
const ( ASTEventSessionStarted ASTTranslateEventType = 150 ASTEventSessionCanceled ASTTranslateEventType = 151 ASTEventSessionFinished ASTTranslateEventType = 152 ASTEventSessionFailed ASTTranslateEventType = 153 ASTEventUsageResponse ASTTranslateEventType = 154 ASTEventAudioMuted ASTTranslateEventType = 250 ASTEventTTSSentenceStart ASTTranslateEventType = 350 ASTEventTTSSentenceEnd ASTTranslateEventType = 351 ASTEventTTSResponse ASTTranslateEventType = 352 ASTEventSourceSubtitleStart ASTTranslateEventType = 650 ASTEventSourceSubtitleResponse ASTTranslateEventType = 651 ASTEventSourceSubtitleEnd ASTTranslateEventType = 652 ASTEventTranslationSubtitleStart ASTTranslateEventType = 653 ASTEventTranslationSubtitleResponse ASTTranslateEventType = 654 ASTEventTranslationSubtitleEnd ASTTranslateEventType = 655 )
type ASTTranslateMode ¶
type ASTTranslateMode string
const ( ASTTranslateModeS2T ASTTranslateMode = "s2t" ASTTranslateModeS2S ASTTranslateMode = "s2s" )
type ASTTranslateService ¶
type ASTTranslateService struct {
// contains filtered or unexported fields
}
func (*ASTTranslateService) OpenSession ¶
func (s *ASTTranslateService) OpenSession(ctx context.Context, cfg *ASTTranslateConfig) (*ASTTranslateSession, error)
type ASTTranslateSession ¶
type ASTTranslateSession struct {
// contains filtered or unexported fields
}
func (*ASTTranslateSession) Close ¶
func (s *ASTTranslateSession) Close() error
func (*ASTTranslateSession) Finish ¶
func (s *ASTTranslateSession) Finish(ctx context.Context) error
func (*ASTTranslateSession) Recv ¶
func (s *ASTTranslateSession) Recv() iter.Seq2[*ASTTranslateEvent, error]
func (*ASTTranslateSession) RecvEvent ¶
func (s *ASTTranslateSession) RecvEvent(ctx context.Context) (*ASTTranslateEvent, error)
func (*ASTTranslateSession) SendAudio ¶
func (s *ASTTranslateSession) SendAudio(ctx context.Context, audio []byte) error
func (*ASTTranslateSession) SessionID ¶
func (s *ASTTranslateSession) SessionID() string
func (*ASTTranslateSession) UpdateConfig ¶
func (s *ASTTranslateSession) UpdateConfig(ctx context.Context, update ASTTranslateUpdate) error
type ASTTranslateUpdate ¶
type ASTTranslateUpdate struct {
Corpus *ASTTranslateCorpus `json:"corpus,omitempty" yaml:"corpus,omitempty"`
}
type ASTTranslateUsage ¶
type ASTTranslateUsage struct {
Items []ASTTranslateBillingItem `json:"items,omitempty"`
DurationMS int64 `json:"duration_ms,omitempty"`
WordCount int64 `json:"word_count,omitempty"`
}
type ASTUser ¶
type ASTUser struct {
UID string `json:"uid,omitempty" yaml:"uid,omitempty"`
DID string `json:"did,omitempty" yaml:"did,omitempty"`
Platform string `json:"platform,omitempty" yaml:"platform,omitempty"`
SDKVersion string `json:"sdk_version,omitempty" yaml:"sdk_version,omitempty"`
AppVersion string `json:"app_version,omitempty" yaml:"app_version,omitempty"`
}
type AudioFormat ¶
type AudioFormat string
AudioFormat represents audio encoding format.
const ( FormatPCM AudioFormat = "pcm" FormatPCMS16LE AudioFormat = "pcm_s16le" FormatWAV AudioFormat = "wav" FormatMP3 AudioFormat = "mp3" FormatOGG AudioFormat = "ogg_opus" FormatAAC AudioFormat = "aac" FormatM4A AudioFormat = "m4a" )
type Client ¶
type Client struct {
// ASR V2 streaming recognition.
ASR *ASRServiceV2
ASRV2 *ASRServiceV2
// Voice cloning.
VoiceClone *VoiceCloneService
// Realtime dialogue.
Realtime *RealtimeService
// AST realtime translation.
ASTTranslate *ASTTranslateService
AST *ASTTranslateService
// TTS V2 WebSocket synthesis.
TTS *TTSServiceV2
TTSV2 *TTSServiceV2
// contains filtered or unexported fields
}
Client is the SDK entry point.
In this migration stage, ASR V2, TTS V2 WS, Voice Clone, and Realtime are implemented; AST realtime translation is also exposed through the ASTTranslate and AST fields.
type Error ¶
type Error struct {
Code int `json:"code"`
Message string `json:"message"`
TraceID string `json:"trace_id,omitempty"`
LogID string `json:"log_id,omitempty"`
ConnectID string `json:"connect_id,omitempty"`
HTTPStatus int `json:"-"`
ReqID string `json:"reqid,omitempty"`
}
Error is the unified error model.
func (*Error) IsAuthError ¶
func (*Error) IsInvalidParam ¶
func (*Error) IsQuotaExceeded ¶
func (*Error) IsRateLimit ¶
func (*Error) IsServerError ¶
type MultipartFile ¶
MultipartFile is one file part in a multipart/form-data payload.
type Option ¶
type Option func(*clientConfig)
Option configures Client.
func WithBearerToken ¶
WithBearerToken sets Bearer token. V1 header format is `Authorization: Bearer;{token}` (historical convention).
func WithCluster ¶
WithCluster sets the V1 cluster (kept for backward compatibility).
func WithHTTPClient ¶
WithHTTPClient sets a custom HTTP client.
func WithHTTPTransport ¶
WithHTTPTransport sets a custom HTTP transport doer.
func WithRealtimeAPIKey ¶
WithRealtimeAPIKey is a compatibility alias; it takes the same accessKey/appKey pair as WithV2APIKey.
func WithResourceID ¶
WithResourceID sets the default resource_id.
func WithV2APIKey ¶
WithV2APIKey sets V2/V3 authentication.
func WithWebSocketURL ¶
WithWebSocketURL sets the WebSocket base URL.
type RealtimeASRConfig ¶
type RealtimeASRConfig struct {
Language Language `json:"language,omitempty" yaml:"language,omitempty"`
Extra map[string]any `json:"extra,omitempty" yaml:"extra,omitempty"`
}
RealtimeASRConfig configures ASR behavior.
type RealtimeASRResult ¶
RealtimeASRResult is one ASR hypothesis returned by the realtime service.
type RealtimeAudioConfig ¶
type RealtimeAudioConfig struct {
Channel int `json:"channel" yaml:"channel"`
Format AudioFormat `json:"format" yaml:"format"`
SampleRate SampleRate `json:"sample_rate" yaml:"sample_rate"`
Bits int `json:"bits,omitempty" yaml:"bits,omitempty"`
SpeechRate int `json:"speech_rate,omitempty" yaml:"speech_rate,omitempty"`
LoudnessRate int `json:"loudness_rate,omitempty" yaml:"loudness_rate,omitempty"`
}
RealtimeAudioConfig describes audio IO parameters.
type RealtimeConfig ¶
type RealtimeConfig struct {
ASR RealtimeASRConfig `json:"asr" yaml:"asr"`
TTS RealtimeTTSConfig `json:"tts" yaml:"tts"`
Dialog RealtimeDialogConfig `json:"dialog" yaml:"dialog"`
Prompt RealtimePromptConfig `json:"prompt" yaml:"prompt,omitempty"`
Props RealtimeGenerationProps `json:"props" yaml:"props,omitempty"`
History []RealtimeConversationMessage `json:"history,omitempty" yaml:"history,omitempty"`
InputMode RealtimeInputMode `json:"input_mode,omitempty" yaml:"input_mode,omitempty"`
Model RealtimeModelVersion `json:"model,omitempty" yaml:"model,omitempty"`
ResourceID string `json:"resource_id,omitempty" yaml:"resource_id,omitempty"`
// Local runtime controls (not sent to server).
EventBuffer int `json:"-" yaml:"-"`
BackpressureTimeout time.Duration `json:"-" yaml:"-"`
}
RealtimeConfig represents one realtime session config.
func DefaultRealtimeConfig ¶
func DefaultRealtimeConfig() RealtimeConfig
DefaultRealtimeConfig returns a baseline realtime config.
type RealtimeConnection ¶
type RealtimeConnection struct {
// contains filtered or unexported fields
}
RealtimeConnection represents an established realtime websocket connection.
func (*RealtimeConnection) Close ¶
func (c *RealtimeConnection) Close() error
Close closes the websocket connection.
func (*RealtimeConnection) StartSession ¶
func (c *RealtimeConnection) StartSession(ctx context.Context, cfg *RealtimeConfig) (*RealtimeSession, error)
StartSession starts one realtime session on the current connection.
type RealtimeConversationMessage ¶
type RealtimeConversationMessage struct {
Role string `json:"role" yaml:"role"`
Content string `json:"content" yaml:"content"`
}
RealtimeConversationMessage is one dialog history entry.
type RealtimeDialogConfig ¶
type RealtimeDialogConfig struct {
BotName string `json:"bot_name,omitempty" yaml:"bot_name,omitempty"`
SystemRole string `json:"system_role,omitempty" yaml:"system_role,omitempty"`
SpeakingStyle string `json:"speaking_style,omitempty" yaml:"speaking_style,omitempty"`
CharacterManifest string `json:"character_manifest,omitempty" yaml:"character_manifest,omitempty"`
Extra map[string]any `json:"extra,omitempty" yaml:"extra,omitempty"`
}
RealtimeDialogConfig configures dialogue behavior.
type RealtimeEvent ¶
type RealtimeEvent struct {
Type RealtimeEventType `json:"type"`
SessionID string `json:"session_id,omitempty"`
ConnectID string `json:"connect_id,omitempty"`
Sequence int32 `json:"sequence,omitempty"`
Text string `json:"text,omitempty"`
Audio []byte `json:"audio,omitempty"`
Payload []byte `json:"payload,omitempty"`
QuestionID string `json:"question_id,omitempty"`
ReplyID string `json:"reply_id,omitempty"`
TTSType string `json:"tts_type,omitempty"`
StatusCode string `json:"status_code,omitempty"`
Results []RealtimeASRResult `json:"results,omitempty"`
Usage *RealtimeUsage `json:"usage,omitempty"`
Error *Error `json:"error,omitempty"`
IsFinal bool `json:"is_final,omitempty"`
ReqID string `json:"reqid,omitempty"`
TraceID string `json:"trace_id,omitempty"`
LogID string `json:"log_id,omitempty"`
}
RealtimeEvent is one parsed server event.
type RealtimeEventType ¶
type RealtimeEventType int32
RealtimeEventType represents realtime websocket event ID.
const ( // Connection events. EventConnectionStarted RealtimeEventType = 50 EventConnectionFailed RealtimeEventType = 51 EventConnectionEnded RealtimeEventType = 52 // Session events. EventSessionStarted RealtimeEventType = 150 EventSessionFinished RealtimeEventType = 152 EventSessionFailed RealtimeEventType = 153 EventUsageResponse RealtimeEventType = 154 EventConfigUpdated RealtimeEventType = 251 // ASR events. EventASRInfo RealtimeEventType = 450 EventASRResponse RealtimeEventType = 451 EventASREnded RealtimeEventType = 459 // TTS events. EventTTSStarted RealtimeEventType = 350 EventTTSSegmentEnd RealtimeEventType = 351 EventTTSAudioData RealtimeEventType = 352 EventTTSFinished RealtimeEventType = 359 // Chat events. EventChatResponse RealtimeEventType = 550 EventChatTextQueryConfirmed RealtimeEventType = 553 EventChatEnded RealtimeEventType = 559 // Conversation events. EventConversationCreated RealtimeEventType = 567 EventConversationUpdated RealtimeEventType = 568 EventConversationRetrieved RealtimeEventType = 569 EventConversationTruncated RealtimeEventType = 570 EventConversationDeleted RealtimeEventType = 571 EventDialogCommonError RealtimeEventType = 599 )
type RealtimeGenerationProps ¶
type RealtimeGenerationProps struct {
Temperature float64 `json:"temperature,omitempty" yaml:"temperature,omitempty"`
TopP float64 `json:"top_p,omitempty" yaml:"top_p,omitempty"`
MaxTokens int `json:"max_tokens,omitempty" yaml:"max_tokens,omitempty"`
PresencePenalty float64 `json:"presence_penalty,omitempty" yaml:"presence_penalty,omitempty"`
FrequencyPenalty float64 `json:"frequency_penalty,omitempty" yaml:"frequency_penalty,omitempty"`
Extra map[string]any `json:"extra,omitempty" yaml:"extra,omitempty"`
}
RealtimeGenerationProps controls generation params.
type RealtimeInputMode ¶
type RealtimeInputMode string
RealtimeInputMode controls how the session receives user input.
const ( // RealtimeInputModeDefault leaves input_mod unset and uses realtime server-VAD audio. RealtimeInputModeDefault RealtimeInputMode = "" // RealtimeInputModeKeepAlive keeps muted microphone sessions alive. RealtimeInputModeKeepAlive RealtimeInputMode = "keep_alive" // RealtimeInputModePushToTalk uses client-controlled end-of-speech. RealtimeInputModePushToTalk RealtimeInputMode = "push_to_talk" // RealtimeInputModeText sends user turns as text. RealtimeInputModeText RealtimeInputMode = "text" // RealtimeInputModeAudioFile streams a recording file as timed audio chunks. RealtimeInputModeAudioFile RealtimeInputMode = "audio_file" )
type RealtimeModelVersion ¶
type RealtimeModelVersion string
RealtimeModelVersion selects the realtime model family.
const ( RealtimeModelO20 RealtimeModelVersion = "1.2.1.1" RealtimeModelSC20 RealtimeModelVersion = "2.2.0.0" )
type RealtimePromptConfig ¶
type RealtimePromptConfig struct {
System string `json:"system,omitempty" yaml:"system,omitempty"`
Variables map[string]string `json:"variables,omitempty" yaml:"variables,omitempty"`
}
RealtimePromptConfig controls prompt and prompt variables.
type RealtimeService ¶
type RealtimeService struct {
// contains filtered or unexported fields
}
RealtimeService provides real-time dialogue operations.
func (*RealtimeService) Connect ¶
func (s *RealtimeService) Connect(ctx context.Context, cfg *RealtimeConfig) (*RealtimeSession, error)
Connect is a convenience method for Dial + StartSession.
func (*RealtimeService) Dial ¶
func (s *RealtimeService) Dial(ctx context.Context) (*RealtimeConnection, error)
Dial opens a realtime websocket connection and completes the StartConnection handshake.
func (*RealtimeService) OpenSession ¶
func (s *RealtimeService) OpenSession(ctx context.Context, cfg *RealtimeConfig) (*RealtimeSession, error)
OpenSession is a compatibility alias for Connect.
type RealtimeSession ¶
type RealtimeSession struct {
// contains filtered or unexported fields
}
RealtimeSession represents one realtime dialogue session.
func (*RealtimeSession) Close ¶
func (s *RealtimeSession) Close() error
Close closes the current session. It is idempotent.
func (*RealtimeSession) EndASR ¶
func (s *RealtimeSession) EndASR(ctx context.Context) error
EndASR signals the end of client-side audio input in push-to-talk mode (event=400).
func (*RealtimeSession) FinishSession ¶
func (s *RealtimeSession) FinishSession(ctx context.Context) error
FinishSession ends the current session while leaving the websocket connection reusable (event=102).
func (*RealtimeSession) Interrupt ¶
func (s *RealtimeSession) Interrupt(ctx context.Context) error
Interrupt interrupts the current generation (event=515).
func (*RealtimeSession) Recv ¶
func (s *RealtimeSession) Recv() iter.Seq2[*RealtimeEvent, error]
Recv returns a streaming iterator. Concurrent Recv is not supported.
func (*RealtimeSession) RecvEvent ¶
func (s *RealtimeSession) RecvEvent(ctx context.Context) (*RealtimeEvent, error)
RecvEvent receives one event. Concurrent Recv/RecvEvent is not supported.
func (*RealtimeSession) ReplaceHistory ¶
func (s *RealtimeSession) ReplaceHistory(index int, message RealtimeConversationMessage) error
ReplaceHistory replaces one item in local history by index.
func (*RealtimeSession) SayHello ¶
func (s *RealtimeSession) SayHello(ctx context.Context, content string) error
SayHello sends a SayHello event (event=300).
func (*RealtimeSession) SendAudio ¶
func (s *RealtimeSession) SendAudio(ctx context.Context, audio []byte) error
SendAudio sends one audio chunk (event=200).
func (*RealtimeSession) SendTTSText ¶
func (s *RealtimeSession) SendTTSText(ctx context.Context, text string) error
SendTTSText sends one complete caller-provided TTS text transaction (event=500).
func (*RealtimeSession) SendText ¶
func (s *RealtimeSession) SendText(ctx context.Context, text string) error
SendText sends user text (event=501).
func (*RealtimeSession) SendUserMessage ¶
func (s *RealtimeSession) SendUserMessage(ctx context.Context, text string) error
SendUserMessage sends one user text message together with the current history/prompt/props snapshot.
func (*RealtimeSession) SessionID ¶
func (s *RealtimeSession) SessionID() string
SessionID returns the current session ID.
func (*RealtimeSession) UpdateHistory ¶
func (s *RealtimeSession) UpdateHistory(history []RealtimeConversationMessage)
UpdateHistory replaces the whole local history snapshot used by future turns.
func (*RealtimeSession) UpdatePrompt ¶
func (s *RealtimeSession) UpdatePrompt(prompt RealtimePromptConfig)
UpdatePrompt replaces the current prompt config used by future turns.
func (*RealtimeSession) UpdateProps ¶
func (s *RealtimeSession) UpdateProps(props RealtimeGenerationProps)
UpdateProps replaces the current generation props used by future turns.
type RealtimeTTSConfig ¶
type RealtimeTTSConfig struct {
Speaker string `json:"speaker" yaml:"speaker"`
AudioConfig RealtimeAudioConfig `json:"audio_config" yaml:"audio_config"`
Extra map[string]any `json:"extra,omitempty" yaml:"extra,omitempty"`
}
RealtimeTTSConfig configures TTS behavior.
type RealtimeUsage ¶
type RealtimeUsage struct {
InputTextTokens int `json:"input_text_tokens,omitempty"`
InputAudioTokens int `json:"input_audio_tokens,omitempty"`
CachedTextTokens int `json:"cached_text_tokens,omitempty"`
CachedAudioTokens int `json:"cached_audio_tokens,omitempty"`
OutputTextTokens int `json:"output_text_tokens,omitempty"`
OutputAudioTokens int `json:"output_audio_tokens,omitempty"`
}
RealtimeUsage contains token usage reported by a realtime response.
type SampleRate ¶
type SampleRate int
SampleRate represents audio sample rate.
const ( SampleRate8000 SampleRate = 8000 SampleRate16000 SampleRate = 16000 SampleRate22050 SampleRate = 22050 SampleRate24000 SampleRate = 24000 SampleRate32000 SampleRate = 32000 SampleRate44100 SampleRate = 44100 SampleRate48000 SampleRate = 48000 )
type StreamASRConfig ¶
type StreamASRConfig = ASRV2Config
StreamASRConfig is a backward-compatible alias for ASRV2Config.
type TTSServiceV2 ¶
type TTSServiceV2 struct {
// contains filtered or unexported fields
}
TTSServiceV2 provides TTS V2 WebSocket streaming synthesis.
func (*TTSServiceV2) OpenStreamSession ¶
func (s *TTSServiceV2) OpenStreamSession(ctx context.Context, cfg *TTSV2WSConfig) (*TTSV2WSSession, error)
OpenStreamSession opens a TTS V2 bidirectional WebSocket stream session.
func (*TTSServiceV2) Stream ¶
func (s *TTSServiceV2) Stream(ctx context.Context, req *TTSV2Request) iter.Seq2[*TTSV2Chunk, error]
Stream synthesizes speech with the TTS V2 HTTP streaming endpoint.
type TTSV2Chunk ¶
type TTSV2Chunk struct {
Audio []byte `json:"-"`
IsLast bool `json:"is_last"`
ReqID string `json:"reqid,omitempty"`
TraceID string `json:"trace_id,omitempty"`
LogID string `json:"log_id,omitempty"`
Code int `json:"code"`
Message string `json:"message,omitempty"`
}
TTSV2Chunk is one stream chunk from the TTS V2 HTTP streaming API.
type TTSV2MixSpeaker ¶
type TTSV2MixSpeaker struct {
Speakers []TTSV2MixSpeakerSource `json:"speakers,omitempty" yaml:"speakers,omitempty"`
}
TTSV2MixSpeaker represents mixed-speaker parameters.
type TTSV2MixSpeakerSource ¶
type TTSV2MixSpeakerSource struct {
SourceSpeaker string `json:"source_speaker" yaml:"source_speaker"`
MixFactor float64 `json:"mix_factor" yaml:"mix_factor"`
}
TTSV2MixSpeakerSource is one source speaker in a mixed-speaker request.
type TTSV2Request ¶
type TTSV2Request struct {
Text string `json:"text" yaml:"text"`
Speaker string `json:"speaker" yaml:"speaker"`
Format AudioFormat `json:"format,omitempty" yaml:"format,omitempty"`
SampleRate SampleRate `json:"sample_rate,omitempty" yaml:"sample_rate,omitempty"`
BitRate int `json:"bit_rate,omitempty" yaml:"bit_rate,omitempty"`
SpeechRate int `json:"speech_rate,omitempty" yaml:"speech_rate,omitempty"`
PitchRate int `json:"pitch_rate,omitempty" yaml:"pitch_rate,omitempty"`
VolumeRate int `json:"volume_rate,omitempty" yaml:"volume_rate,omitempty"`
Emotion string `json:"emotion,omitempty" yaml:"emotion,omitempty"`
Language string `json:"language,omitempty" yaml:"language,omitempty"`
ResourceID string `json:"resource_id,omitempty" yaml:"resource_id,omitempty"`
MixSpeaker *TTSV2MixSpeaker `json:"mix_speaker,omitempty" yaml:"mix_speaker,omitempty"`
}
TTSV2Request represents a TTS V2 stream request.
type TTSV2WSChunk ¶
type TTSV2WSChunk struct {
Audio []byte `json:"-"`
IsFinal bool `json:"is_final"`
Event int32 `json:"event"`
ReqID string `json:"reqid,omitempty"`
TraceID string `json:"trace_id,omitempty"`
LogID string `json:"log_id,omitempty"`
ConnectID string `json:"connect_id,omitempty"`
}
TTSV2WSChunk is one downstream chunk from the TTS V2 WebSocket stream.
type TTSV2WSConfig ¶
type TTSV2WSConfig struct {
Speaker string `json:"speaker" yaml:"speaker"`
Format AudioFormat `json:"format,omitempty" yaml:"format,omitempty"`
SampleRate SampleRate `json:"sample_rate,omitempty" yaml:"sample_rate,omitempty"`
// ResourceID defaults to seed-tts-2.0 when empty.
ResourceID string `json:"resource_id,omitempty" yaml:"resource_id,omitempty"`
}
TTSV2WSConfig is the config for a bidirectional TTS V2 WebSocket session.
type TTSV2WSSession ¶
type TTSV2WSSession struct {
// contains filtered or unexported fields
}
TTSV2WSSession represents one bidirectional TTS V2 WebSocket session.
func (*TTSV2WSSession) CancelSession ¶
func (s *TTSV2WSSession) CancelSession(ctx context.Context) error
CancelSession cancels the current session.
func (*TTSV2WSSession) Recv ¶
func (s *TTSV2WSSession) Recv() iter.Seq2[*TTSV2WSChunk, error]
Recv yields TTS output chunks.
func (*TTSV2WSSession) SendText ¶
SendText sends one text piece. When isLast=true, it also sends FinishSession.
func (*TTSV2WSSession) StartNextSession ¶
func (s *TTSV2WSSession) StartNextSession(ctx context.Context) error
StartNextSession starts a new session on the same WebSocket connection.
type Task ¶
Task represents an asynchronous task.
func (*Task[T]) SetFailureMapper ¶
func (t *Task[T]) SetFailureMapper(mapper TaskFailureMapper[T]) *Task[T]
SetFailureMapper customizes terminal task failure error mapping.
func (*Task[T]) SetStatusMapper ¶
func (t *Task[T]) SetStatusMapper(mapper TaskStatusMapper) *Task[T]
SetStatusMapper customizes task status normalization.
type TaskFailureMapper ¶
// TaskFailureMapper is a generic function type: it receives a task status and
// a pointer to a result of type T, and returns an error.
type TaskFailureMapper[T any] func(status TaskStatus, result *T) error
TaskFailureMapper converts terminal failure status to a concrete error.
type TaskPoller ¶
// TaskPoller is a generic function type: given a context and a task ID, it
// returns the task's status, a pointer to a result of type T, and an error.
type TaskPoller[T any] func(ctx context.Context, taskID string) (status TaskStatus, result *T, err error)
TaskPoller polls current task status and optional result.
type TaskStatus ¶
// TaskStatus is a string-backed type holding the status of an asynchronous task.
type TaskStatus string
TaskStatus is the status of an asynchronous task.
const ( TaskStatusPending TaskStatus = "pending" TaskStatusProcessing TaskStatus = "processing" TaskStatusSuccess TaskStatus = "success" TaskStatusFailed TaskStatus = "failed" TaskStatusCancelled TaskStatus = "cancelled" )
type TaskStatusMapper ¶
// TaskStatusMapper is a function type that takes a TaskStatus and returns a
// TaskStatus.
type TaskStatusMapper func(status TaskStatus) TaskStatus
TaskStatusMapper normalizes raw task status values.
type Utterance ¶
// Utterance is a type alias for ASRV2Utterance; the two names denote the same
// type.
type Utterance = ASRV2Utterance
type VoiceCloneRequest ¶
// VoiceCloneRequest is the request payload for a voice clone upload task.
//
// All serialized fields are tagged omitempty, so zero values are left out of
// JSON/YAML output. Audio and PollInterval are tagged "-" and are never
// serialized.
type VoiceCloneRequest struct {
	// VoiceID is a custom voice identifier.
	VoiceID string `json:"voice_id,omitempty" yaml:"voice_id,omitempty"`
	// SpeakerID is an alias of VoiceID for compatibility with official docs.
	SpeakerID string `json:"speaker_id,omitempty" yaml:"speaker_id,omitempty"`

	// Audio holds raw bytes and is excluded from JSON/YAML encoding.
	// NOTE(review): presumably the training audio, with the three fields below
	// describing it — confirm how each is transmitted.
	Audio            []byte `json:"-" yaml:"-"`
	AudioFileName    string `json:"audio_file_name,omitempty" yaml:"audio_file_name,omitempty"`
	AudioContentType string `json:"audio_content_type,omitempty" yaml:"audio_content_type,omitempty"`
	AudioFormat      string `json:"audio_format,omitempty" yaml:"audio_format,omitempty"`

	// Optional request parameters. Language, ModelType and Source are plain
	// integers; their valid values are not visible here.
	Text       string `json:"text,omitempty" yaml:"text,omitempty"`
	Language   int    `json:"language,omitempty" yaml:"language,omitempty"`
	ModelType  int    `json:"model_type,omitempty" yaml:"model_type,omitempty"`
	Source     int    `json:"source,omitempty" yaml:"source,omitempty"`
	ResourceID string `json:"resource_id,omitempty" yaml:"resource_id,omitempty"`

	// PollInterval is a client-side setting excluded from JSON/YAML encoding.
	// NOTE(review): presumably the interval used when polling the returned
	// task — confirm.
	PollInterval time.Duration `json:"-" yaml:"-"`
}
VoiceCloneRequest is the request payload for a voice clone upload task.
type VoiceCloneService ¶
type VoiceCloneService struct {
// contains filtered or unexported fields
}
VoiceCloneService provides voice clone training and status operations.
func (*VoiceCloneService) Activate ¶
func (s *VoiceCloneService) Activate(ctx context.Context, voiceID string) error
Activate formalizes a trained cloned voice.
func (*VoiceCloneService) GetStatus ¶
func (s *VoiceCloneService) GetStatus(ctx context.Context, speakerOrVoiceID string) (*VoiceCloneStatus, error)
GetStatus queries current voice clone task status.
func (*VoiceCloneService) Submit ¶
func (s *VoiceCloneService) Submit(ctx context.Context, req *VoiceCloneRequest) (*Task[VoiceCloneStatus], error)
Submit uploads training audio and returns a task handle for polling.
func (*VoiceCloneService) Upload ¶
func (s *VoiceCloneService) Upload(ctx context.Context, req *VoiceCloneRequest) (*Task[VoiceCloneStatus], error)
Upload uploads training audio and returns a task handle for polling.
type VoiceCloneStatus ¶
// VoiceCloneStatus is one status snapshot of a clone training task.
//
// Status is always present in the JSON form; every other field is tagged
// omitempty and is dropped when it holds its zero value.
type VoiceCloneStatus struct {
	// Identifiers for the task and the voice it concerns.
	TaskID    string `json:"task_id,omitempty"`
	SpeakerID string `json:"speaker_id,omitempty"`
	VoiceID   string `json:"voice_id,omitempty"`

	// Status is a TaskStatus value. RawStatus and RawStatusCode are kept
	// alongside it. NOTE(review): presumably these are the unnormalized values
	// reported by the service — confirm.
	Status        TaskStatus `json:"status"`
	RawStatus     string     `json:"raw_status,omitempty"`
	RawStatusCode int        `json:"raw_status_code,omitempty"`
	StatusCode    int        `json:"status_code,omitempty"`
	StatusMessage string     `json:"status_message,omitempty"`

	// Additional details. NOTE(review): the content of DemoAudio and the unit
	// of CreateTime are not visible here.
	Version    string `json:"version,omitempty"`
	DemoAudio  string `json:"demo_audio,omitempty"`
	CreateTime int64  `json:"create_time,omitempty"`

	// Request-tracing identifiers, omitted from JSON when empty.
	ReqID   string `json:"reqid,omitempty"`
	TraceID string `json:"trace_id,omitempty"`
	LogID   string `json:"log_id,omitempty"`
}
VoiceCloneStatus is one status snapshot of a clone training task.
Source Files
¶
Directories
¶
| Path | Synopsis |
|---|---|
| examples | |
| asr_v2_sauc_ws (command) | |
| ast_v2_translate (command) | |
| realtime (command) | |
| tts_v2/http_stream (command) | |
| tts_v2/websocket (command) | |
| voice_clone (command) | |
| internal | |