volcvoice

package module

v0.1.1 Latest Latest Go to latest Published: Nov 12, 2024 License: MIT Imports: 19 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/yankeguo/volcvoice

Links

Open Source Insights

README ¶

volcvoice

A Go Library for Volcengine (Bytedance) Voice Service

Notes

PCM output format

16-bit signed integer, little-endian, mono; the sample rate defaults to 24 kHz.

Example ffmpeg command:

ffmpeg -f s16le -ar 24k -ac 1 -i test.pcm test.mp3

Usages

Create Client

// using VOLCVOICE_APPID and VOLCVOICE_TOKEN
client, err := volcvoice.NewClient()

// explicitly set the app ID and token
client, err := volcvoice.NewClient(
    volcvoice.WithAppID("your_app_id"),
    volcvoice.WithToken("your_token"),
)

Voice Clone Upload

service := client.VoiceCloneUpload().
	SetSpeakerID(os.Getenv("VOLCVOICE_SPEAKER_ID")).
	AddAudio(buf, FormatMP3, "").
	SetModelType(VoiceCloneUploadModelTypeV2)

err := service.Do(context.Background())

Stream Synthesize

service := client.StreamSynthesize().
	SetInput(`“水何澹澹，山岛竦峙。树木丛生，百草丰茂。秋风萧瑟，洪波涌起”是实写眼前的景观，神奇而又壮观。“水何澹澹，山岛竦峙”是望海初得的大致印象，有点像绘画的轮廓。`).
	SetFormat(FormatPCM).
	SetRequestID(requestId.String()).
	SetCluster(StreamSynthesizeClusterV2).
	SetUserID("test").
	SetSpeakerID(os.Getenv("VOLCVOICE_SPEAKER_ID")).
	SetOutput(func(ctx context.Context, buf []byte) (err error) {
		t.Logf("len(buf): %d", len(buf))
		_, err = f.Write(buf)
		return
	})

err := service.Do(context.Background())

Bi-directional Stream Synthesize

var (
	input = []string{
		"离离原上草，一岁一枯荣。",
		"野火烧不尽，春风吹又生。",
		"远芳侵古道，晴翠接荒城。",
		"又送王孙去，萋萋满别情。",
	}
	inputIdx int64 = -1
)

service := client.DuplexSynthesize().
	SetResourceID(DuplexSynthesizeResourceVoiceCloneV2).
	SetRequestID(rg.Must(uuid.NewV7()).String()).
	SetConnectID(rg.Must(uuid.NewV7()).String()).
	SetFormat(FormatPCM).
	SetSampleRate(SampleRate16K).
	SetSpeakerID(os.Getenv("VOLCVOICE_SPEAKER_ID")).
	SetInput(func(ctx context.Context) (chunk string, err error) {
		idx := atomic.AddInt64(&inputIdx, 1)
		if idx >= int64(len(input)) {
			err = io.EOF
			return
		}
		time.Sleep(400 * time.Millisecond)
		chunk = input[idx]
		return
	}).
	SetOutput(func(ctx context.Context, chunk []byte) (err error) {
		_, err = f.Write(chunk)
		return
	}).
	SetUserID("test-user")

err := service.Do(context.Background())

Credits

GUO YANKE, MIT License

Documentation ¶

Index ¶

Constants
type Client
- func NewClient(fns ...Option) (Client, error)
type DuplexSynthesizeService
type Option
type StreamSynthesizeInput
- func StreamSynthesizeInputFromChannel(input chan string) StreamSynthesizeInput
- func StreamSynthesizeInputFromSlice(input []string) StreamSynthesizeInput
type StreamSynthesizeOutput
type StreamSynthesizeService
type VoiceCloneUploadAudio
type VoiceCloneUploadResponse
type VoiceCloneUploadService

Constants ¶

View Source

const (
	FormatAAC      = "aac"
	FormatM4A      = "m4a"
	FormatMP3      = "mp3"
	FormatOGG      = "ogg"
	FormatOGG_OPUS = "ogg_opus"
	FormatPCM      = "pcm"
	FormatWAV      = "wav"

	SampleRate8K  = 8000
	SampleRate16K = 16000
	SampleRate24K = 24000
	SampleRate32K = 32000
	SampleRate44K = 44100
	SampleRate48K = 48000
)

View Source

const (
	// DuplexSynthesizeResourceStandard is a resource id for TTS service.
	DuplexSynthesizeResourceStandard = "volc.service_type.10029"

	// DuplexSynthesizeResourceVoiceCloneV2 is a resource id for VoiceClone 2.0 service.
	DuplexSynthesizeResourceVoiceCloneV2 = "volc.megatts.default"
)

View Source

const (
	StreamSynthesizeClusterV1           = "volcano_mega"
	StreamSynthesizeClusterV1Concurrent = "volcano_mega_concurr"
	StreamSynthesizeClusterV2           = "volcano_icl"
	StreamSynthesizeClusterV2Concurrent = "volcano_icl_concurr"
)

View Source

const (
	VoiceCloneUploadLanguageCN = 0
	VoiceCloneUploadLanguageEN = 1
	VoiceCloneUploadLanguageJA = 2
	VoiceCloneUploadLanguageES = 3
	VoiceCloneUploadLanguageID = 4
	VoiceCloneUploadLanguagePT = 5

	VoiceCloneUploadModelTypeV1 = 0
	VoiceCloneUploadModelTypeV2 = 1
)

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type Client ¶

type Client interface {
	// StreamSynthesize creates a new service for stream synthesis.
	StreamSynthesize() *StreamSynthesizeService

	// DuplexSynthesize creates a new service for bidirectional stream synthesis.
	DuplexSynthesize() *DuplexSynthesizeService

	// VoiceCloneUpload creates a new service for voice clone upload.
	VoiceCloneUpload() *VoiceCloneUploadService
}

Client is the interface for the volcvoice client.

func NewClient ¶

func NewClient(fns ...Option) (Client, error)

NewClient creates a new client with the given options.

type DuplexSynthesizeService ¶

type DuplexSynthesizeService struct {
	// contains filtered or unexported fields
}

DuplexSynthesizeService is a service to synthesize speech in bi-directional stream mode.

func (*DuplexSynthesizeService) Do ¶

func (s *DuplexSynthesizeService) Do(ctx context.Context) (err error)

func (*DuplexSynthesizeService) SetConnectID ¶

func (s *DuplexSynthesizeService) SetConnectID(id string) *DuplexSynthesizeService

SetConnectID sets the connect id

func (*DuplexSynthesizeService) SetFormat ¶

func (s *DuplexSynthesizeService) SetFormat(format string) *DuplexSynthesizeService

SetFormat sets the format of the synthesized speech

func (*DuplexSynthesizeService) SetInput ¶

func (s *DuplexSynthesizeService) SetInput(input StreamSynthesizeInput) *DuplexSynthesizeService

SetInput sets the input function

func (*DuplexSynthesizeService) SetOutput ¶

func (s *DuplexSynthesizeService) SetOutput(output StreamSynthesizeOutput) *DuplexSynthesizeService

SetOutput sets the output function

func (*DuplexSynthesizeService) SetPitchRate ¶

func (s *DuplexSynthesizeService) SetPitchRate(rate int) *DuplexSynthesizeService

SetPitchRate sets the pitch rate of the synthesized speech

func (*DuplexSynthesizeService) SetRequestID ¶

func (s *DuplexSynthesizeService) SetRequestID(id string) *DuplexSynthesizeService

SetRequestID sets the request id

func (*DuplexSynthesizeService) SetResourceID ¶

func (s *DuplexSynthesizeService) SetResourceID(id string) *DuplexSynthesizeService

SetResourceID sets the resource id

func (*DuplexSynthesizeService) SetSSML ¶

func (s *DuplexSynthesizeService) SetSSML(ssml bool) *DuplexSynthesizeService

SetSSML sets the ssml mode

func (*DuplexSynthesizeService) SetSampleRate ¶

func (s *DuplexSynthesizeService) SetSampleRate(rate int) *DuplexSynthesizeService

SetSampleRate sets the sample rate of the synthesized speech

func (*DuplexSynthesizeService) SetSpeakerID ¶

func (s *DuplexSynthesizeService) SetSpeakerID(id string) *DuplexSynthesizeService

SetSpeakerID sets the speaker id; for the VoiceClone service, use a speaker id that starts with "S_".

func (*DuplexSynthesizeService) SetSpeechRate ¶

func (s *DuplexSynthesizeService) SetSpeechRate(rate int) *DuplexSynthesizeService

SetSpeechRate sets the speech rate of the synthesized speech

func (*DuplexSynthesizeService) SetUserID ¶

func (s *DuplexSynthesizeService) SetUserID(id string) *DuplexSynthesizeService

SetUserID sets the user id

type Option ¶

type Option func(opts *options)

func WithAppID ¶

func WithAppID(appID string) Option

WithAppID sets the appID for the client; it defaults to the VOLCVOICE_APPID environment variable.

func WithEndpoint ¶

func WithEndpoint(endpoint string) Option

WithEndpoint sets a custom endpoint for the client. It defaults to openspeech.bytedance.com and can also be set via the VOLCVOICE_ENDPOINT environment variable.

func WithHTTPClient ¶

func WithHTTPClient(client *http.Client) Option

WithHTTPClient sets a custom http client for the client.

func WithToken ¶

func WithToken(token string) Option

WithToken sets the token for the client; it defaults to the VOLCVOICE_TOKEN environment variable.

func WithVerbose ¶

func WithVerbose(verbose bool) Option

WithVerbose enables verbose mode for the client; it defaults to the VOLCVOICE_VERBOSE environment variable.

func WithWebsocketDialer ¶

func WithWebsocketDialer(dialer *websocket.Dialer) Option

WithWebsocketDialer sets a custom websocket dialer for the client.

type StreamSynthesizeInput ¶

type StreamSynthesizeInput func(ctx context.Context) (chunk string, err error)

StreamSynthesizeInput is a function that returns the next input text chunk; to signal the end of input, it should return an empty string together with the io.EOF error.

func StreamSynthesizeInputFromChannel ¶ added in v0.1.1

func StreamSynthesizeInputFromChannel(input chan string) StreamSynthesizeInput

func StreamSynthesizeInputFromSlice ¶ added in v0.1.1

func StreamSynthesizeInputFromSlice(input []string) StreamSynthesizeInput

StreamSynthesizeInputFromSlice returns a StreamSynthesizeInput built from the given slice of text chunks.

type StreamSynthesizeOutput ¶

type StreamSynthesizeOutput func(ctx context.Context, chunk []byte) (err error)

StreamSynthesizeOutput is a function to process output audio chunk.

type StreamSynthesizeService ¶

type StreamSynthesizeService struct {
	// contains filtered or unexported fields
}

StreamSynthesizeService is a service to synthesize speech in stream mode.

func (*StreamSynthesizeService) Do ¶

func (s *StreamSynthesizeService) Do(ctx context.Context) (err error)

Do sends the audio request to the server and streams audio chunks to the output handler.

func (*StreamSynthesizeService) SetCluster ¶

func (s *StreamSynthesizeService) SetCluster(cluster string) *StreamSynthesizeService

SetCluster sets the cluster for the audio.

func (*StreamSynthesizeService) SetFormat ¶

func (s *StreamSynthesizeService) SetFormat(format string) *StreamSynthesizeService

SetFormat sets the encoding for the audio.

func (*StreamSynthesizeService) SetInput ¶

func (s *StreamSynthesizeService) SetInput(input string) *StreamSynthesizeService

SetInput sets the text for the audio.

func (*StreamSynthesizeService) SetOutput ¶

func (s *StreamSynthesizeService) SetOutput(output StreamSynthesizeOutput) *StreamSynthesizeService

SetOutput sets the handler for the audio chunks.

func (*StreamSynthesizeService) SetRequestID ¶

func (s *StreamSynthesizeService) SetRequestID(reqID string) *StreamSynthesizeService

SetRequestID sets the request id for the audio.

func (*StreamSynthesizeService) SetSSML ¶

func (s *StreamSynthesizeService) SetSSML(ssml bool) *StreamSynthesizeService

SetSSML sets the text type to SSML.

func (*StreamSynthesizeService) SetSpeakerID ¶

func (s *StreamSynthesizeService) SetSpeakerID(speakerID string) *StreamSynthesizeService

SetSpeakerID sets the voice type for the audio, also known as the speaker id.

func (*StreamSynthesizeService) SetUserID ¶

func (s *StreamSynthesizeService) SetUserID(userID string) *StreamSynthesizeService

SetUserID sets the user id for the audio.

type VoiceCloneUploadAudio ¶

type VoiceCloneUploadAudio struct {
	AudioBytes  string `json:"audio_bytes"`
	AudioFormat string `json:"audio_format,omitempty"`
	Text        string `json:"text,omitempty"`
}

type VoiceCloneUploadResponse ¶

type VoiceCloneUploadResponse struct {
	BaseResp struct {
		StatusCode    int    `json:"StatusCode"`
		StatusMessage string `json:"StatusMessage"`
	} `json:"BaseResp"`
	SpeakerID string `json:"speaker_id"`
}

type VoiceCloneUploadService ¶

type VoiceCloneUploadService struct {
	// contains filtered or unexported fields
}

func (*VoiceCloneUploadService) AddAudio ¶

func (s *VoiceCloneUploadService) AddAudio(buf []byte, format string, text string) *VoiceCloneUploadService

func (*VoiceCloneUploadService) Do ¶

func (s *VoiceCloneUploadService) Do(ctx context.Context) (err error)

func (*VoiceCloneUploadService) SetLanguage ¶

func (s *VoiceCloneUploadService) SetLanguage(language int) *VoiceCloneUploadService

func (*VoiceCloneUploadService) SetModelType ¶

func (s *VoiceCloneUploadService) SetModelType(modelType int) *VoiceCloneUploadService

func (*VoiceCloneUploadService) SetSpeakerID ¶

func (s *VoiceCloneUploadService) SetSpeakerID(speakerID string) *VoiceCloneUploadService

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
internal
duplex_wire
stream_wire

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL