Documentation
¶
Index ¶
- Constants
- func DefaultIntentAliases() map[string]string
- func ExtractEmbeddedBundle(cacheDir string) (string, error)
- func NormalizeIntent(intent string, aliases map[string]string) string
- func NormalizeThresholds(thresholds map[string]float64, aliases map[string]string) map[string]float64
- func SaveBundleManifest(bundleDir string, manifest BundleManifest) error
- func SaveSamplesCSV(path string, samples []Sample) error
- type BundleManifest
- type BundleModelSummary
- type Candidate
- type ClassMetrics
- type DatasetSplitConfig
- type DeterministicRule
- type Engine
- type EvalReport
- type HybridDecision
- type HybridPolicy
- type IntentDataSummary
- type Language
- type LanguageDetection
- type ModelMeta
- type PredictOptions
- type Prediction
- type Router
- func NewRouter(defaultLang string) *Router
- func NewRouterFromBundle(bundleDir string) (*Router, error)
- func NewRouterFromDirs(modelByLanguage map[string]string, defaultLang string) (*Router, error)
- func NewRouterFromEmbedded() (*Router, error)
- func NewRouterFromEmbeddedIn(cacheDir string) (*Router, error)
- func NewRouterWithOptions(defaultLang string, options RouterOptions) *Router
- type RouterOptions
- type Sample
- type SourceMetadata
- type TaxonomyConfig
- type Tokenizer
- type TokenizerConfig
- type TrainConfig
- type TrainedModel
- type TrainingConfigSnapshot
- type TrainingMetadata
Constants ¶
const (
HybridRouteRule = "rule"
HybridRouteNLU = "nlu"
HybridRouteCandidate = "candidate"
HybridRouteFallback = "fallback"
)
const (
// DefaultUnknownIntent is returned when confidence is below threshold.
DefaultUnknownIntent = "unknown"
// ModelBinaryFile is the Bayesian model artifact file name.
ModelBinaryFile = "model.gob"
// ModelMetaFile is the metadata artifact file name.
ModelMetaFile = "meta.json"
)
const (
// BundleManifestFileName is the default file name of the multilingual router bundle manifest.
BundleManifestFileName = "manifest.json"
)
Variables ¶
This section is empty.
Functions ¶
func DefaultIntentAliases ¶
DefaultIntentAliases returns a copy of the stable intent taxonomy aliases.
func ExtractEmbeddedBundle ¶
ExtractEmbeddedBundle extracts the embedded multilingual bundle to the local filesystem and returns the bundle directory. Extraction is idempotent for the same cacheDir and embedded manifest hash.
func NormalizeIntent ¶
NormalizeIntent normalizes one intent to canonical taxonomy label.
func NormalizeThresholds ¶
func NormalizeThresholds(thresholds map[string]float64, aliases map[string]string) map[string]float64
NormalizeThresholds canonicalizes threshold keys with taxonomy aliases.
func SaveBundleManifest ¶
func SaveBundleManifest(bundleDir string, manifest BundleManifest) error
SaveBundleManifest writes bundle manifest into bundle directory.
func SaveSamplesCSV ¶
SaveSamplesCSV writes labeled samples to a CSV file with the header row "text,intent".
Types ¶
type BundleManifest ¶
type BundleManifest struct {
Version string `json:"version"`
CreatedAt time.Time `json:"createdAt"`
DefaultLanguage string `json:"defaultLanguage"`
Corpus SourceMetadata `json:"corpus,omitempty"`
TrainingParams map[string]string `json:"trainingParams,omitempty"`
ModelSummary map[string]BundleModelSummary `json:"modelSummary,omitempty"`
Models map[string]string `json:"models"` // lang -> relative model directory
}
BundleManifest describes one multilingual model bundle.
func EmbeddedBundleManifest ¶
func EmbeddedBundleManifest() (BundleManifest, error)
EmbeddedBundleManifest reads bundle manifest from embedded assets.
func LoadBundleManifest ¶
func LoadBundleManifest(bundleDir string) (BundleManifest, error)
LoadBundleManifest reads manifest.json from a bundle directory.
type BundleModelSummary ¶
type BundleModelSummary struct {
Version string `json:"version,omitempty"`
Language string `json:"language,omitempty"`
TrainingSampleCount int `json:"trainingSampleCount,omitempty"`
TotalSampleCount int `json:"totalSampleCount,omitempty"`
DefaultThreshold float64 `json:"defaultThreshold,omitempty"`
MacroF1 float64 `json:"macroF1,omitempty"`
}
BundleModelSummary stores high-level model metadata in bundle manifest.
type ClassMetrics ¶
type ClassMetrics struct {
Precision float64 `json:"precision"`
Recall float64 `json:"recall"`
F1 float64 `json:"f1"`
Top1Recall float64 `json:"top1Recall,omitempty"`
Top3Recall float64 `json:"top3Recall,omitempty"`
Top5Recall float64 `json:"top5Recall,omitempty"`
Support int `json:"support"`
TP int `json:"tp"`
FP int `json:"fp"`
FN int `json:"fn"`
Top1CandidateTP int `json:"top1CandidateTp,omitempty"`
Top3CandidateTP int `json:"top3CandidateTp,omitempty"`
Top5CandidateTP int `json:"top5CandidateTp,omitempty"`
}
ClassMetrics describes one intent evaluation result.
type DatasetSplitConfig ¶
type DatasetSplitConfig struct {
Enabled bool `json:"enabled"`
TrainRatio float64 `json:"trainRatio"`
ValRatio float64 `json:"valRatio"`
TestRatio float64 `json:"testRatio"`
Seed int64 `json:"seed"`
}
DatasetSplitConfig controls deterministic train/val/test split.
func DefaultDatasetSplitConfig ¶
func DefaultDatasetSplitConfig() DatasetSplitConfig
DefaultDatasetSplitConfig returns default split config.
type DeterministicRule ¶
type DeterministicRule struct {
ID string `json:"id"`
Intent string `json:"intent"`
Language string `json:"language,omitempty"`
EqualsAny []string `json:"equalsAny,omitempty"`
PrefixAny []string `json:"prefixAny,omitempty"`
ContainsAny []string `json:"containsAny,omitempty"`
Regex string `json:"regex,omitempty"`
// contains filtered or unexported fields
}
DeterministicRule defines one pre-NLU deterministic route rule.
type Engine ¶
type Engine struct {
// contains filtered or unexported fields
}
Engine provides concurrency-safe prediction and hot reload.
func NewEngineFromDir ¶
NewEngineFromDir loads model artifacts and creates a prediction engine.
func (*Engine) Predict ¶
func (e *Engine) Predict(_ context.Context, text string, opts PredictOptions) (Prediction, error)
Predict runs intent prediction.
type EvalReport ¶
type EvalReport struct {
Split string `json:"split"`
Samples int `json:"samples"`
Accuracy float64 `json:"accuracy"`
MacroF1 float64 `json:"macroF1"`
MicroF1 float64 `json:"microF1"`
UnknownRate float64 `json:"unknownRate"`
PerIntent map[string]ClassMetrics `json:"perIntent"`
Confusion map[string]map[string]int `json:"confusion"`
}
EvalReport describes evaluation metrics for one split.
type HybridDecision ¶
type HybridDecision struct {
Route string `json:"route"`
Intent string `json:"intent,omitempty"`
RuleID string `json:"ruleId,omitempty"`
Prediction Prediction `json:"prediction"`
ShouldCallLLM bool `json:"shouldCallLLM"`
}
HybridDecision describes final routing decision.
type HybridPolicy ¶
type HybridPolicy struct {
Rules []DeterministicRule
Router *Router
Engine *Engine
UnknownIntent string
}
HybridPolicy combines deterministic rules + NLU + fallback. With PredictOptions.CandidateMode, NLU is used as a high-recall candidate generator and the final decision is left to the downstream LLM/tool planner.
func (*HybridPolicy) Decide ¶
func (p *HybridPolicy) Decide(ctx context.Context, text string, opts PredictOptions) (HybridDecision, error)
Decide applies deterministic rules first, then NLU, then fallback.
func (*HybridPolicy) Prepare ¶
func (p *HybridPolicy) Prepare() error
Prepare validates and compiles regex for hybrid rules.
type IntentDataSummary ¶
type IntentDataSummary struct {
Total int `json:"total"`
Train int `json:"train"`
Val int `json:"val"`
Test int `json:"test"`
}
IntentDataSummary stores per-intent sample counts by split.
type Language ¶
type Language string
Language identifies tokenizer/model language.
func DetectLanguage ¶
DetectLanguage returns a lightweight language guess for routing.
type LanguageDetection ¶
type LanguageDetection struct {
Language Language
Confidence float64
Reason string
LetterCount int
ShortText bool
}
LanguageDetection stores language detection result.
func DetectLanguageDetailed ¶
func DetectLanguageDetailed(text string) LanguageDetection
DetectLanguageDetailed returns language detection with confidence and reason.
type ModelMeta ¶
type ModelMeta struct {
Version string `json:"version"`
Language string `json:"language,omitempty"`
UnknownIntent string `json:"unknownIntent"`
DefaultThreshold float64 `json:"defaultThreshold"`
Thresholds map[string]float64 `json:"thresholds,omitempty"`
Classes []string `json:"classes"`
CanonicalIntents []string `json:"canonicalIntents,omitempty"`
IntentAliases map[string]string `json:"intentAliases,omitempty"`
Tokenizer TokenizerConfig `json:"tokenizer"`
TrainingSampleCount int `json:"trainingSampleCount"`
CreatedAt time.Time `json:"createdAt"`
Evaluation map[string]EvalReport `json:"evaluation,omitempty"`
Training TrainingMetadata `json:"training"`
Source SourceMetadata `json:"source,omitempty"`
}
ModelMeta stores model metadata and inference policy.
type PredictOptions ¶
type PredictOptions struct {
TopK int
LanguageHint string
MinConfidence float64 // if > 0, override model threshold for direct routing
IgnoreThreshold bool
// CandidateMode favors recall for LLM/tool-planner handoff. The top-ranked
// intent is accepted regardless of threshold, and TopK candidates are still
// returned for downstream final selection.
CandidateMode bool
}
PredictOptions controls prediction behavior.
type Prediction ¶
type Prediction struct {
Intent string `json:"intent"`
Language string `json:"language,omitempty"`
Confidence float64 `json:"confidence"`
Strict bool `json:"strict"`
Matched bool `json:"matched"`
Reason string `json:"reason,omitempty"`
Tokens []string `json:"tokens,omitempty"`
Candidates []Candidate `json:"candidates,omitempty"`
ModelVer string `json:"modelVersion,omitempty"`
UnknownUsed bool `json:"unknownUsed"`
}
Prediction is one inference result. Candidates are always the raw ranked intent hypotheses before threshold rejection, so callers can pass them to an LLM/tool planner even when Intent is unknown.
type Router ¶
type Router struct {
// contains filtered or unexported fields
}
Router routes inputs to language-specific intent engines.
func NewRouterFromBundle ¶
NewRouterFromBundle loads multilingual models from a bundle directory.
func NewRouterFromDirs ¶
NewRouterFromDirs loads language models from directories.
func NewRouterFromEmbedded ¶
NewRouterFromEmbedded loads router from embedded multilingual bundle assets.
func NewRouterFromEmbeddedIn ¶
NewRouterFromEmbeddedIn loads a router from embedded assets and extracts files under cacheDir. If cacheDir is empty, the user cache directory (or the system temp directory) is used.
func NewRouterWithOptions ¶
func NewRouterWithOptions(defaultLang string, options RouterOptions) *Router
NewRouterWithOptions creates an empty router with options.
func (*Router) Meta ¶ added in v0.3.0
Meta returns the default engine's model metadata, or empty metadata if no engine is loaded.
func (*Router) Predict ¶
func (r *Router) Predict(ctx context.Context, text string, opts PredictOptions) (Prediction, error)
Predict routes by language hint/detection and returns prediction.
type RouterOptions ¶
type RouterOptions struct {
AutoDetectMinConfidence float64
ShortTextRuneLimit int
EnableCrossLanguageFallback bool
}
RouterOptions controls auto language routing behavior.
func DefaultRouterOptions ¶
func DefaultRouterOptions() RouterOptions
DefaultRouterOptions returns default router behavior.
type Sample ¶
Sample is one supervised training sample.
func LoadSamplesCSV ¶
LoadSamplesCSV loads labeled samples from CSV. Expected columns: text,intent (header row optional).
type SourceMetadata ¶
type SourceMetadata struct {
Name string `json:"name,omitempty"`
Version string `json:"version,omitempty"`
Revision string `json:"revision,omitempty"`
RepoURL string `json:"repoUrl,omitempty"`
Commit string `json:"commit,omitempty"`
Extra map[string]string `json:"extra,omitempty"`
}
SourceMetadata describes training data source for reproducibility.
type TaxonomyConfig ¶
type TaxonomyConfig struct {
Enabled bool `json:"enabled"`
Aliases map[string]string `json:"aliases,omitempty"`
}
TaxonomyConfig controls intent canonicalization.
func DefaultTaxonomyConfig ¶
func DefaultTaxonomyConfig() TaxonomyConfig
DefaultTaxonomyConfig returns default taxonomy config.
type Tokenizer ¶
type Tokenizer struct {
// contains filtered or unexported fields
}
Tokenizer wraps language-specific tokenization with normalization and filtering.
func NewTokenizer ¶
func NewTokenizer(cfg TokenizerConfig) (*Tokenizer, error)
NewTokenizer creates a tokenizer from config.
func (*Tokenizer) Config ¶
func (t *Tokenizer) Config() TokenizerConfig
Config returns a copy of tokenizer config.
type TokenizerConfig ¶
type TokenizerConfig struct {
Language string `json:"language,omitempty"`
SearchMode bool `json:"searchMode"`
HMM bool `json:"hmm"`
Lowercase bool `json:"lowercase"`
Stopwords []string `json:"stopwords,omitempty"`
CustomDicts []string `json:"customDicts,omitempty"`
MinTokenLen int `json:"minTokenLen"`
StripPunct bool `json:"stripPunct"`
CollapseSpace bool `json:"collapseSpace"`
}
TokenizerConfig defines tokenizer behavior.
func DefaultTokenizerConfig ¶
func DefaultTokenizerConfig() TokenizerConfig
DefaultTokenizerConfig returns a practical default tokenizer config.
type TrainConfig ¶
type TrainConfig struct {
Version string `json:"version"`
UnknownIntent string `json:"unknownIntent"`
DefaultThreshold float64 `json:"defaultThreshold"`
Thresholds map[string]float64 `json:"thresholds,omitempty"`
Tokenizer TokenizerConfig `json:"tokenizer"`
Split DatasetSplitConfig `json:"split"`
AutoCalibrateThresholds bool `json:"autoCalibrateThresholds"`
Taxonomy TaxonomyConfig `json:"taxonomy"`
Source SourceMetadata `json:"source,omitempty"`
}
TrainConfig controls training behavior.
func DefaultTrainConfig ¶
func DefaultTrainConfig() TrainConfig
DefaultTrainConfig returns a practical default training config.
type TrainedModel ¶
type TrainedModel struct {
// contains filtered or unexported fields
}
TrainedModel is an in-memory trained artifact.
func Train ¶
func Train(samples []Sample, cfg TrainConfig) (*TrainedModel, error)
Train trains a Bayesian model from labeled samples and produces metadata.
func (*TrainedModel) Meta ¶
func (m *TrainedModel) Meta() ModelMeta
Meta returns a copy of model metadata.
func (*TrainedModel) SaveDir ¶
func (m *TrainedModel) SaveDir(dir string) error
SaveDir persists the trained model into a directory.
type TrainingConfigSnapshot ¶
type TrainingConfigSnapshot struct {
DefaultThreshold float64 `json:"defaultThreshold"`
Thresholds map[string]float64 `json:"thresholds,omitempty"`
Tokenizer TokenizerConfig `json:"tokenizer"`
Split DatasetSplitConfig `json:"split"`
AutoCalibrateThresholds bool `json:"autoCalibrateThresholds"`
TaxonomyEnabled bool `json:"taxonomyEnabled"`
}
TrainingConfigSnapshot stores effective config used for training.
type TrainingMetadata ¶
type TrainingMetadata struct {
Seed int64 `json:"seed"`
TotalSampleCount int `json:"totalSampleCount"`
TrainSampleCount int `json:"trainSampleCount"`
ValSampleCount int `json:"valSampleCount"`
TestSampleCount int `json:"testSampleCount"`
Calibrated bool `json:"calibrated"`
CalibratedThresholds map[string]float64 `json:"calibratedThresholds,omitempty"`
DataSummary map[string]IntentDataSummary `json:"dataSummary,omitempty"`
Config TrainingConfigSnapshot `json:"config"`
}
TrainingMetadata stores reproducibility and data summary info.
Source Files
¶
Directories
¶
| Path | Synopsis |
|---|---|
| cmd | |
| intent-nlu-bundle command | |
| intent-nlu-feedback command | |
| intent-nlu-predict command | |
| intent-nlu-train command | |
| dataset | |