Documentation
¶
Overview ¶
Taken from https://github.com/Anush008/fastembed-go.
Index ¶
- Constants
- Variables
- func AntarysTitle()
- func BatchCosineSimilarity(queries [][]float32, targets [][]float32) [][]float32
- func BatchNormalizeVectors(vectors [][]float32)
- func FileExists(path string) bool
- func GetOptimalVector(size int) []float32
- func GetSIMDWidth() int
- func HasModel(model EmbeddingModel, cacheDir string) bool
- func HasOnnxRuntime(runtime OnnxRuntime, cacheDir string) bool
- func IsModelValid(id string, cacheDir string) bool
- func IsOnnxValid(cacheDir string) bool
- func LoadDynamicOnnx() (string, error)
- func MatrixVectorMultiply(matrix [][]float32, vector []float32) []float32
- func NormaliseVector(vector []float32)
- func NormaliseVectorLockless(vector []float32)
- func NormaliseVectors(vectors [][]float32)
- func ResetMathStats()
- func RetrieveModel(model EmbeddingModel, cacheDir string, showDownloadProgress bool) (string, error)
- func RetrieveRuntime(runtime OnnxRuntime, cacheDir string, showDownloadProgress bool) (string, error)
- func TestPrecisionAccuracy() map[string]float64
- func ValidateChecksum(path string, expected string) bool
- func ValidateOnnxAndModel(modelID string, cacheDir string) error
- type AntarysDB
- func (db *AntarysDB) BatchInsert(collection string, batch []*BatchInsert) error
- func (db *AntarysDB) BatchInsertInCollection(collection string, records []struct{ ... }) error
- func (db *AntarysDB) BuildHNSWIndex(collectionName string, options IndexingOptions) error
- func (db *AntarysDB) BuildIndex(collectionName string, options IndexingOptions) error
- func (db *AntarysDB) Close() error
- func (db *AntarysDB) Commit() error
- func (db *AntarysDB) Count() int64
- func (db *AntarysDB) CountInCollection(collection string) int64
- func (db *AntarysDB) CreateCollection(name string, dimensions int) error
- func (db *AntarysDB) CreateCollectionWithOptions(name string, dimensions int, options IndexingOptions) error
- func (db *AntarysDB) Delete(id string) error
- func (db *AntarysDB) DeleteCollection(name string) error
- func (db *AntarysDB) DeleteFromCollection(collection string, id string) error
- func (db *AntarysDB) Dump(outputDir string) error
- func (db *AntarysDB) ExportToJSON(collectionName string, outputFile string) error
- func (db *AntarysDB) Get(id string) (VectorRecord, error)
- func (db *AntarysDB) GetCollectionDimensions(collectionName string) (int, error)
- func (db *AntarysDB) GetFromCollection(collection, id string) (VectorRecord, error)
- func (db *AntarysDB) GetStats() map[string]interface{}
- func (db *AntarysDB) InsertInCollection(collection string, id string, vector []float32, metadata any) error
- func (db *AntarysDB) ListCollections() []string
- func (db *AntarysDB) ListIDs(collectionName string) ([]string, error)
- func (db *AntarysDB) Load() error
- func (db *AntarysDB) LoadFromJSON(collectionName string, inputFile string) error
- func (db *AntarysDB) PurgeCache()
- func (db *AntarysDB) RebuildIndex(collectionName string, options IndexingOptions) error
- func (db *AntarysDB) Restore(inputDir string) error
- func (db *AntarysDB) SearchByMetadata(filter func(any) bool, opts *SearchOptions) ([]VectorRecord, error)
- func (db *AntarysDB) SearchByMetadataInCollection(collection string, filter func(any) bool, opts *SearchOptions) ([]VectorRecord, error)
- func (db *AntarysDB) SearchInCollection(collectionName string, query []float32, opts *SearchOptions) ([]SearchResult, error)
- func (db *AntarysDB) StartAutoCommit(interval time.Duration)
- func (db *AntarysDB) UpdateMetadata(collection, id string, metadata any) error
- type BatchInsert
- type Collection
- type Config
- type DBStats
- type EmbeddingModel
- type FlagEmbedding
- type IndexingOptions
- type InitOptions
- type LRUCache
- type MathStats
- type MemoryoptimisedVector
- type ModelInfo
- type OnnxRuntime
- type QuantizationMethod
- type QuantizedVector
- type QueryRequest
- type SearchMethod
- type SearchOptions
- type SearchResult
- type ShardedCollectionData
- type VectorRecord
- type Worker
- type WorkerPool
Constants ¶
View Source
const (
	DefaultDimension = 1536
	DefaultMaxResults = 100
	DefaultSimilarityThreshold = 0.0
	DefaultCommitInterval = 30 * time.Second
	DefaultDataDirName = "data"
	DefaultCacheSize = 5000
	DefaultQueryConcurrency = 4
	DefaultHNSWM = 16
	DefaultHNSWEfConstruction = 200
	DefaultHNSWEfSearch = 64
	MagicHeader = "ANTARYS"
	FormatVersion = 1
	DefaultShards = 8
	DefaultBatchSize = 1000
	MinAccuracyCandidates = 64
	MaxHNSWLevels = 12
)
View Source
const ANTARYS_TITLE = `` /* 184-byte string literal not displayed */
View Source
const VERSION string = "0.2.0"
Variables ¶
View Source
var ModelsByIndex = map[int]string{
1: "fast-bge-base-en",
2: "fast-bge-base-en-v1.5",
3: "fast-bge-small-en",
4: "fast-bge-small-en-v1.5",
5: "fast-bge-small-zh-v1.5",
}
View Source
var ValidChecksumFiles map[string]string = map[string]string{
"libonnxruntime.1.22.0.dylib": "db045368293215c9d22aa7b8c983d688b3ae9ca1da3f64ffbe01ba7df31c3355",
"libonnxruntime_arm64.so": "0afd69a0ae38c5099fd0e8604dda398ac43dee67cd9c6394b5142b19e82528de",
"libonnxruntime_x64.so": "3da6146e14e7b8aaec625dde11d6114c7457c87a5f93d744897da8781e35c673",
}
Functions ¶
func AntarysTitle ¶
func AntarysTitle()
func BatchCosineSimilarity ¶
func BatchNormalizeVectors ¶
func BatchNormalizeVectors(vectors [][]float32)
func FileExists ¶
func GetOptimalVector ¶
func GetSIMDWidth ¶
func GetSIMDWidth() int
func HasModel ¶
func HasModel(model EmbeddingModel, cacheDir string) bool
func HasOnnxRuntime ¶
func HasOnnxRuntime(runtime OnnxRuntime, cacheDir string) bool
func IsModelValid ¶
func IsOnnxValid ¶
func LoadDynamicOnnx ¶
func MatrixVectorMultiply ¶
func NormaliseVector ¶
func NormaliseVector(vector []float32)
func NormaliseVectorLockless ¶
func NormaliseVectorLockless(vector []float32)
func NormaliseVectors ¶
func NormaliseVectors(vectors [][]float32)
func ResetMathStats ¶
func ResetMathStats()
func RetrieveModel ¶
func RetrieveModel(model EmbeddingModel, cacheDir string, showDownloadProgress bool) (string, error)
func RetrieveRuntime ¶
func RetrieveRuntime(runtime OnnxRuntime, cacheDir string, showDownloadProgress bool) (string, error)
func TestPrecisionAccuracy ¶
func ValidateChecksum ¶
func ValidateOnnxAndModel ¶
Types ¶
type AntarysDB ¶
type AntarysDB struct {
Collections map[string]*Collection
Mutex sync.RWMutex
// contains filtered or unexported fields
}
func NewAntarysDB ¶
func NewAntarysDBWithConfig ¶
func (*AntarysDB) BatchInsert ¶
func (db *AntarysDB) BatchInsert(collection string, batch []*BatchInsert) error
func (*AntarysDB) BatchInsertInCollection ¶
func (*AntarysDB) BuildHNSWIndex ¶
func (db *AntarysDB) BuildHNSWIndex(collectionName string, options IndexingOptions) error
func (*AntarysDB) BuildIndex ¶
func (db *AntarysDB) BuildIndex(collectionName string, options IndexingOptions) error
func (*AntarysDB) CountInCollection ¶
func (*AntarysDB) CreateCollection ¶
func (*AntarysDB) CreateCollectionWithOptions ¶
func (db *AntarysDB) CreateCollectionWithOptions(name string, dimensions int, options IndexingOptions) error
func (*AntarysDB) DeleteCollection ¶
func (*AntarysDB) DeleteFromCollection ¶
func (*AntarysDB) ExportToJSON ¶
func (*AntarysDB) GetCollectionDimensions ¶
func (*AntarysDB) GetFromCollection ¶
func (db *AntarysDB) GetFromCollection(collection, id string) (VectorRecord, error)
func (*AntarysDB) InsertInCollection ¶
func (*AntarysDB) ListCollections ¶
func (*AntarysDB) LoadFromJSON ¶
func (*AntarysDB) PurgeCache ¶
func (db *AntarysDB) PurgeCache()
func (*AntarysDB) RebuildIndex ¶
func (db *AntarysDB) RebuildIndex(collectionName string, options IndexingOptions) error
func (*AntarysDB) SearchByMetadata ¶
func (db *AntarysDB) SearchByMetadata(filter func(any) bool, opts *SearchOptions) ([]VectorRecord, error)
func (*AntarysDB) SearchByMetadataInCollection ¶
func (db *AntarysDB) SearchByMetadataInCollection(collection string, filter func(any) bool, opts *SearchOptions) ([]VectorRecord, error)
func (*AntarysDB) SearchInCollection ¶
func (db *AntarysDB) SearchInCollection(collectionName string, query []float32, opts *SearchOptions) ([]SearchResult, error)
func (*AntarysDB) StartAutoCommit ¶
type BatchInsert ¶
type Collection ¶
type Collection struct {
Name string
Dimensions int `json:"dimensions"`
VectorCount int64
Shards []*ShardedCollectionData
ShardMask uint32
Initialized bool
IndexOptions IndexingOptions
}
type Config ¶
type Config struct {
DataDir string `json:"data_dir"`
CommitInterval time.Duration `json:"commit_interval"`
CacheSize int `json:"cache_size"`
QueryConcurrency int `json:"query_concurrency"`
IndexingOptions IndexingOptions `json:"indexing_options"`
EncryptionKey []byte `json:"-"`
Compression bool `json:"compression"`
EnableProfiling bool `json:"enable_profiling"`
}
type EmbeddingModel ¶
type EmbeddingModel string
const (
	BGEBaseEN EmbeddingModel = "fast-bge-base-en"
	BGEBaseENV15 EmbeddingModel = "fast-bge-base-en-v1.5"
	BGESmallEN EmbeddingModel = "fast-bge-small-en"
	BGESmallENV15 EmbeddingModel = "fast-bge-small-en-v1.5"
	BGESmallZH EmbeddingModel = "fast-bge-small-zh-v1.5"
)
type FlagEmbedding ¶
type FlagEmbedding struct {
// contains filtered or unexported fields
}
func NewFlagEmbedding ¶
func NewFlagEmbedding(options *InitOptions) (*FlagEmbedding, error)
func (*FlagEmbedding) Destroy ¶
func (f *FlagEmbedding) Destroy() error
func (*FlagEmbedding) Embed ¶
func (f *FlagEmbedding) Embed(input []string, batchSize int) ([]([]float32), error)
func (*FlagEmbedding) PassageEmbed ¶
func (f *FlagEmbedding) PassageEmbed(input []string, batchSize int) ([]([]float32), error)
func (*FlagEmbedding) QueryEmbed ¶
func (f *FlagEmbedding) QueryEmbed(input string) ([]float32, error)
type IndexingOptions ¶
type IndexingOptions struct {
UseHNSW bool `json:"use_hnsw"`
M int `json:"m"`
N int `json:"n"`
EfConstruction int `json:"ef_construction"`
Quantization QuantizationMethod `json:"quantization"`
NumShards int `json:"num_shards"`
ParallelConstruction bool `json:"parallel_construction"`
EdgesPerCentroid int `json:"edges_per_centroid"`
UsePQ bool `json:"use_pq"`
NumPQSubspaces int `json:"num_pq_subspaces"`
NumPQClusters int `json:"num_pq_clusters"`
SubRegionPortion float32 `json:"sub_region_portion"`
VLQMaxResults int `json:"vlq_max_results"`
}
type InitOptions ¶
type InitOptions struct {
Model EmbeddingModel
ExecutionProviders []string
MaxLength int
CacheDir string
ShowDownloadProgress *bool
}
type MathStats ¶
type MathStats struct {
TotalOperations int64
FastPathOps int64
BlasOperations int64
PrecisionFallbacks int64
VectorOperations int64
}
func GetMathStats ¶
func GetMathStats() MathStats
type MemoryoptimisedVector ¶
type ModelInfo ¶
type ModelInfo struct {
Model EmbeddingModel
Dim int
Description string
}
func ListSupportedModels ¶
func ListSupportedModels() []ModelInfo
type OnnxRuntime ¶
type OnnxRuntime string
const (
ORT_MAC_UNIVERSAL OnnxRuntime = "libonnxruntime.1.22.0.dylib"
)
type QuantizationMethod ¶
type QuantizationMethod int
const (
	QuantizationNone QuantizationMethod = 0
	QuantizationScalar8Bit QuantizationMethod = 1
	QuantizationPQ QuantizationMethod = 2
	QuantizationOPQ QuantizationMethod = 3
	QuantizationResidualPQ QuantizationMethod = 4
	DefaultQuantization = QuantizationNone
)
type QuantizedVector ¶
type QueryRequest ¶
type QueryRequest struct {
Query []float32
Options *SearchOptions
ResultCh chan<- []SearchResult
ErrorCh chan<- error
}
type SearchMethod ¶
type SearchMethod int
const (
	SearchMethodBruteForce SearchMethod = iota
	SearchMethodHNSW
	SearchMethodVLQADC
)
type SearchOptions ¶
type SearchOptions struct {
MaxResults int `json:"max_results"`
SimilarityThreshold float32 `json:"similarity_threshold"`
MetadataFilter func(any) bool `json:"-"`
SortByCreated bool `json:"sort_by_created"`
SortByUpdated bool `json:"sort_by_updated"`
Quantization QuantizationMethod `json:"quantization"`
UseANN bool `json:"use_ann"`
EfSearch int `json:"ef_search"`
BFSMode bool `json:"bfs_mode"`
GreedyMode bool `json:"greedy_mode"`
SearchContext context.Context `json:"-"`
}
type SearchResult ¶
type SearchResult struct {
Record VectorRecord `json:"record"`
Score float32 `json:"score"`
Distance float32 `json:"distance"`
}
type ShardedCollectionData ¶
type ShardedCollectionData struct {
Vectors map[string][]float32
Quantized map[string]QuantizedVector
Metadata map[string]any
Created map[string]time.Time
Updated map[string]time.Time
HNSWGraph *hnswGraph
Mutex sync.RWMutex
PQCentroids [][]float32
PQCodes map[string][]byte
NumSubspaces int
AsyncWorkerStop chan struct{}
AsyncWorkerGroup sync.WaitGroup
// contains filtered or unexported fields
}
func (*ShardedCollectionData) StopAsyncWorkers ¶
func (shard *ShardedCollectionData) StopAsyncWorkers()
type VectorRecord ¶
type WorkerPool ¶
type WorkerPool struct {
// contains filtered or unexported fields
}
func NewWorkerPool ¶
func NewWorkerPool(size int) *WorkerPool
func (*WorkerPool) Submit ¶
func (wp *WorkerPool) Submit(job func())
Click to show internal directories.
Click to hide internal directories.