Documentation
¶
Overview ¶
The `insyra` main package provides unified interfaces and structures for data manipulation and analysis.
Index ¶
- Constants
- Variables
- func CalcColIndex(colNumber int) (colIndex string, ok bool)
- func ClearErrors()
- func DetectEncoding(filePath string) (string, error)
- func GetErrorCount() int
- func HasError() bool
- func HasErrorAboveLevel(level LogLevel) bool
- func IsNumeric(v any) bool
- func LogDebug(packageName, funcName, msg string, args ...any)
- func LogFatal(packageName, funcName, msg string, args ...any)
- func LogInfo(packageName, funcName, msg string, args ...any)
- func LogWarning(packageName, funcName, msg string, args ...any)
- func ParseColIndex(colIndex string) (colNumber int, ok bool)
- func PopErrorAndCallback(mode ErrPoppingMode, ...)
- func PowRat(base *big.Rat, exponent int) *big.Rat
- func ProcessData(input any) ([]any, int)
- func ReadSQLStream(ctx context.Context, db *gorm.DB, tableName string, options ...ReadSQLOptions) (<-chan ReadSQLChunk, error)
- func SetDefaultConfig()
- func Show(label string, object showable, startEnd ...any)
- func SliceToF64(input []any) []float64
- func SortTimes(times []time.Time)
- func SqrtRat(x *big.Rat) *big.Rat
- type AggregateConfig
- type AggregateOp
- type DataList
- func (dl *DataList) Append(values ...any) *DataList
- func (dl *DataList) AppendDataList(other IDataList) *DataList
- func (s *DataList) AtomicDo(f func(*DataList))
- func (dl *DataList) Capitalize() *DataList
- func (dl *DataList) Clear() *DataList
- func (dl *DataList) ClearErr() *DataList
- func (dl *DataList) ClearNaNs() *DataList
- func (dl *DataList) ClearNils() *DataList
- func (dl *DataList) ClearNilsAndNaNs() *DataList
- func (dl *DataList) ClearNumbers() *DataList
- func (dl *DataList) ClearOutliers(stdDevs float64) *DataList
- func (dl *DataList) ClearStrings() *DataList
- func (dl *DataList) Clone() *DataList
- func (s *DataList) Close()
- func (dl *DataList) Concat(other IDataList) *DataList
- func (dl *DataList) Count(value any) int
- func (dl *DataList) Counter() map[any]int
- func (dl *DataList) CumMax() *DataList
- func (dl *DataList) CumMin() *DataList
- func (dl *DataList) CumProd() *DataList
- func (dl *DataList) CumSum() *DataList
- func (dl *DataList) Data() []any
- func (dl *DataList) Describe(options ...DescribeOptions) *DataTable
- func (dl *DataList) Diff(periods int) *DataList
- func (dl *DataList) Difference() *DataList
- func (dl *DataList) DoubleExponentialSmoothing(alpha, beta float64) *DataList
- func (dl *DataList) Drop(index int) *DataList
- func (dl *DataList) DropAll(toDrop ...any) *DataList
- func (dl *DataList) DropIfContains(substring string) *DataList
- func (dl *DataList) Err() *ErrorInfo
- func (dl *DataList) Expanding(minObs int) *ExpandingDataList
- func (dl *DataList) ExponentialSmoothing(alpha float64) *DataList
- func (dl *DataList) FillBackward(limit ...int) *DataList
- func (dl *DataList) FillByInterpolation(extrapolate ...bool) *DataList
- func (dl *DataList) FillForward(limit ...int) *DataList
- func (dl *DataList) FillNaNWithMean() *DataList (deprecated)
- func (dl *DataList) FillWithMean() *DataList
- func (dl *DataList) FillWithMedian() *DataList
- func (dl *DataList) FillWithMode() *DataList
- func (dl *DataList) Filter(filterFunc func(any) bool) *DataList
- func (dl *DataList) FindAll(value any) []int
- func (dl *DataList) FindFirst(value any) any
- func (dl *DataList) FindLast(value any) any
- func (dl *DataList) GMean() float64
- func (dl *DataList) Get(index int) any
- func (dl *DataList) GetCreationTimestamp() int64
- func (dl *DataList) GetLastModifiedTimestamp() int64
- func (dl *DataList) GetName() string
- func (dl *DataList) HermiteInterpolation(x float64, derivatives []float64) float64
- func (dl *DataList) IQR() float64
- func (dl *DataList) InsertAt(index int, value any) *DataList
- func (dl *DataList) IsEqualTo(anotherDl *DataList) bool
- func (dl *DataList) IsTheSameAs(anotherDl *DataList) bool
- func (dl *DataList) LagrangeInterpolation(x float64) float64
- func (dl *DataList) Len() int
- func (dl *DataList) LinearInterpolation(x float64) float64
- func (dl *DataList) Lower() *DataList
- func (dl *DataList) MAD() float64
- func (dl *DataList) Map(mapFunc func(int, any) any) *DataList
- func (dl *DataList) Max() float64
- func (dl *DataList) Mean() float64
- func (dl *DataList) Median() float64
- func (dl *DataList) Min() float64
- func (dl *DataList) Mode() []float64
- func (dl *DataList) MovingAverage(windowSize int) *DataList
- func (dl *DataList) MovingStdev(windowSize int) *DataList
- func (dl *DataList) NearestNeighborInterpolation(x float64) float64
- func (dl *DataList) NewtonInterpolation(x float64) float64
- func (dl *DataList) Normalize() *DataList
- func (dl *DataList) ParseNumbers() *DataList
- func (dl *DataList) ParseStrings() *DataList
- func (dl *DataList) PctChange(periods int) *DataList
- func (dl *DataList) Percentile(p float64) float64
- func (dl *DataList) Pop() any
- func (dl *DataList) QuadraticInterpolation(x float64) float64
- func (dl *DataList) Quartile(q int) float64
- func (dl *DataList) Range() float64
- func (dl *DataList) Rank(ascending ...bool) *DataList
- func (dl *DataList) ReplaceAll(oldValue, newValue any) *DataList
- func (dl *DataList) ReplaceFirst(oldValue, newValue any) *DataList
- func (dl *DataList) ReplaceLast(oldValue, newValue any) *DataList
- func (dl *DataList) ReplaceNaNsAndNilsWith(value any) *DataList
- func (dl *DataList) ReplaceNaNsWith(value any) *DataList
- func (dl *DataList) ReplaceNilsWith(value any) *DataList
- func (dl *DataList) ReplaceOutliers(stdDevs float64, replacement float64) *DataList
- func (dl *DataList) Reverse() *DataList
- func (dl *DataList) Rolling(opts RollingOptions) *RollingDataList
- func (dl *DataList) Sample(n int, withReplacement bool, options ...SamplingOptions) *DataList
- func (dl *DataList) SampleFrac(frac float64, withReplacement bool, options ...SamplingOptions) *DataList
- func (dl *DataList) SetName(newName string) *DataList
- func (dl *DataList) Shift(periods int, fill ...any) *DataList
- func (dl *DataList) Show()
- func (dl *DataList) ShowRange(startEnd ...any)
- func (dl *DataList) ShowTypes()
- func (dl *DataList) ShowTypesRange(startEnd ...any)
- func (dl *DataList) Shuffle(options ...SamplingOptions) *DataList
- func (dl *DataList) Sort(ascending ...bool) *DataList
- func (dl *DataList) Standardize() *DataList
- func (dl *DataList) Stdev() float64
- func (dl *DataList) StdevP() float64
- func (dl *DataList) Sum() float64
- func (dl *DataList) Summary()
- func (dl *DataList) ToF64Slice() []float64
- func (dl *DataList) ToStringSlice() []string
- func (dl *DataList) Update(index int, newValue any) *DataList
- func (dl *DataList) Upper() *DataList
- func (dl *DataList) Var() float64
- func (dl *DataList) VarP() float64
- func (dl *DataList) WeightedMean(weights any) float64
- func (dl *DataList) WeightedMovingAverage(windowSize int, weights any) *DataList
- type DataListScaler
- type DataTable
- func NewDataTable(columns ...*DataList) *DataTable
- func ReadCSV_File(filePath string, setFirstColToRowNames bool, setFirstRowToColNames bool, ...) (*DataTable, error)
- func ReadCSV_String(csvString string, setFirstColToRowNames bool, setFirstRowToColNames bool) (*DataTable, error)
- func ReadExcelSheet(filePath string, sheetName string, setFirstColToRowNames bool, ...) (*DataTable, error)
- func ReadJSON(data any) (*DataTable, error)
- func ReadJSON_File(filePath string) (*DataTable, error)
- func ReadSQL(db *gorm.DB, tableName string, options ...ReadSQLOptions) (*DataTable, error)
- func ReadSQLContext(ctx context.Context, db *gorm.DB, tableName string, options ...ReadSQLOptions) (*DataTable, error)
- func Slice2DToDataTable(data any) (*DataTable, error)
- func (dt *DataTable) AddColUsingCCL(newColName, cclFormula string) *DataTable
- func (dt *DataTable) AppendCols(columns ...*DataList) *DataTable
- func (dt *DataTable) AppendRowsByColIndex(rowsData ...map[string]any) *DataTable
- func (dt *DataTable) AppendRowsByColName(rowsData ...map[string]any) *DataTable
- func (dt *DataTable) AppendRowsFromDataList(rowsData ...*DataList) *DataTable
- func (dt *DataTable) AtomicDo(f func(*DataTable))
- func (dt *DataTable) ChangeColName(oldName, newName string) *DataTable
- func (dt *DataTable) ChangeRowName(oldName, newName string) *DataTable
- func (dt *DataTable) ClearErr() *DataTable
- func (dt *DataTable) Clone() *DataTable
- func (dt *DataTable) Close()
- func (dt *DataTable) ColNames() []string
- func (dt *DataTable) ColNamesToFirstRow() *DataTable
- func (dt *DataTable) Count(value any) int
- func (dt *DataTable) Counter() map[any]int
- func (dt *DataTable) CumMaxCol(col string) *DataList
- func (dt *DataTable) CumMinCol(col string) *DataList
- func (dt *DataTable) CumProdCol(col string) *DataList
- func (dt *DataTable) CumSumCol(col string) *DataList
- func (dt *DataTable) Data(useNamesAsKeys ...bool) map[string][]any
- func (dt *DataTable) Describe(options ...DescribeOptions) *DataTable
- func (dt *DataTable) DiffCol(col string, periods int) *DataList
- func (dt *DataTable) DropColNames() *DataTable
- func (dt *DataTable) DropColsByIndex(columnIndices ...string) *DataTable
- func (dt *DataTable) DropColsByName(columnNames ...string) *DataTable
- func (dt *DataTable) DropColsByNumber(columnIndices ...int) *DataTable
- func (dt *DataTable) DropColsContain(value ...any) *DataTable
- func (dt *DataTable) DropColsContainExcelNA() *DataTable
- func (dt *DataTable) DropColsContainNaN() *DataTable
- func (dt *DataTable) DropColsContainNil() *DataTable
- func (dt *DataTable) DropColsContainNumber() *DataTable
- func (dt *DataTable) DropColsContainString() *DataTable
- func (dt *DataTable) DropRowNames() *DataTable
- func (dt *DataTable) DropRowsByIndex(rowIndices ...int) *DataTable
- func (dt *DataTable) DropRowsByName(rowNames ...string) *DataTable
- func (dt *DataTable) DropRowsContain(value ...any) *DataTable
- func (dt *DataTable) DropRowsContainExcelNA() *DataTable
- func (dt *DataTable) DropRowsContainNaN() *DataTable
- func (dt *DataTable) DropRowsContainNil() *DataTable
- func (dt *DataTable) DropRowsContainNumber() *DataTable
- func (dt *DataTable) DropRowsContainString() *DataTable
- func (dt *DataTable) EditColByIndexUsingCCL(colIndex, cclFormula string) *DataTable
- func (dt *DataTable) EditColByNameUsingCCL(colName, cclFormula string) *DataTable
- func (dt *DataTable) Err() *ErrorInfo
- func (dt *DataTable) ExecuteCCL(cclStatements string) *DataTable
- func (dt *DataTable) ExpandingCol(col string, minObs int) *ExpandingDataList
- func (dt *DataTable) FillBackward(limit int, cols ...string) *DataTable
- func (dt *DataTable) FillByInterpolation(cols ...string) *DataTable
- func (dt *DataTable) FillForward(limit int, cols ...string) *DataTable
- func (dt *DataTable) FillWithMean(cols ...string) *DataTable
- func (dt *DataTable) FillWithMedian(cols ...string) *DataTable
- func (dt *DataTable) FillWithMode(cols ...string) *DataTable
- func (dt *DataTable) Filter(filterFunc func(rowIndex int, columnIndex string, value any) bool) *DataTable
- func (dt *DataTable) FilterByCustomElement(filterFunc func(value any) bool) *DataTable
- func (dt *DataTable) FilterCols(filterFunc func(rowIndex int, rowName string, x any) bool) *DataTable
- func (dt *DataTable) FilterColsByColIndexEqualTo(columnIndexLetter string) *DataTable
- func (dt *DataTable) FilterColsByColIndexGreaterThan(columnIndexLetter string) *DataTable
- func (dt *DataTable) FilterColsByColIndexGreaterThanOrEqualTo(columnIndexLetter string) *DataTable
- func (dt *DataTable) FilterColsByColIndexLessThan(columnIndexLetter string) *DataTable
- func (dt *DataTable) FilterColsByColIndexLessThanOrEqualTo(columnIndexLetter string) *DataTable
- func (dt *DataTable) FilterColsByColNameContains(substring string) *DataTable
- func (dt *DataTable) FilterColsByColNameEqualTo(columnName string) *DataTable
- func (dt *DataTable) FilterRows(filterFunc func(colIndex, colName string, x any) bool) *DataTable
- func (dt *DataTable) FilterRowsByRowIndexEqualTo(index int) *DataTable
- func (dt *DataTable) FilterRowsByRowIndexGreaterThan(threshold int) *DataTable
- func (dt *DataTable) FilterRowsByRowIndexGreaterThanOrEqualTo(threshold int) *DataTable
- func (dt *DataTable) FilterRowsByRowIndexLessThan(threshold int) *DataTable
- func (dt *DataTable) FilterRowsByRowIndexLessThanOrEqualTo(threshold int) *DataTable
- func (dt *DataTable) FilterRowsByRowNameContains(substring string) *DataTable
- func (dt *DataTable) FilterRowsByRowNameEqualTo(rowName string) *DataTable
- func (dt *DataTable) FindColsIfAllElementsContainSubstring(substring string) []string
- func (dt *DataTable) FindColsIfAnyElementContainsSubstring(substring string) []string
- func (dt *DataTable) FindColsIfContains(value any) []string
- func (dt *DataTable) FindColsIfContainsAll(values ...any) []string
- func (dt *DataTable) FindRowsIfAllElementsContainSubstring(substring string) []int
- func (dt *DataTable) FindRowsIfAnyElementContainsSubstring(substring string) []int
- func (dt *DataTable) FindRowsIfContains(value any) []int
- func (dt *DataTable) FindRowsIfContainsAll(values ...any) []int
- func (dt *DataTable) GetCol(index string) *DataList
- func (dt *DataTable) GetColByName(name string) *DataList
- func (dt *DataTable) GetColByNumber(index int) *DataList
- func (dt *DataTable) GetColIndexByName(name string) string
- func (dt *DataTable) GetColIndexByNumber(number int) string
- func (dt *DataTable) GetColNameByIndex(index string) string
- func (dt *DataTable) GetColNameByNumber(index int) string
- func (dt *DataTable) GetColNumberByName(name string) int
- func (dt *DataTable) GetCreationTimestamp() int64
- func (dt *DataTable) GetElement(rowIndex int, columnIndex string) any
- func (dt *DataTable) GetElementByNumberIndex(rowIndex int, columnIndex int) any
- func (dt *DataTable) GetLastModifiedTimestamp() int64
- func (dt *DataTable) GetName() string
- func (dt *DataTable) GetRow(index int) *DataList
- func (dt *DataTable) GetRowByName(name string) *DataList
- func (dt *DataTable) GetRowIndexByName(name string) (int, bool)
- func (dt *DataTable) GetRowNameByIndex(index int) (string, bool)
- func (dt *DataTable) GroupBy(keyCols ...string) *GroupedDataTable
- func (dt *DataTable) Headers() []string
- func (dt *DataTable) LabelEncode(opts LabelEncodeOptions) (*DataTable, *LabelEncoder, error)
- func (dt *DataTable) Map(mapFunc func(rowIndex int, colIndex string, element any) any) *DataTable
- func (dt *DataTable) MaxAbsScale(cols ...string) (*DataTable, *MaxAbsScaler, error)
- func (dt *DataTable) Mean() any
- func (dt *DataTable) Merge(other IDataTable, direction MergeDirection, mode MergeMode, on ...string) (*DataTable, error)
- func (dt *DataTable) MinMaxScale(featureMin, featureMax float64, cols ...string) (*DataTable, *MinMaxScaler, error)
- func (dt *DataTable) NumCols() int
- func (dt *DataTable) NumRows() int
- func (dt *DataTable) OneHotEncode(opts OneHotOptions) (*DataTable, *OneHotEncoder, error)
- func (dt *DataTable) OrdinalEncode(opts OrdinalEncodeOptions) (*DataTable, *OrdinalEncoder, error)
- func (dt *DataTable) PctChangeCol(col string, periods int) *DataList
- func (dt *DataTable) Pivot(cfg PivotConfig) (*DataTable, error)
- func (dt *DataTable) Replace(oldValue, newValue any) *DataTable
- func (dt *DataTable) ReplaceInCol(colIndex string, oldValue, newValue any, mode ...int) *DataTable
- func (dt *DataTable) ReplaceInRow(rowIndex int, oldValue, newValue any, mode ...int) *DataTable
- func (dt *DataTable) ReplaceNaNsAndNilsInCol(colIndex string, newValue any, mode ...int) *DataTable
- func (dt *DataTable) ReplaceNaNsAndNilsInRow(rowIndex int, newValue any, mode ...int) *DataTable
- func (dt *DataTable) ReplaceNaNsAndNilsWith(newValue any) *DataTable
- func (dt *DataTable) ReplaceNaNsInCol(colIndex string, newValue any, mode ...int) *DataTable
- func (dt *DataTable) ReplaceNaNsInRow(rowIndex int, newValue any, mode ...int) *DataTable
- func (dt *DataTable) ReplaceNaNsWith(newValue any) *DataTable
- func (dt *DataTable) ReplaceNilsInCol(colIndex string, newValue any, mode ...int) *DataTable
- func (dt *DataTable) ReplaceNilsInRow(rowIndex int, newValue any, mode ...int) *DataTable
- func (dt *DataTable) ReplaceNilsWith(newValue any) *DataTable
- func (dt *DataTable) RobustScale(cols ...string) (*DataTable, *RobustScaler, error)
- func (dt *DataTable) RollingCol(col string, opts RollingOptions) *RollingDataList
- func (dt *DataTable) RowNames() []string
- func (dt *DataTable) RowNamesToFirstCol() *DataTable
- func (dt *DataTable) Sample(n int, withReplacement bool, options ...SamplingOptions) *DataTable
- func (dt *DataTable) SampleFrac(frac float64, withReplacement bool, options ...SamplingOptions) *DataTable
- func (dt *DataTable) SetColNameByIndex(index string, name string) *DataTable
- func (dt *DataTable) SetColNameByNumber(numberIndex int, name string) *DataTable
- func (dt *DataTable) SetColNames(colNames []string) *DataTable
- func (dt *DataTable) SetColToRowNames(columnIndex string) *DataTable
- func (dt *DataTable) SetHeaders(headers []string) *DataTable
- func (dt *DataTable) SetName(name string) *DataTable
- func (dt *DataTable) SetRowNameByIndex(index int, name string) *DataTable
- func (dt *DataTable) SetRowNames(rowNames []string) *DataTable
- func (dt *DataTable) SetRowToColNames(rowIndex int) *DataTable
- func (dt *DataTable) ShiftCol(col string, periods int, fill ...any) *DataList
- func (dt *DataTable) Show()
- func (dt *DataTable) ShowRange(startEnd ...any)
- func (dt *DataTable) ShowTypes()
- func (dt *DataTable) ShowTypesRange(startEnd ...any)
- func (dt *DataTable) Shuffle(options ...SamplingOptions) *DataTable
- func (dt *DataTable) SimpleRandomSample(sampleSize int) *DataTable (deprecated)
- func (dt *DataTable) Size() (numRows int, numCols int)
- func (dt *DataTable) SortBy(configs ...DataTableSortConfig) *DataTable
- func (dt *DataTable) StandardScale(cols ...string) (*DataTable, *StandardScaler, error)
- func (dt *DataTable) Summary()
- func (dt *DataTable) SwapColsByIndex(columnIndex1 string, columnIndex2 string) *DataTable
- func (dt *DataTable) SwapColsByName(columnName1 string, columnName2 string) *DataTable
- func (dt *DataTable) SwapColsByNumber(columnNumber1 int, columnNumber2 int) *DataTable
- func (dt *DataTable) SwapRowsByIndex(rowIndex1 int, rowIndex2 int) *DataTable
- func (dt *DataTable) SwapRowsByName(rowName1 string, rowName2 string) *DataTable
- func (dt *DataTable) To2DSlice() [][]any
- func (dt *DataTable) ToCSV(filePath string, setRowNamesToFirstCol bool, setColNamesToFirstRow bool, ...) error
- func (dt *DataTable) ToJSON(filePath string, useColNames bool) error
- func (dt *DataTable) ToJSON_Bytes(useColNames bool) []byte
- func (dt *DataTable) ToJSON_String(useColNames bool) string
- func (dt *DataTable) ToMap(useNamesAsKeys ...bool) map[string][]any
- func (dt *DataTable) ToSQL(db *gorm.DB, tableName string, options ...ToSQLOptions) error
- func (dt *DataTable) ToSQLContext(ctx context.Context, db *gorm.DB, tableName string, options ...ToSQLOptions) error
- func (dt *DataTable) TrainTestSplit(trainFrac float64, options ...SamplingOptions) (*DataTable, *DataTable)
- func (dt *DataTable) Transpose() *DataTable
- func (dt *DataTable) Unpivot(cfg UnpivotConfig) (*DataTable, error)
- func (dt *DataTable) UpdateCol(index string, dl *DataList) *DataTable
- func (dt *DataTable) UpdateColByNumber(index int, dl *DataList) *DataTable
- func (dt *DataTable) UpdateElement(rowIndex int, columnIndex string, value any) *DataTable
- func (dt *DataTable) UpdateRow(index int, dl *DataList) *DataTable
- type DataTableSortConfig
- type DescribeOptions
- type Encoder
- type ErrPoppingMode
- type ErrorInfo
- type ExpandingDataList
- func (e *ExpandingDataList) Max() *DataList
- func (e *ExpandingDataList) Mean() *DataList
- func (e *ExpandingDataList) Median() *DataList
- func (e *ExpandingDataList) Min() *DataList
- func (e *ExpandingDataList) Std() *DataList
- func (e *ExpandingDataList) Sum() *DataList
- func (e *ExpandingDataList) Var() *DataList
- type F64orRat
- type GroupedColumnTransform
- type GroupedDataTable
- func (g *GroupedDataTable) Aggregate(configs ...AggregateConfig) *DataTable
- func (g *GroupedDataTable) AggregateAll(op AggregateOp) *DataTable
- func (g *GroupedDataTable) Count() *DataTable
- func (g *GroupedDataTable) CumMaxCol(col string) *GroupedColumnTransform
- func (g *GroupedDataTable) CumMinCol(col string) *GroupedColumnTransform
- func (g *GroupedDataTable) CumProdCol(col string) *GroupedColumnTransform
- func (g *GroupedDataTable) CumSumCol(col string) *GroupedColumnTransform
- func (g *GroupedDataTable) Describe(options ...DescribeOptions) *DataTable
- func (g *GroupedDataTable) DiffCol(col string, periods int) *GroupedColumnTransform
- func (g *GroupedDataTable) ExpandingCol(col string, minObs int) *GroupedExpandingCol
- func (g *GroupedDataTable) PctChangeCol(col string, periods int) *GroupedColumnTransform
- func (g *GroupedDataTable) RollingCol(col string, opts RollingOptions) *GroupedRollingCol
- func (g *GroupedDataTable) ShiftCol(col string, periods int, fill ...any) *GroupedColumnTransform
- type GroupedExpandingCol
- func (ge *GroupedExpandingCol) Max() *GroupedColumnTransform
- func (ge *GroupedExpandingCol) Mean() *GroupedColumnTransform
- func (ge *GroupedExpandingCol) Median() *GroupedColumnTransform
- func (ge *GroupedExpandingCol) Min() *GroupedColumnTransform
- func (ge *GroupedExpandingCol) Std() *GroupedColumnTransform
- func (ge *GroupedExpandingCol) Sum() *GroupedColumnTransform
- func (ge *GroupedExpandingCol) Var() *GroupedColumnTransform
- type GroupedRollingCol
- func (gr *GroupedRollingCol) Apply(fn func(window []any) any) *GroupedColumnTransform
- func (gr *GroupedRollingCol) Max() *GroupedColumnTransform
- func (gr *GroupedRollingCol) Mean() *GroupedColumnTransform
- func (gr *GroupedRollingCol) Median() *GroupedColumnTransform
- func (gr *GroupedRollingCol) Min() *GroupedColumnTransform
- func (gr *GroupedRollingCol) Std() *GroupedColumnTransform
- func (gr *GroupedRollingCol) Sum() *GroupedColumnTransform
- func (gr *GroupedRollingCol) Var() *GroupedColumnTransform
- type IDataList
- type IDataTable
- type LabelEncodeOptions
- type LabelEncoder
- type LabelSort
- type LogLevel
- type MaxAbsScaler
- func (s *MaxAbsScaler) Fit(dt *DataTable, cols ...string) error
- func (s *MaxAbsScaler) FitDataList(dl *DataList) error
- func (s *MaxAbsScaler) FitTransform(dt *DataTable, cols ...string) (*DataTable, error)
- func (s *MaxAbsScaler) FitTransformDataList(dl *DataList) (*DataList, error)
- func (s *MaxAbsScaler) InverseTransform(dt *DataTable) (*DataTable, error)
- func (s *MaxAbsScaler) InverseTransformDataList(dl *DataList) (*DataList, error)
- func (s *MaxAbsScaler) Kind() string
- func (s *MaxAbsScaler) Params() map[string]ScalerParams
- func (s *MaxAbsScaler) Transform(dt *DataTable) (*DataTable, error)
- func (s *MaxAbsScaler) TransformDataList(dl *DataList) (*DataList, error)
- type MergeDirection
- type MergeMode
- type MinMaxScaler
- func (s *MinMaxScaler) Fit(dt *DataTable, cols ...string) error
- func (s *MinMaxScaler) FitDataList(dl *DataList) error
- func (s *MinMaxScaler) FitTransform(dt *DataTable, cols ...string) (*DataTable, error)
- func (s *MinMaxScaler) FitTransformDataList(dl *DataList) (*DataList, error)
- func (s *MinMaxScaler) InverseTransform(dt *DataTable) (*DataTable, error)
- func (s *MinMaxScaler) InverseTransformDataList(dl *DataList) (*DataList, error)
- func (s *MinMaxScaler) Kind() string
- func (s *MinMaxScaler) Params() map[string]ScalerParams
- func (s *MinMaxScaler) Transform(dt *DataTable) (*DataTable, error)
- func (s *MinMaxScaler) TransformDataList(dl *DataList) (*DataList, error)
- type NaNPolicy
- type OneHotEncoder
- type OneHotOptions
- type OrdinalEncodeOptions
- type OrdinalEncoder
- type PivotConfig
- type ReadSQLChunk
- type ReadSQLOptions
- type RobustScaler
- func (s *RobustScaler) Fit(dt *DataTable, cols ...string) error
- func (s *RobustScaler) FitDataList(dl *DataList) error
- func (s *RobustScaler) FitTransform(dt *DataTable, cols ...string) (*DataTable, error)
- func (s *RobustScaler) FitTransformDataList(dl *DataList) (*DataList, error)
- func (s *RobustScaler) InverseTransform(dt *DataTable) (*DataTable, error)
- func (s *RobustScaler) InverseTransformDataList(dl *DataList) (*DataList, error)
- func (s *RobustScaler) Kind() string
- func (s *RobustScaler) Params() map[string]ScalerParams
- func (s *RobustScaler) Transform(dt *DataTable) (*DataTable, error)
- func (s *RobustScaler) TransformDataList(dl *DataList) (*DataList, error)
- type RollingDataList
- func (r *RollingDataList) Apply(fn func(window []any) any) *DataList
- func (r *RollingDataList) Corr(other *DataList) *DataList
- func (r *RollingDataList) Max() *DataList
- func (r *RollingDataList) Mean() *DataList
- func (r *RollingDataList) Median() *DataList
- func (r *RollingDataList) Min() *DataList
- func (r *RollingDataList) Std() *DataList
- func (r *RollingDataList) Sum() *DataList
- func (r *RollingDataList) Var() *DataList
- type RollingOptions
- type SQLActionIfTableExists
- type SamplingOptions
- type Scaler
- type ScalerParams
- type StandardScaler
- func (s *StandardScaler) Fit(dt *DataTable, cols ...string) error
- func (s *StandardScaler) FitDataList(dl *DataList) error
- func (s *StandardScaler) FitTransform(dt *DataTable, cols ...string) (*DataTable, error)
- func (s *StandardScaler) FitTransformDataList(dl *DataList) (*DataList, error)
- func (s *StandardScaler) InverseTransform(dt *DataTable) (*DataTable, error)
- func (s *StandardScaler) InverseTransformDataList(dl *DataList) (*DataList, error)
- func (s *StandardScaler) Kind() string
- func (s *StandardScaler) Params() map[string]ScalerParams
- func (s *StandardScaler) Transform(dt *DataTable) (*DataTable, error)
- func (s *StandardScaler) TransformDataList(dl *DataList) (*DataList, error)
- type ToSQLOptions
- type UnknownPolicy
- type UnpivotConfig
Constants ¶
const Version = "0.2.19"
const VersionName = "Pier-2"
Variables ¶
var Config *configStruct = &configStruct{}
var ReadSlice2D = Slice2DToDataTable
ReadSlice2D is an alias for Slice2DToDataTable. It converts a 2D slice into a DataTable and supports various element types such as [][]any, [][]int, [][]float64, and [][]string.
var ToFloat64 = utils.ToFloat64
var ToFloat64Safe = utils.ToFloat64Safe
Functions ¶
func CalcColIndex ¶ added in v0.2.13
CalcColIndex converts a 0-based column number to its Excel-style index (A, B, C, ..., Z, AA, AB, ...).
func ClearErrors ¶ added in v0.2.2
func ClearErrors()
func DetectEncoding ¶ added in v0.2.10
DetectEncoding detects the character encoding of a text file. It reads a sample of the file and returns the detected charset in lower case (e.g. "utf-8", "big5", "gb-18030", "utf-16le"). For empty files it returns an error.
func GetErrorCount ¶ added in v0.2.2
func GetErrorCount() int
func HasError ¶ added in v0.2.13
func HasError() bool
HasError returns true if there are any errors in the buffer. This is a non-destructive check that doesn't modify the error buffer.
func HasErrorAboveLevel ¶ added in v0.2.13
HasErrorAboveLevel returns true if there are any errors at or above the specified level. This is a non-destructive check that doesn't modify the error buffer.
func LogWarning ¶
func ParseColIndex ¶ added in v0.2.2
ParseColIndex converts an Excel-like column name (e.g., "A", "Z", "AA") to its 0-based integer index.
func PopErrorAndCallback ¶ added in v0.2.2
func PopErrorAndCallback(mode ErrPoppingMode, callback func(errType LogLevel, packageName string, funcName string, errMsg string))
func ProcessData ¶
ProcessData processes the input data and returns the data and the length of the data. Returns nil and 0 if the data type is unsupported. Supported data types are slices, IDataList, and pointers to these types.
func ReadSQLStream ¶ added in v0.2.18
func ReadSQLStream(ctx context.Context, db *gorm.DB, tableName string, options ...ReadSQLOptions) (<-chan ReadSQLChunk, error)
ReadSQLStream reads a (potentially huge) query result in chunks, emitting each chunk as a DataTable on the returned channel. The channel is closed when the stream completes, when ctx is cancelled, or after a fatal error.
Chunk size is controlled by options[0].ChunkSize; zero means use the package default (1000 rows). The reader goroutine respects ctx cancellation between rows.
func Show ¶ added in v0.2.6
Show displays the content of any showable object with a label. Automatically deals with nil objects.
func SliceToF64 ¶
SliceToF64 converts a []any to a []float64.
Types ¶
type AggregateConfig ¶ added in v0.2.18
type AggregateConfig struct {
// SourceCol is the source column to aggregate. It is matched first by name,
// then by Excel-style index ("A"/"B"/...). Required for every Op except
// OpCountAll, where an empty SourceCol is allowed.
SourceCol string
// As is the output column name. If empty, the column is auto-named as
// "<source>_<op>" (e.g., "revenue_sum"). For OpCountAll without a source
// column, the default is "count_all".
As string
// Op selects the aggregation. See AggregateOp constants.
Op AggregateOp
// Custom is the user-provided function for OpCustom. It receives a
// DataList containing the values in the source column for this group
// (in original row order, including nil entries) and returns any value.
// Ignored when Op != OpCustom.
Custom func(group *DataList) any
}
AggregateConfig describes a single aggregation operation produced by Aggregate.
type AggregateOp ¶ added in v0.2.18
type AggregateOp int
AggregateOp identifies an aggregation operation applied per group by Aggregate.
const (
	// OpSum sums numeric values in the group; non-numeric and nil values are skipped.
	OpSum AggregateOp = iota
	// OpMean computes the arithmetic mean of numeric values in the group.
	OpMean
	// OpMedian computes the median of numeric values in the group.
	OpMedian
	// OpMin returns the minimum numeric value in the group.
	OpMin
	// OpMax returns the maximum numeric value in the group.
	OpMax
	// OpCount counts non-nil values in the group.
	OpCount
	// OpCountAll counts all rows in the group, including those with nil values.
	OpCountAll
	// OpStdev computes the sample standard deviation of numeric values in the group.
	OpStdev
	// OpStdevP computes the population standard deviation of numeric values in the group.
	OpStdevP
	// OpVar computes the sample variance of numeric values in the group.
	OpVar
	// OpVarP computes the population variance of numeric values in the group.
	OpVarP
	// OpFirst returns the first non-nil value in the group (in original row order).
	OpFirst
	// OpLast returns the last non-nil value in the group (in original row order).
	OpLast
	// OpNUnique counts the distinct non-nil values in the group.
	OpNUnique
	// OpCustom invokes Custom func on the group's sub-DataList for the source column.
	OpCustom
)
func (AggregateOp) String ¶ added in v0.2.18
func (o AggregateOp) String() string
String returns the canonical short name of an AggregateOp (e.g. "sum", "mean").
type DataList ¶
type DataList struct {
// contains filtered or unexported fields
}
DataList is a generic dynamic data list.
func NewDataList ¶
NewDataList creates a new DataList, supporting both slice and variadic inputs, and flattens the input before storing it.
func (*DataList) Append ¶
Append adds one or more new values to the DataList. The values can be of any type and are appended to the end of the DataList.
func (*DataList) AppendDataList ¶ added in v0.2.11
AppendDataList appends another DataList to the current DataList.
func (*DataList) Capitalize ¶
Capitalize capitalizes the first letter of each string element in the DataList.
func (*DataList) ClearErr ¶ added in v0.2.13
ClearErr clears the last error stored in the DataList. Returns the DataList to support chaining.
func (*DataList) ClearNaNs ¶ added in v0.0.5
ClearNaNs removes all NaN values from the DataList and updates the timestamp.
func (*DataList) ClearNils ¶ added in v0.2.10
ClearNils removes all nil values from the DataList and updates the timestamp.
func (*DataList) ClearNilsAndNaNs ¶ added in v0.2.10
ClearNilsAndNaNs removes all nil and NaN values from the DataList and updates the timestamp.
func (*DataList) ClearNumbers ¶
ClearNumbers removes all numeric elements (int, float, etc.) from the DataList and updates the timestamp.
func (*DataList) ClearOutliers ¶ added in v0.0.5
ClearOutliers removes values from the DataList that are outside the specified number of standard deviations. This method modifies the original DataList and returns it.
func (*DataList) ClearStrings ¶
ClearStrings removes all string elements from the DataList and updates the timestamp.
func (*DataList) Concat ¶ added in v0.2.11
Concat creates a new DataList by concatenating another DataList to the current DataList.
func (*DataList) Count ¶ added in v0.0.1
Count returns the number of occurrences of the specified value in the DataList.
func (*DataList) Counter ¶ added in v0.0.12
Counter returns a map of the number of occurrences of each value in the DataList.
func (*DataList) CumMax ¶ added in v0.2.18
CumMax returns the cumulative maximum. The accumulator is seeded by the first numeric value. See CumSum for nil semantics.
func (*DataList) CumMin ¶ added in v0.2.18
CumMin returns the cumulative minimum. The accumulator is seeded by the first numeric value. See CumSum for nil semantics.
func (*DataList) CumProd ¶ added in v0.2.18
CumProd returns the cumulative product. See CumSum for nil semantics.
func (*DataList) CumSum ¶ added in v0.2.18
CumSum returns a same-length DataList where out[i] is the cumulative sum of the numeric values in in[0..=i]. Non-numeric or nil cells produce nil at that output position but do not break the running accumulator, matching pandas .cumsum(skipna=True).
func (*DataList) Data ¶
Data returns a copy of the data stored in the DataList. This prevents external modification of the internal data (Copy-on-Read).
func (*DataList) Describe ¶ added in v0.2.19
func (dl *DataList) Describe(options ...DescribeOptions) *DataTable
Describe returns a programmatic statistical summary of the DataList.
func (*DataList) Diff ¶ added in v0.2.18
Diff returns the periods-step backward difference of the DataList: out[i] = in[i] - in[i-periods] (numeric subtraction). The first periods positions are nil. Cells where either operand is non-numeric or nil are emitted as nil. periods must be > 0; non-positive periods produce a warning and return nil.
Unlike the legacy Difference (which returns length n-1), Diff preserves the input length so the result lines up with neighbouring columns.
func (*DataList) Difference ¶ added in v0.0.5
Difference calculates the differences between adjacent elements in the DataList. The output is one element shorter than the input (out[i] = in[i+1] - in[i]).
For column-aligned use (same length as the input, leading nils) prefer Diff(1) — it preserves length so the result can sit alongside other columns in a DataTable. Difference is retained for backwards compatibility.
func (*DataList) DoubleExponentialSmoothing ¶ added in v0.0.5
DoubleExponentialSmoothing applies double exponential smoothing to the DataList. The alpha parameter controls the level smoothing, and the beta parameter controls the trend smoothing. Returns a new DataList containing the smoothed values.
func (*DataList) Drop ¶
Drop removes the element at the specified index from the DataList and updates the timestamp. Returns an error if the index is out of bounds.
func (*DataList) DropAll ¶
DropAll removes all occurrences of the specified values from the DataList. Supports multiple values to drop.
func (*DataList) DropIfContains ¶ added in v0.0.2
DropIfContains removes all elements from the DataList that contain the specified substring. This method only affects string elements. Non-string elements are kept.
func (*DataList) Err ¶ added in v0.2.13
Err returns the last error that occurred during a chained operation. Returns nil if no error occurred. This method allows for error checking after chained calls without breaking the chain.
Example usage:
dl.Append(1, 2, 3).Sort().Reverse()
if err := dl.Err(); err != nil {
// handle error
}
func (*DataList) Expanding ¶ added in v0.2.18
func (dl *DataList) Expanding(minObs int) *ExpandingDataList
Expanding builds an expanding-window view over dl. A minObs <= 0 defaults to 1.
func (*DataList) ExponentialSmoothing ¶ added in v0.0.5
ExponentialSmoothing applies exponential smoothing to the DataList. The alpha parameter controls the smoothing factor. Returns a new DataList containing the smoothed values.
func (*DataList) FillBackward ¶ added in v0.2.19
FillBackward replaces missing values with the next non-missing value.
func (*DataList) FillByInterpolation ¶ added in v0.2.19
FillByInterpolation fills missing sequence values by index, unlike LinearInterpolation which evaluates y at x.
func (*DataList) FillForward ¶ added in v0.2.19
FillForward replaces missing values with the most recent non-missing value.
func (*DataList) FillNaNWithMean ¶ deprecated, added in v0.0.5
FillNaNWithMean replaces all NaN values in the DataList with the mean value. Directly modifies the DataList.
Deprecated: use FillWithMean instead, which also fills nil (not just NaN), leaves non-numeric values untouched, and matches the other Fill* imputation methods.
func (*DataList) FillWithMean ¶ added in v0.2.19
FillWithMean replaces missing values (NaN and nil) with the mean of observed numeric values.
func (*DataList) FillWithMedian ¶ added in v0.2.19
FillWithMedian replaces missing values with the median of observed numeric values.
func (*DataList) FillWithMode ¶ added in v0.2.19
FillWithMode replaces missing values with the first-occurring mode of observed values.
func (*DataList) Filter ¶
Filter filters the DataList based on a custom filter function provided by the user. The filter function should return true for elements that should be included in the result.
func (*DataList) FindAll ¶
FindAll returns a slice of all the indices where the specified value is found in the DataList. If the value is not found, it returns an empty slice.
func (*DataList) FindFirst ¶
FindFirst returns the index of the first occurrence of the specified value in the DataList. If the value is not found, it returns nil.
func (*DataList) FindLast ¶
FindLast returns the index of the last occurrence of the specified value in the DataList. If the value is not found, it returns nil.
func (*DataList) GMean ¶
GMean calculates the geometric mean of the DataList. Returns the geometric mean. Returns math.NaN() if the DataList is empty or if no elements can be converted to float64.
func (*DataList) Get ¶
Get retrieves the value at the specified index in the DataList. Supports negative indexing. Returns nil if the index is out of bounds. Returns the value at the specified index.
func (*DataList) GetCreationTimestamp ¶
GetCreationTimestamp returns the creation time of the DataList in Unix timestamp.
func (*DataList) GetLastModifiedTimestamp ¶
GetLastModifiedTimestamp returns the last updated time of the DataList in Unix timestamp.
func (*DataList) HermiteInterpolation ¶ added in v0.0.5
HermiteInterpolation performs Hermite interpolation for the given x value using the DataList.
func (*DataList) IQR ¶
IQR calculates the interquartile range of the DataList. Returns math.NaN() if the DataList is empty or if Q1 or Q3 cannot be calculated. Returns the interquartile range (Q3 - Q1) as a float64.
func (*DataList) InsertAt ¶
InsertAt inserts a value at the specified index in the DataList. If the index is out of bounds, the value is appended to the end of the list. Returns the DataList to support chaining calls.
func (*DataList) IsEqualTo ¶ added in v0.0.12
IsEqualTo checks if the data of the DataList is equal to another DataList.
func (*DataList) IsTheSameAs ¶ added in v0.0.12
IsTheSameAs checks if the DataList is fully the same as another DataList. It checks for equality in name, data, creation timestamp, and last modified timestamp.
func (*DataList) LagrangeInterpolation ¶ added in v0.0.5
LagrangeInterpolation performs Lagrange interpolation for the given x value using the DataList.
func (*DataList) LinearInterpolation ¶ added in v0.0.5
LinearInterpolation performs linear interpolation for the given x value using the DataList.
func (*DataList) MAD ¶ added in v0.0.5
MAD calculates the mean absolute deviation of the DataList. Returns math.NaN() if the DataList is empty or if no valid elements can be used.
func (*DataList) Map ¶ added in v0.2.0
Map applies a function to all elements in the DataList and returns a new DataList with the results. The mapFunc should take an element of any type and its index, then return a transformed value of any type.
func (*DataList) Max ¶
Max returns the maximum value in the DataList. Returns math.NaN() if the DataList is empty or if no elements can be converted to float64.
func (*DataList) Mean ¶
Mean calculates the arithmetic mean of the DataList. Returns math.NaN() if the DataList is empty or if no elements can be converted to float64.
func (*DataList) Median ¶
Median calculates the median of the DataList. Returns math.NaN() if the DataList is empty or if no valid elements can be used.
func (*DataList) Min ¶
Min returns the minimum value in the DataList. Returns math.NaN() if the DataList is empty or if no elements can be converted to float64.
func (*DataList) Mode ¶
Mode calculates the mode of the DataList. Only works with numeric data types. Mode could be a single value or multiple values. Returns nil if the DataList is empty or if no valid elements can be used.
func (*DataList) MovingAverage ¶ added in v0.0.5
MovingAverage calculates the moving average of the DataList using a specified window size. Returns a new DataList containing the moving average values.
func (*DataList) MovingStdev ¶ added in v0.0.5
MovingStdev calculates the moving standard deviation for the DataList using a specified window size.
func (*DataList) NearestNeighborInterpolation ¶ added in v0.0.5
NearestNeighborInterpolation performs nearest-neighbor interpolation for the given x value using the DataList.
func (*DataList) NewtonInterpolation ¶ added in v0.0.5
NewtonInterpolation performs Newton's interpolation for the given x value using the DataList.
func (*DataList) Normalize ¶ added in v0.0.5
Normalize normalizes the data in the DataList, skipping NaN values. Directly modifies the DataList.
func (*DataList) ParseNumbers ¶
ParseNumbers attempts to parse all string elements in the DataList to numeric types. If parsing fails, the element is left unchanged.
func (*DataList) ParseStrings ¶
ParseStrings converts all elements in the DataList to strings.
func (*DataList) PctChange ¶ added in v0.2.18
PctChange returns the periods-step percentage change of the DataList: out[i] = (in[i] - in[i-periods]) / in[i-periods]. The first periods positions are nil. Cells where either operand is non-numeric, or where the denominator is zero, are emitted as nil. periods must be > 0.
func (*DataList) Percentile ¶
Percentile calculates the percentile based on the input value (0 to 100).
func (*DataList) Pop ¶
Pop removes and returns the last element from the DataList. Returns the last element. Returns nil if the DataList is empty.
func (*DataList) QuadraticInterpolation ¶ added in v0.0.5
QuadraticInterpolation performs quadratic interpolation for the given x value using the DataList.
func (*DataList) Quartile ¶
Quartile calculates the quartile based on the input value (1 to 3). 1 corresponds to the first quartile (Q1), 2 to the median (Q2), and 3 to the third quartile (Q3). This implementation uses percentiles to compute quartiles.
func (*DataList) Range ¶
Range calculates the range of the DataList. Returns math.NaN() if the DataList is empty or if Max or Min cannot be calculated.
func (*DataList) Rank ¶ added in v0.0.4
Rank assigns ranks to the elements in the DataList. By default, it ranks in ascending order (smaller value gets smaller rank). Pass false to rank in descending order.
func (*DataList) ReplaceAll ¶
ReplaceAll replaces all occurrences of oldValue with newValue in the DataList. If oldValue is not found, no changes are made.
func (*DataList) ReplaceFirst ¶
ReplaceFirst replaces the first occurrence of oldValue with newValue.
func (*DataList) ReplaceLast ¶
ReplaceLast replaces the last occurrence of oldValue with newValue.
func (*DataList) ReplaceNaNsAndNilsWith ¶ added in v0.2.10
ReplaceNaNsAndNilsWith replaces all NaN and nil values in the DataList with the specified value.
func (*DataList) ReplaceNaNsWith ¶ added in v0.2.10
ReplaceNaNsWith replaces all NaN values in the DataList with the specified value.
func (*DataList) ReplaceNilsWith ¶ added in v0.2.10
ReplaceNilsWith replaces all nil values in the DataList with the specified value.
func (*DataList) ReplaceOutliers ¶ added in v0.0.5
ReplaceOutliers replaces outliers in the DataList with the specified replacement value (e.g., mean, median).
Parameters: - stdDevs: Number of standard deviations to use as threshold (e.g., 2.0 means values beyond ±2σ from the mean will be replaced) - replacement: Value to replace outliers with
func (*DataList) Rolling ¶ added in v0.2.18
func (dl *DataList) Rolling(opts RollingOptions) *RollingDataList
Rolling builds a rolling-window view over dl. The returned RollingDataList captures dl's current contents; subsequent mutations to dl do not affect the rolling computation.
func (*DataList) Sample ¶ added in v0.2.19
func (dl *DataList) Sample(n int, withReplacement bool, options ...SamplingOptions) *DataList
Sample returns a new DataList containing n randomly selected elements.
func (*DataList) SampleFrac ¶ added in v0.2.19
func (dl *DataList) SampleFrac(frac float64, withReplacement bool, options ...SamplingOptions) *DataList
SampleFrac returns a new DataList containing frac of the elements.
func (*DataList) Shift ¶ added in v0.2.18
Shift returns a new DataList shifted by periods positions, keeping the original length. Positive periods shift right (lag — value at output index i is the input value at i-periods). Negative periods shift left (lead). Empty positions are filled with nil by default; pass a single fill value to override. Non-numeric values pass through unchanged, so Shift works on any column type. When |periods| >= len(dl) the output is all-fill of the same length.
func (*DataList) Show ¶ added in v0.2.0
func (dl *DataList) Show()
Show displays the content of DataList in a clean linear format. It adapts to terminal width and always displays in a linear format, not as a table. For more control over which items to display, use ShowRange.
func (*DataList) ShowRange ¶ added in v0.2.0
ShowRange displays the content of DataList within a specified range in a clean linear format. It adapts to terminal width and always displays in a linear format, not as a table. startEnd is an optional parameter that can be [start, end] to specify the range of items to display. if startEnd is not provided, all items will be displayed. if only one value is provided, there are two behaviors: - if positive, it shows the first N items (e.g., ShowRange(5) shows first 5 items) - if negative, it shows the last N items (e.g., ShowRange(-5) shows last 5 items) For two parameters [start, end], it shows items from index start (inclusive) to index end (exclusive). If end is nil, it shows items from index start to the end of the list. Example: dl.ShowRange() - shows all items Example: dl.ShowRange(5) - shows the first 5 items Example: dl.ShowRange(-5) - shows the last 5 items Example: dl.ShowRange(2, 10) - shows items with indices 2 to 9 (not including 10) Example: dl.ShowRange(2, -1) - shows items from index 2 to the end of the list Example: dl.ShowRange(2, nil) - shows items from index 2 to the end of the list
func (*DataList) ShowTypes ¶ added in v0.2.0
func (dl *DataList) ShowTypes()
ShowTypes displays the data types of each element in the DataList. It adapts to terminal width and always displays in a linear format.
func (*DataList) ShowTypesRange ¶ added in v0.2.0
ShowTypesRange displays the data types of the DataList elements within a specified range. The startEnd parameter behaves the same as in ShowRange.
func (*DataList) Shuffle ¶ added in v0.2.19
func (dl *DataList) Shuffle(options ...SamplingOptions) *DataList
Shuffle returns a randomly reordered copy of the DataList.
func (*DataList) Sort ¶
Sort sorts the DataList using a mixed sorting logic. It handles string, numeric (including all integer and float types), and time data types. If sorting fails, it restores the original order.
func (*DataList) Standardize ¶ added in v0.0.5
Standardize standardizes the data in the DataList. Directly modifies the DataList.
func (*DataList) Stdev ¶
Stdev calculates the standard deviation (sample) of the DataList. Returns math.NaN() if the DataList is empty or if no valid elements can be used.
func (*DataList) StdevP ¶
StdevP calculates the standard deviation (population) of the DataList. Returns math.NaN() if the DataList is empty or if no valid elements can be used.
func (*DataList) Sum ¶ added in v0.0.1
Sum calculates the sum of all elements in the DataList. Returns math.NaN() if the DataList is empty or if no elements can be converted to float64.
func (*DataList) Summary ¶ added in v0.2.0
func (dl *DataList) Summary()
Summary displays a comprehensive statistical summary of the DataList directly to the console.
func (*DataList) ToF64Slice ¶
ToF64Slice converts the DataList to a float64 slice and returns it. Returns nil if the DataList is empty.
func (*DataList) ToStringSlice ¶
ToStringSlice converts the DataList to a string slice. Returns the string slice. Returns nil if the DataList is empty.
func (*DataList) Update ¶
Update replaces the value at the specified index with the new value. Returns the DataList to support chaining calls.
func (*DataList) Var ¶
Var calculates the variance (sample variance) of the DataList. Returns math.NaN() if the DataList is empty or if not enough valid elements are available.
func (*DataList) VarP ¶
VarP calculates the variance (population variance) of the DataList. Returns math.NaN() if the DataList is empty or if no valid elements can be used.
func (*DataList) WeightedMean ¶ added in v0.0.5
WeightedMean calculates the weighted mean of the DataList using the provided weights. The weights parameter should be a slice or a DataList of the same length as the DataList. Returns math.NaN() if the DataList is empty, weights are invalid, or if no valid elements can be used.
func (*DataList) WeightedMovingAverage ¶ added in v0.0.5
WeightedMovingAverage applies a weighted moving average to the DataList with a given window size. The weights parameter should be a slice or a DataList of the same length as the window size. Returns a new DataList containing the weighted moving average values.
type DataListScaler ¶ added in v0.2.19
type DataListScaler interface {
FitDataList(dl *DataList) error
TransformDataList(dl *DataList) (*DataList, error)
FitTransformDataList(dl *DataList) (*DataList, error)
InverseTransformDataList(dl *DataList) (*DataList, error)
}
DataListScaler is the DataList-oriented counterpart of Scaler.
type DataTable ¶ added in v0.0.1
type DataTable struct {
// contains filtered or unexported fields
}
DataTable is the core data structure of Insyra for handling structured data. It provides rich data manipulation functionality including reading, writing, filtering, statistical analysis, and transformation operations.
DataTable uses a columnar storage format where each column is represented by a DataList. It supports both alphabetical column indexing (A, B, C...) and named columns, as well as row naming capabilities.
Key features: - Thread-safe operations via AtomicDo - Flexible data types using any - Column and row indexing/naming - Comprehensive data manipulation methods - CSV/JSON/SQL import/export capabilities
func NewDataTable ¶ added in v0.0.1
func ReadCSV_File ¶ added in v0.2.9
func ReadCSV_File(filePath string, setFirstColToRowNames bool, setFirstRowToColNames bool, encoding ...string) (*DataTable, error)
ReadCSV_File loads a CSV file into a DataTable, with options to set the first column as row names and the first row as column names.
func ReadCSV_String ¶ added in v0.2.4
func ReadExcelSheet ¶ added in v0.2.10
func ReadExcelSheet(filePath string, sheetName string, setFirstColToRowNames bool, setFirstRowToColNames bool) (*DataTable, error)
ReadExcelSheet reads a specific sheet from an Excel file and loads it into a DataTable.
func ReadJSON_File ¶ added in v0.2.9
ReadJSON_File reads a JSON file and loads the data into a DataTable and returns it.
func ReadSQL ¶ added in v0.2.0
ReadSQL reads table data from the database and converts it into a DataTable.
Equivalent to ReadSQLContext(context.Background(), db, tableName, options...).
func ReadSQLContext ¶ added in v0.2.18
func ReadSQLContext(ctx context.Context, db *gorm.DB, tableName string, options ...ReadSQLOptions) (*DataTable, error)
ReadSQLContext is the context-aware variant of ReadSQL. The query and row scanning run under ctx, so callers can cancel long-running reads.
func Slice2DToDataTable ¶ added in v0.2.7
Slice2DToDataTable converts a 2D slice of any type into a DataTable. It supports various 2D array types such as [][]any, [][]int, [][]float64, [][]string, etc.
func (*DataTable) AddColUsingCCL ¶ added in v0.2.2
func (*DataTable) AppendCols ¶ added in v0.0.14
AppendCols appends columns to the DataTable, with each column represented by a DataList. If the columns are shorter than the existing columns, nil values will be appended to match the length. If the columns are longer than the existing columns, the existing columns will be extended with nil values.
func (*DataTable) AppendRowsByColIndex ¶ added in v0.0.14
AppendRowsByColIndex appends rows to the DataTable, with each row represented by a map of column index and value. If the rows are shorter than the existing columns, nil values will be appended to match the length. If the rows are longer than the existing columns, the existing columns will be extended with nil values.
func (*DataTable) AppendRowsByColName ¶ added in v0.0.14
AppendRowsByColName appends rows to the DataTable, with each row represented by a map of column name and value. If the rows are shorter than the existing columns, nil values will be appended to match the length. If the rows are longer than the existing columns, the existing columns will be extended with nil values.
func (*DataTable) AppendRowsFromDataList ¶ added in v0.0.1
AppendRowsFromDataList appends rows to the DataTable, with each row represented by a DataList. If the rows are shorter than the existing columns, nil values will be appended to match the length. If the rows are longer than the existing columns, the existing columns will be extended with nil values.
func (*DataTable) ChangeColName ¶ added in v0.2.2
func (*DataTable) ChangeRowName ¶ added in v0.2.2
ChangeRowName changes the name of a row from oldName to newName. Parameters:
- oldName: The current name of the row.
- newName: The new name to set for the row.
Returns:
- *DataTable: The DataTable instance for chaining.
func (*DataTable) ClearErr ¶ added in v0.2.13
ClearErr clears the last error stored in the DataTable. Returns the DataTable to support chaining.
func (*DataTable) Clone ¶ added in v0.2.5
Clone creates a deep copy of the DataTable. It copies all columns, column indices, row names, and metadata, ensuring that modifications to the original DataTable do not affect the clone. The cloned DataTable has a new creation timestamp and is fully independent.
func (*DataTable) ColNamesToFirstRow ¶ added in v0.2.4
func (*DataTable) Count ¶ added in v0.0.4
Count returns the number of occurrences of the given value in the DataTable.
func (*DataTable) Counter ¶ added in v0.0.14
Counter counts the occurrences of each value in the DataTable and returns the counts as a map[any]int.
func (*DataTable) CumProdCol ¶ added in v0.2.18
CumProdCol returns the cumulative product of dt[col].
func (*DataTable) Describe ¶ added in v0.2.19
func (dt *DataTable) Describe(options ...DescribeOptions) *DataTable
Describe returns a programmatic per-column statistical summary of the DataTable. By default it includes only numeric columns; IncludeAll also includes non-numeric and mixed columns.
func (*DataTable) DiffCol ¶ added in v0.2.18
DiffCol returns a new column equal to dt[col].Diff(periods).
func (*DataTable) DropColNames ¶ added in v0.2.4
func (*DataTable) DropColsByIndex ¶ added in v0.0.14
DropColsByIndex drops columns by their index names.
func (*DataTable) DropColsByName ¶ added in v0.0.14
DropColsByName drops columns by their names.
func (*DataTable) DropColsByNumber ¶ added in v0.0.14
DropColsByNumber drops columns by their number.
func (*DataTable) DropColsContain ¶ added in v0.2.6
DropColsContain drops columns that contain the specified value.
func (*DataTable) DropColsContainExcelNA ¶ added in v0.2.6
DropColsContainExcelNA drops columns that contain Excel NA values ("#N/A").
func (*DataTable) DropColsContainNaN ¶ added in v0.2.10
DropColsContainNaN drops columns that contain NaN (Not a Number) elements.
func (*DataTable) DropColsContainNil ¶ added in v0.0.14
DropColsContainNil drops columns that contain nil elements.
func (*DataTable) DropColsContainNumber ¶ added in v0.2.10
DropColsContainNumber drops columns that contain number elements.
func (*DataTable) DropColsContainString ¶ added in v0.2.10
DropColsContainString drops columns that contain string elements.
func (*DataTable) DropRowNames ¶ added in v0.2.4
DropRowNames removes all row names from the DataTable. Returns:
- *DataTable: The DataTable instance for chaining.
func (*DataTable) DropRowsByIndex ¶ added in v0.0.1
DropRowsByIndex drops rows by their indices.
func (*DataTable) DropRowsByName ¶ added in v0.0.1
DropRowsByName drops rows by their names.
func (*DataTable) DropRowsContain ¶ added in v0.2.6
DropRowsContain drops rows that contain the specified value.
func (*DataTable) DropRowsContainExcelNA ¶ added in v0.2.6
DropRowsContainExcelNA drops rows that contain Excel NA values ("#N/A").
func (*DataTable) DropRowsContainNaN ¶ added in v0.2.10
DropRowsContainNaN drops rows that contain NaN (Not a Number) elements.
func (*DataTable) DropRowsContainNil ¶ added in v0.0.2
DropRowsContainNil drops rows that contain nil elements.
func (*DataTable) DropRowsContainNumber ¶ added in v0.2.10
DropRowsContainNumber drops rows that contain number elements.
func (*DataTable) DropRowsContainString ¶ added in v0.2.10
DropRowsContainString drops rows that contain string elements.
func (*DataTable) EditColByIndexUsingCCL ¶ added in v0.2.9
EditColByIndexUsingCCL modifies an existing column at the specified index using a CCL expression. The column index uses Excel-style letters (A, B, C, ..., AA, AB, etc.) where A = first column. Returns the modified DataTable.
func (*DataTable) EditColByNameUsingCCL ¶ added in v0.2.9
EditColByNameUsingCCL modifies an existing column with the specified name using a CCL expression. Returns the modified DataTable. If the column name is not found, a warning is logged.
func (*DataTable) Err ¶ added in v0.2.13
Err returns the last error that occurred during a chained operation. Returns nil if no error occurred. This method allows for error checking after chained calls without breaking the chain.
Example usage:
dt.Replace(old, new).ReplaceNaNsWith(0).SortBy(config)
if err := dt.Err(); err != nil {
// handle error
}
func (*DataTable) ExecuteCCL ¶ added in v0.2.9
ExecuteCCL executes multi-line CCL statements on the DataTable. It supports assignment syntax (e.g., A=B+C) and NEW('colName', expr) for creating new columns. Multiple statements can be separated by ; or newline. Assignment operations modify existing columns; if the target column doesn't exist, an error is returned. Returns the modified DataTable.
func (*DataTable) ExpandingCol ¶ added in v0.2.18
func (dt *DataTable) ExpandingCol(col string, minObs int) *ExpandingDataList
ExpandingCol returns an ExpandingDataList view of dt[col].
func (*DataTable) FillBackward ¶ added in v0.2.19
FillBackward fills missing values in selected columns using next observed values. limit caps how many consecutive missing cells each gap fills; limit <= 0 means unlimited.
func (*DataTable) FillByInterpolation ¶ added in v0.2.19
FillByInterpolation fills missing values in numeric columns by linear interpolation.
func (*DataTable) FillForward ¶ added in v0.2.19
FillForward fills missing values in selected columns using previous observed values. limit caps how many consecutive missing cells each gap fills; limit <= 0 means unlimited.
func (*DataTable) FillWithMean ¶ added in v0.2.19
FillWithMean fills missing values in numeric columns using the mean.
func (*DataTable) FillWithMedian ¶ added in v0.2.19
FillWithMedian fills missing values in numeric columns using the median.
func (*DataTable) FillWithMode ¶ added in v0.2.19
FillWithMode fills missing values in selected columns using the mode.
func (*DataTable) Filter ¶ added in v0.0.2
func (dt *DataTable) Filter(filterFunc func(rowIndex int, columnIndex string, value any) bool) *DataTable
Filter applies a custom filter function to the DataTable and returns the filtered DataTable.
func (*DataTable) FilterByCustomElement ¶ added in v0.0.2
FilterByCustomElement filters the table based on a custom function applied to each element.
func (*DataTable) FilterCols ¶ added in v0.2.9
func (dt *DataTable) FilterCols(filterFunc func(rowIndex int, rowName string, x any) bool) *DataTable
FilterCols applies a custom filter function to each cell in every column and returns a new DataTable that only contains columns where the filter function returns true for at least one cell in that column.
The filter function receives: - rowIndex: index of the row - rowName: name of the row (empty if none) - x: the cell value
func (*DataTable) FilterColsByColIndexEqualTo ¶ added in v0.2.9
FilterColsByColIndexEqualTo filters to only keep the column with the specified index.
func (*DataTable) FilterColsByColIndexGreaterThan ¶ added in v0.2.9
FilterColsByColIndexGreaterThan filters columns with index greater than the specified column.
func (*DataTable) FilterColsByColIndexGreaterThanOrEqualTo ¶ added in v0.2.9
FilterColsByColIndexGreaterThanOrEqualTo filters columns with index greater than or equal to the specified column.
func (*DataTable) FilterColsByColIndexLessThan ¶ added in v0.2.9
FilterColsByColIndexLessThan filters columns with index less than the specified column.
func (*DataTable) FilterColsByColIndexLessThanOrEqualTo ¶ added in v0.2.9
FilterColsByColIndexLessThanOrEqualTo filters columns with index less than or equal to the specified column.
func (*DataTable) FilterColsByColNameContains ¶ added in v0.2.9
FilterColsByColNameContains filters columns whose name contains the specified substring.
func (*DataTable) FilterColsByColNameEqualTo ¶ added in v0.2.9
FilterColsByColNameEqualTo filters to only keep the column with the specified name.
func (*DataTable) FilterRows ¶ added in v0.2.9
FilterRows applies a custom filter function to each cell in the DataTable and keeps only rows where the filter function returns true for at least one cell. The filter function receives: colindex (column letter), colname (column name), and x (cell value).
func (*DataTable) FilterRowsByRowIndexEqualTo ¶ added in v0.2.9
FilterRowsByRowIndexEqualTo filters to only keep the row with the specified index.
func (*DataTable) FilterRowsByRowIndexGreaterThan ¶ added in v0.2.9
FilterRowsByRowIndexGreaterThan filters rows with index greater than the specified threshold.
func (*DataTable) FilterRowsByRowIndexGreaterThanOrEqualTo ¶ added in v0.2.9
FilterRowsByRowIndexGreaterThanOrEqualTo filters rows with index greater than or equal to the specified threshold.
func (*DataTable) FilterRowsByRowIndexLessThan ¶ added in v0.2.9
FilterRowsByRowIndexLessThan filters rows with index less than the specified threshold.
func (*DataTable) FilterRowsByRowIndexLessThanOrEqualTo ¶ added in v0.2.9
FilterRowsByRowIndexLessThanOrEqualTo filters rows with index less than or equal to the specified threshold.
func (*DataTable) FilterRowsByRowNameContains ¶ added in v0.2.9
FilterRowsByRowNameContains filters rows whose name contains the specified substring.
func (*DataTable) FilterRowsByRowNameEqualTo ¶ added in v0.2.9
FilterRowsByRowNameEqualTo filters to only keep the row with the specified name.
func (*DataTable) FindColsIfAllElementsContainSubstring ¶ added in v0.0.14
FindColsIfAllElementsContainSubstring returns the indices of columns in which every element contains the given substring.
func (*DataTable) FindColsIfAnyElementContainsSubstring ¶ added in v0.0.14
FindColsIfAnyElementContainsSubstring returns the indices of columns that contain at least one element that contains the given substring.
func (*DataTable) FindColsIfContains ¶ added in v0.0.14
FindColsIfContains returns the indices of columns that contain the given element.
func (*DataTable) FindColsIfContainsAll ¶ added in v0.0.14
FindColsIfContainsAll returns the indices of columns that contain all the given elements.
func (*DataTable) FindRowsIfAllElementsContainSubstring ¶ added in v0.0.2
FindRowsIfAllElementsContainSubstring returns the indices of rows in which every element contains the given substring.
func (*DataTable) FindRowsIfAnyElementContainsSubstring ¶ added in v0.0.2
FindRowsIfAnyElementContainsSubstring returns the indices of rows that contain at least one element that contains the given substring.
func (*DataTable) FindRowsIfContains ¶ added in v0.0.2
FindRowsIfContains returns the indices of rows that contain the given element.
func (*DataTable) FindRowsIfContainsAll ¶ added in v0.0.2
FindRowsIfContainsAll returns the indices of rows that contain all the given elements.
func (*DataTable) GetCol ¶ added in v0.0.14
GetCol returns a new DataList containing the data of the column with the given index.
func (*DataTable) GetColByName ¶ added in v0.2.2
func (*DataTable) GetColByNumber ¶ added in v0.0.14
func (*DataTable) GetColIndexByName ¶ added in v0.2.4
GetColIndexByName returns the column index (A, B, C, ...) by its name.
func (*DataTable) GetColIndexByNumber ¶ added in v0.2.4
GetColIndexByNumber returns the column index (A, B, C, ...) by its number (0, 1, 2, ...).
func (*DataTable) GetColNameByIndex ¶ added in v0.2.4
GetColNameByIndex gets the column name by its Excel-style index (A, B, C, ..., Z, AA, AB, ...).
func (*DataTable) GetColNameByNumber ¶ added in v0.2.2
func (*DataTable) GetColNumberByName ¶ added in v0.2.4
func (*DataTable) GetCreationTimestamp ¶ added in v0.0.1
func (*DataTable) GetElement ¶ added in v0.0.2
GetElement returns the element at the given row and column index.
func (*DataTable) GetElementByNumberIndex ¶ added in v0.0.7
func (*DataTable) GetLastModifiedTimestamp ¶ added in v0.0.1
func (*DataTable) GetRow ¶ added in v0.0.2
GetRow returns a new DataList containing the data of the row with the given index.
func (*DataTable) GetRowByName ¶ added in v0.2.2
func (*DataTable) GetRowIndexByName ¶ added in v0.2.10
GetRowIndexByName returns the index of the row with the given name. Parameters:
- name: The name of the row to find.
Returns:
- int: The row index (0-based). Returns -1 if the row name does not exist.
- bool: true if the row name exists, false otherwise.
Note:
Since Insyra's Get methods usually support -1 as an index (representing the last element), always check the boolean return value to distinguish between "name not found" and "last row".
Example:
index, exists := dt.GetRowIndexByName("MyRow")
if exists {
row := dt.GetRow(index)
}
func (*DataTable) GetRowNameByIndex ¶ added in v0.0.2
GetRowNameByIndex returns the name of the row at the given index. Parameters:
- index: The row index (0-based). Supports negative indices (e.g., -1 for the last row).
Returns:
- string: The name of the row. Returns empty string if no name is set for this row.
- bool: true if a row name exists for this index, false otherwise.
Example:
name, exists := dt.GetRowNameByIndex(0)
if exists {
fmt.Println("Row name:", name)
}
func (*DataTable) GroupBy ¶ added in v0.2.18
func (dt *DataTable) GroupBy(keyCols ...string) *GroupedDataTable
GroupBy splits dt into groups defined by the unique combinations of values in the given key columns and returns an intermediate object that supports Aggregate / AggregateAll / Count.
Each entry in keyCols may be a column name or an Excel-style column index ("A", "B", ..., "AA"). Lookups try the name first, then fall back to the alphabetic index. Unknown columns are recorded on the parent DataTable's Err() and Aggregate will return an empty DataTable.
Group order in the resulting DataTable follows the order in which each key combination is first seen during a single linear scan of the input rows.
func (*DataTable) Headers ¶ added in v0.2.5
Headers is an alias for ColNames, returning the column names of the DataTable.
func (*DataTable) LabelEncode ¶ added in v0.2.19
func (dt *DataTable) LabelEncode(opts LabelEncodeOptions) (*DataTable, *LabelEncoder, error)
LabelEncode fits integer ids for a column and returns a new table.
func (*DataTable) Map ¶ added in v0.2.0
Map applies a function to all elements in the DataTable and returns a new DataTable with the results. The mapFunc should take three parameters: row index (int), column index (string), and element (any), then return a transformed value of any type.
func (*DataTable) MaxAbsScale ¶ added in v0.2.19
func (dt *DataTable) MaxAbsScale(cols ...string) (*DataTable, *MaxAbsScaler, error)
MaxAbsScale fits a MaxAbsScaler on cols and returns the scaled table.
func (*DataTable) Merge ¶ added in v0.2.11
func (dt *DataTable) Merge(other IDataTable, direction MergeDirection, mode MergeMode, on ...string) (*DataTable, error)
Merge merges two DataTables based on a key column or row name. direction: MergeDirectionHorizontal (join columns) or MergeDirectionVertical (join rows) mode: MergeModeInner, MergeModeOuter, MergeModeLeft, or MergeModeRight on: (Optional) the name of the column to join on (for horizontal). If empty or omitted, uses row names.
func (*DataTable) MinMaxScale ¶ added in v0.2.19
func (dt *DataTable) MinMaxScale(featureMin, featureMax float64, cols ...string) (*DataTable, *MinMaxScaler, error)
MinMaxScale fits a MinMaxScaler on cols and returns the scaled table.
func (*DataTable) OneHotEncode ¶ added in v0.2.19
func (dt *DataTable) OneHotEncode(opts OneHotOptions) (*DataTable, *OneHotEncoder, error)
OneHotEncode fits one-hot indicators for selected columns and returns a new table.
func (*DataTable) OrdinalEncode ¶ added in v0.2.19
func (dt *DataTable) OrdinalEncode(opts OrdinalEncodeOptions) (*DataTable, *OrdinalEncoder, error)
OrdinalEncode fits explicit ordered ids for a column and returns a new table.
func (*DataTable) PctChangeCol ¶ added in v0.2.18
PctChangeCol returns a new column equal to dt[col].PctChange(periods).
func (*DataTable) Pivot ¶ added in v0.2.18
func (dt *DataTable) Pivot(cfg PivotConfig) (*DataTable, error)
Pivot reshapes long-form data into wide form. Each unique combination of the Index columns becomes a row in the result; each unique value of the Columns column becomes a new column header; cells are filled from the Values column. When an (Index, Columns) pair is not unique, AggFunc must be set; missing combinations are filled with FillNA.
The returned *DataTable is a new instance and shares no backing storage with the receiver. The receiver itself is not modified.
func (*DataTable) Replace ¶ added in v0.2.11
Replace all occurrences of oldValue with newValue in the DataTable.
func (*DataTable) ReplaceInCol ¶ added in v0.2.11
ReplaceInCol replaces occurrences of oldValue with newValue in a specific column of the DataTable.
Parameters ¶
- colIndex: The index or name of the column to perform replacements in.
- oldValue: The value to be replaced.
- newValue: The value to replace with.
- mode (optional): An integer indicating the replacement mode.
- 0 (default): Replace all occurrences.
- 1: Replace only the first occurrence.
- -1: Replace only the last occurrence.
func (*DataTable) ReplaceInRow ¶ added in v0.2.11
ReplaceInRow replaces occurrences of oldValue with newValue in a specific row of the DataTable.
Parameters ¶
- rowIndex: The index of the row to perform replacements in.
- oldValue: The value to be replaced.
- newValue: The value to replace with.
- mode (optional): An integer indicating the replacement mode.
- 0 (default): Replace all occurrences.
- 1: Replace only the first occurrence.
- -1: Replace only the last occurrence.
func (*DataTable) ReplaceNaNsAndNilsInCol ¶ added in v0.2.11
ReplaceNaNsAndNilsInCol replaces occurrences of NaN and nil with newValue in a specific column of the DataTable.
Parameters ¶
- colIndex: The index of the column to perform replacements in.
- newValue: The value to replace NaNs and nils with.
- mode (optional): An integer indicating the replacement mode.
- 0 (default): Replace all occurrences.
- 1: Replace only the first occurrence.
- -1: Replace only the last occurrence.
func (*DataTable) ReplaceNaNsAndNilsInRow ¶ added in v0.2.11
ReplaceNaNsAndNilsInRow replaces occurrences of NaN and nil with newValue in a specific row of the DataTable.
Parameters ¶
- rowIndex: The index of the row to perform replacements in.
- newValue: The value to replace NaNs and nils with.
- mode (optional): An integer indicating the replacement mode.
- 0 (default): Replace all occurrences.
- 1: Replace only the first occurrence.
- -1: Replace only the last occurrence.
func (*DataTable) ReplaceNaNsAndNilsWith ¶ added in v0.2.11
ReplaceNaNsAndNilsWith replaces all occurrences of NaN and nil with newValue in the DataTable.
func (*DataTable) ReplaceNaNsInCol ¶ added in v0.2.11
ReplaceNaNsInCol replaces occurrences of NaN with newValue in a specific column of the DataTable.
Parameters ¶
- colIndex: The index or name of the column to perform replacements in.
- newValue: The value to replace NaNs with.
- mode (optional): An integer indicating the replacement mode.
- 0 (default): Replace all occurrences.
- 1: Replace only the first occurrence.
- -1: Replace only the last occurrence.
func (*DataTable) ReplaceNaNsInRow ¶ added in v0.2.11
ReplaceNaNsInRow replaces occurrences of NaN with newValue in a specific row of the DataTable.
Parameters ¶
- rowIndex: The index of the row to perform replacements in.
- newValue: The value to replace NaNs with.
- mode (optional): An integer indicating the replacement mode.
- 0 (default): Replace all occurrences.
- 1: Replace only the first occurrence.
- -1: Replace only the last occurrence.
func (*DataTable) ReplaceNaNsWith ¶ added in v0.2.11
ReplaceNaNsWith replaces all occurrences of NaN with newValue in the DataTable.
func (*DataTable) ReplaceNilsInCol ¶ added in v0.2.11
ReplaceNilsInCol replaces occurrences of nil with newValue in a specific column of the DataTable.
Parameters ¶
- colIndex: The index of the column to perform replacements in.
- newValue: The value to replace nils with.
- mode (optional): An integer indicating the replacement mode.
- 0 (default): Replace all occurrences.
- 1: Replace only the first occurrence.
- -1: Replace only the last occurrence.
func (*DataTable) ReplaceNilsInRow ¶ added in v0.2.11
ReplaceNilsInRow replaces occurrences of nil with newValue in a specific row of the DataTable.
Parameters ¶
- rowIndex: The index of the row to perform replacements in.
- newValue: The value to replace nils with.
- mode (optional): An integer indicating the replacement mode.
- 0 (default): Replace all occurrences.
- 1: Replace only the first occurrence.
- -1: Replace only the last occurrence.
func (*DataTable) ReplaceNilsWith ¶ added in v0.2.11
ReplaceNilsWith replaces all occurrences of nil with newValue in the DataTable.
func (*DataTable) RobustScale ¶ added in v0.2.19
func (dt *DataTable) RobustScale(cols ...string) (*DataTable, *RobustScaler, error)
RobustScale fits a RobustScaler on cols and returns the scaled table.
func (*DataTable) RollingCol ¶ added in v0.2.18
func (dt *DataTable) RollingCol(col string, opts RollingOptions) *RollingDataList
RollingCol returns a RollingDataList view of dt[col]. Terminal reducers (Mean, Sum, Min, Max, Median, Std, Var, Apply, Corr) produce a *DataList the same length as the column.
func (*DataTable) RowNames ¶ added in v0.2.4
RowNames returns a slice of all row names in the DataTable. Rows without names will have an empty string. Returns:
- []string: A slice containing the names of all rows.
func (*DataTable) RowNamesToFirstCol ¶ added in v0.2.4
RowNamesToFirstCol moves the row names to the first column of the DataTable. This will clear the row names map and insert a new column at the beginning. Returns:
- *DataTable: The DataTable instance for chaining.
func (*DataTable) Sample ¶ added in v0.2.19
func (dt *DataTable) Sample(n int, withReplacement bool, options ...SamplingOptions) *DataTable
Sample returns a new DataTable containing n randomly selected rows.
func (*DataTable) SampleFrac ¶ added in v0.2.19
func (dt *DataTable) SampleFrac(frac float64, withReplacement bool, options ...SamplingOptions) *DataTable
SampleFrac returns a new DataTable containing frac of the rows.
func (*DataTable) SetColNameByIndex ¶ added in v0.2.2
func (*DataTable) SetColNameByNumber ¶ added in v0.2.2
func (*DataTable) SetColNames ¶ added in v0.2.5
func (*DataTable) SetColToRowNames ¶ added in v0.0.14
SetColToRowNames sets the row names to the values of the specified column and drops the column.
func (*DataTable) SetHeaders ¶ added in v0.2.5
SetHeaders is an alias for SetColNames, setting the column names of the DataTable.
func (*DataTable) SetRowNameByIndex ¶ added in v0.0.2
SetRowNameByIndex sets the name of the row at the given index. Parameters:
- index: The row index (0-based). Supports negative indices (e.g., -1 for the last row).
- name: The name to set for the row. If empty, the row name is removed.
Returns:
- *DataTable: The DataTable instance for chaining.
Example:
dt.SetRowNameByIndex(0, "FirstRow")
func (*DataTable) SetRowNames ¶ added in v0.2.5
SetRowNames sets the row names of the DataTable. Unlike SetColNames, it only sets names for existing rows.
func (*DataTable) SetRowToColNames ¶ added in v0.0.14
SetRowToColNames sets the column names to the values of the specified row and drops the row.
func (*DataTable) ShiftCol ¶ added in v0.2.18
ShiftCol returns a new column equal to dt[col].Shift(periods, fill...). Returns an empty DataList when the column is missing.
func (*DataTable) Show ¶ added in v0.0.1
func (dt *DataTable) Show()
Show displays the content of the DataTable in a formatted way. For more control over which rows to display, use ShowRange.
func (*DataTable) ShowRange ¶ added in v0.2.0
ShowRange displays the DataTable with a specified range of rows. startEnd is an optional parameter that can be [start, end] to specify the range of rows to display. If startEnd is not provided, all rows are displayed. If only one value is provided, there are two behaviors:
- If positive, it shows the first N rows (e.g., ShowRange(5) shows the first 5 rows).
- If negative, it shows the last N rows (e.g., ShowRange(-5) shows the last 5 rows).
For two parameters [start, end], it shows rows from index start (inclusive) to index end (exclusive). If end is nil, it shows rows from index start to the end of the table.
Examples:
- dt.ShowRange() shows all rows.
- dt.ShowRange(5) shows the first 5 rows.
- dt.ShowRange(-5) shows the last 5 rows.
- dt.ShowRange(2, 10) shows rows with indices 2 to 9 (not including 10).
- dt.ShowRange(2, nil) shows rows from index 2 to the end of the table.
func (*DataTable) ShowTypes ¶ added in v0.0.3
func (dt *DataTable) ShowTypes()
ShowTypes displays the data types of each element in the DataTable. For more control over which rows to display, use ShowTypesRange.
func (*DataTable) ShowTypesRange ¶ added in v0.2.0
ShowTypesRange displays the data types of each element in the DataTable within a specified range of rows. startEnd is an optional parameter that can be [start, end] to specify the range of rows to display. If startEnd is not provided, all rows are displayed. If only one value is provided, there are two behaviors:
- If positive, it shows the first N rows (e.g., ShowTypesRange(5) shows the first 5 rows).
- If negative, it shows the last N rows (e.g., ShowTypesRange(-5) shows the last 5 rows).
For two parameters [start, end], it shows rows from index start (inclusive) to index end (exclusive). If end is nil, it shows rows from index start to the end of the table.
Examples:
- dt.ShowTypesRange() shows all rows.
- dt.ShowTypesRange(5) shows the first 5 rows.
- dt.ShowTypesRange(-5) shows the last 5 rows.
- dt.ShowTypesRange(2, 10) shows rows with indices 2 to 9 (not including 10).
- dt.ShowTypesRange(2, nil) shows rows from index 2 to the end of the table.
func (*DataTable) Shuffle ¶ added in v0.2.19
func (dt *DataTable) Shuffle(options ...SamplingOptions) *DataTable
Shuffle returns a new DataTable with rows randomly reordered.
func (*DataTable) SimpleRandomSample ¶ deprecated added in v0.2.5
SimpleRandomSample returns a new DataTable containing a simple random sample of the specified size. If sampleSize is greater than the number of rows in the DataTable, it returns a copy of the original DataTable. If sampleSize is less than or equal to 0, it returns an empty DataTable.
Deprecated: use Sample(n, false) instead, which shares the SamplingOptions (seed/reproducibility) surface with the other sampling methods. Note that Sample returns an empty table (and sets Err) when n exceeds the row count rather than cloning the whole table.
func (*DataTable) Size ¶ added in v0.0.4
Size returns the number of rows and columns in the DataTable.
func (*DataTable) SortBy ¶ added in v0.2.5
func (dt *DataTable) SortBy(configs ...DataTableSortConfig) *DataTable
SortBy sorts the DataTable based on multiple columns as specified in the configs. Supports sorting by column index (A, B, ...), column number (0, 1, ...), or column name. For multi-column sorting, the order of configs determines the priority (first config has highest priority).
func (*DataTable) StandardScale ¶ added in v0.2.19
func (dt *DataTable) StandardScale(cols ...string) (*DataTable, *StandardScaler, error)
StandardScale fits a StandardScaler on cols and returns the scaled table.
func (*DataTable) Summary ¶ added in v0.2.0
func (dt *DataTable) Summary()
Summary displays a comprehensive statistical summary of the DataTable directly to the console. It shows overall statistics for the entire table, including the number of rows and columns, and aggregate information about data types and values across the whole table. The output is formatted for easy reading with proper color coding.
func (*DataTable) SwapColsByIndex ¶ added in v0.2.2
SwapColsByIndex swaps two columns by their letter indices.
func (*DataTable) SwapColsByName ¶ added in v0.2.2
SwapColsByName swaps two columns by their names.
func (*DataTable) SwapColsByNumber ¶ added in v0.2.2
SwapColsByNumber swaps two columns by their numerical indices.
func (*DataTable) SwapRowsByIndex ¶ added in v0.2.2
SwapRowsByIndex swaps two rows by their numerical indices.
func (*DataTable) SwapRowsByName ¶ added in v0.2.2
SwapRowsByName swaps two rows by their names.
func (*DataTable) To2DSlice ¶ added in v0.2.7
To2DSlice converts the DataTable to a 2D slice of any. Each row in the DataTable becomes a slice in the outer slice, and each column's element at that row becomes an element in the inner slice. If a column is shorter than the maximum row count, nil values are used to fill.
func (*DataTable) ToCSV ¶ added in v0.0.3
func (dt *DataTable) ToCSV(filePath string, setRowNamesToFirstCol bool, setColNamesToFirstRow bool, includeBOM bool) error
ToCSV converts the DataTable to CSV format and writes it to the provided file path. The function accepts the following parameters:
- filePath: the file path to write the CSV file to
- setRowNamesToFirstCol: if true, the first column will be used as row names
- setColNamesToFirstRow: if true, the first row will be used as column names
- includeBOM: controls whether a byte order mark (BOM) is included in the written file
func (*DataTable) ToJSON ¶ added in v0.1.5
ToJSON converts the DataTable to JSON format and writes it to the provided file path. The function accepts two parameters: - filePath: the file path to write the JSON file to. - useColName: if true, the column names will be used as keys in the JSON object, otherwise the column index(A, B, C...) will be used. Every row will be a JSON object with the column names as keys and the row values as values. The function returns an error if the file cannot be created or the JSON data cannot be written to the file.
func (*DataTable) ToJSON_Bytes ¶ added in v0.1.5
ToJSON_Bytes converts the DataTable to JSON format and returns it as a byte slice. The function accepts one parameter: - useColName: if true, the column names will be used as keys in the JSON object, otherwise the column index(A, B, C...) will be used. Every row will be a JSON object with the column names as keys and the row values as values. The function returns the JSON data as a byte slice.
func (*DataTable) ToJSON_String ¶ added in v0.2.8
ToJSON_String converts the DataTable to JSON format and returns it as a string. The function accepts one parameter: - useColName: if true, the column names will be used as keys in the JSON object, otherwise the column index(A, B, C...) will be used. Every row will be a JSON object with the column names as keys and the row values as values. The function returns the JSON data as a string.
func (*DataTable) ToMap ¶ added in v0.2.8
ToMap is an alias for Data(). It returns a map[string][]any representation of the DataTable.
func (*DataTable) ToSQL ¶ added in v0.2.0
ToSQL writes the DataTable to the given database table.
Equivalent to ToSQLContext(context.Background(), db, tableName, options...).
func (*DataTable) ToSQLContext ¶ added in v0.2.18
func (dt *DataTable) ToSQLContext(ctx context.Context, db *gorm.DB, tableName string, options ...ToSQLOptions) error
ToSQLContext is the context-aware variant of ToSQL.
All database calls run under ctx, so callers can cancel long writes. Rows are inserted with batched multi-value INSERT statements; the batch size is controlled by options[0].BatchSize.
func (*DataTable) TrainTestSplit ¶ added in v0.2.19
func (dt *DataTable) TrainTestSplit(trainFrac float64, options ...SamplingOptions) (*DataTable, *DataTable)
TrainTestSplit splits the DataTable into train and test tables.
func (*DataTable) Transpose ¶ added in v0.0.8
Transpose transposes the DataTable, converting rows into columns and vice versa.
func (*DataTable) Unpivot ¶ added in v0.2.18
func (dt *DataTable) Unpivot(cfg UnpivotConfig) (*DataTable, error)
Unpivot reshapes wide-form data into long form. Each input row is expanded into one output row per ValueVar, with VarName recording the source column name and ValueName recording the cell value. IDVars are copied unchanged.
The returned *DataTable is a new instance and shares no backing storage with the receiver. The receiver itself is not modified.
func (*DataTable) UpdateColByNumber ¶ added in v0.0.14
UpdateColByNumber updates the column at the given index.
func (*DataTable) UpdateElement ¶ added in v0.0.2
UpdateElement updates the element at the given row and column index.
type DataTableSortConfig ¶ added in v0.2.5
type DataTableSortConfig struct {
ColumnIndex string // The column index (A, B, C, ...) of the column to sort by, highest priority
ColumnNumber int // The column number (0, 1, 2, ...) of the column to sort by, lowest priority
ColumnName string // The column name of the column to sort by, second priority
Descending bool // Whether to sort in descending order, default is ascending
}
type DescribeOptions ¶ added in v0.2.19
type DescribeOptions struct {
// Percentiles contains percentile positions in the inclusive range [0, 1].
// When omitted, Describe uses 0.25, 0.5, and 0.75.
Percentiles []float64
// IncludeAll includes non-numeric and mixed columns in DataTable and
// GroupedDataTable descriptions.
IncludeAll bool
}
DescribeOptions configures Describe output for DataList, DataTable, and GroupedDataTable.
type Encoder ¶ added in v0.2.19
type Encoder interface {
Transform(dt *DataTable) (*DataTable, error)
InverseTransform(dt *DataTable) (*DataTable, error)
Kind() string
}
Encoder is the minimal shared surface for fitted categorical encoders.
type ErrPoppingMode ¶ added in v0.2.2
type ErrPoppingMode int
const (
// ErrPoppingModeFIFO retrieves the first error in the buffer.
ErrPoppingModeFIFO ErrPoppingMode = iota
// ErrPoppingModeLIFO retrieves the last error in the buffer.
ErrPoppingModeLIFO
)
ErrPoppingMode defines the mode for popping errors.
type ErrorInfo ¶ added in v0.2.13
type ErrorInfo struct {
Level LogLevel
PackageName string
FuncName string
Message string
Timestamp time.Time
}
ErrorInfo represents a structured error with context information. It is the public-facing error type returned by error retrieval functions.
func GetAllErrors ¶ added in v0.2.13
func GetAllErrors() []ErrorInfo
GetAllErrors returns a copy of all errors in the buffer without removing them. The returned slice is ordered from oldest to newest (FIFO order).
func GetErrorsByLevel ¶ added in v0.2.13
GetErrorsByLevel returns all errors at the specified level without removing them.
func GetErrorsByPackage ¶ added in v0.2.13
GetErrorsByPackage returns all errors from the specified package without removing them.
func PeekError ¶ added in v0.2.13
func PeekError(mode ErrPoppingMode) *ErrorInfo
PeekError returns the error at the specified position without removing it. Returns nil if the buffer is empty or index is out of bounds. Mode determines whether to peek from the front (FIFO) or back (LIFO).
func PopAllErrors ¶ added in v0.2.13
func PopAllErrors() []ErrorInfo
PopAllErrors retrieves and removes all errors from the buffer. The returned slice is ordered from oldest to newest (FIFO order).
func PopErrorInfo ¶ added in v0.2.13
func PopErrorInfo(mode ErrPoppingMode) *ErrorInfo
PopErrorInfo retrieves and removes an error with full context information. Returns nil if the buffer is empty.
type ExpandingDataList ¶ added in v0.2.18
type ExpandingDataList struct {
// contains filtered or unexported fields
}
ExpandingDataList is the intermediate produced by DataList.Expanding. Each position i is reduced over in[0..=i] when at least MinObs valid observations are available, else nil.
func (*ExpandingDataList) Max ¶ added in v0.2.18
func (e *ExpandingDataList) Max() *DataList
Max returns the expanding maximum.
func (*ExpandingDataList) Mean ¶ added in v0.2.18
func (e *ExpandingDataList) Mean() *DataList
Mean returns the expanding mean.
func (*ExpandingDataList) Median ¶ added in v0.2.18
func (e *ExpandingDataList) Median() *DataList
Median returns the expanding median.
func (*ExpandingDataList) Min ¶ added in v0.2.18
func (e *ExpandingDataList) Min() *DataList
Min returns the expanding minimum.
func (*ExpandingDataList) Std ¶ added in v0.2.18
func (e *ExpandingDataList) Std() *DataList
Std returns the expanding sample (n-1) standard deviation. Positions with fewer than 2 valid observations emit nil regardless of MinObs.
func (*ExpandingDataList) Sum ¶ added in v0.2.18
func (e *ExpandingDataList) Sum() *DataList
Sum returns the expanding sum.
func (*ExpandingDataList) Var ¶ added in v0.2.18
func (e *ExpandingDataList) Var() *DataList
Var returns the expanding sample (n-1) variance. Positions with fewer than 2 valid observations emit nil regardless of MinObs.
type GroupedColumnTransform ¶ added in v0.2.18
type GroupedColumnTransform struct {
// contains filtered or unexported fields
}
GroupedColumnTransform is the terminal-stage intermediate for group-aware column transforms. It carries the per-group transform function and the source column; .As(name) executes the transform group-by-group and scatters results back into a row-aligned column.
func (*GroupedColumnTransform) As ¶ added in v0.2.18
func (t *GroupedColumnTransform) As(name string) *DataList
As executes the configured transform and returns the resulting column with the given name. The output has the same length as the parent DataTable (rows that didn't appear in any group, if any, remain nil).
type GroupedDataTable ¶ added in v0.2.18
type GroupedDataTable struct {
// contains filtered or unexported fields
}
GroupedDataTable is the lightweight intermediate produced by DataTable.GroupBy. It is not safe for concurrent use across goroutines and should be consumed by calling Aggregate, AggregateAll, or Count once before reuse.
func (*GroupedDataTable) Aggregate ¶ added in v0.2.18
func (g *GroupedDataTable) Aggregate(configs ...AggregateConfig) *DataTable
Aggregate produces a new DataTable with one row per group. The first columns are the keys passed to GroupBy (in the original order); the remaining columns are produced by configs (in the order passed). Custom funcs receive a DataList containing the source-column values for that group, including any nil entries, in the original row order.
func (*GroupedDataTable) AggregateAll ¶ added in v0.2.18
func (g *GroupedDataTable) AggregateAll(op AggregateOp) *DataTable
AggregateAll applies op to every column that is not a group key. The output names are auto-derived (e.g., "revenue_sum"). Columns that produce no valid numeric values for any group simply emit NaN, matching DataList.Sum etc.
func (*GroupedDataTable) Count ¶ added in v0.2.18
func (g *GroupedDataTable) Count() *DataTable
Count returns a DataTable with a single aggregate column ("count") that holds the size of each group (including rows where every value is nil).
func (*GroupedDataTable) CumMaxCol ¶ added in v0.2.18
func (g *GroupedDataTable) CumMaxCol(col string) *GroupedColumnTransform
CumMaxCol applies CumMax per group.
func (*GroupedDataTable) CumMinCol ¶ added in v0.2.18
func (g *GroupedDataTable) CumMinCol(col string) *GroupedColumnTransform
CumMinCol applies CumMin per group.
func (*GroupedDataTable) CumProdCol ¶ added in v0.2.18
func (g *GroupedDataTable) CumProdCol(col string) *GroupedColumnTransform
CumProdCol applies CumProd per group.
func (*GroupedDataTable) CumSumCol ¶ added in v0.2.18
func (g *GroupedDataTable) CumSumCol(col string) *GroupedColumnTransform
CumSumCol applies CumSum per group.
func (*GroupedDataTable) Describe ¶ added in v0.2.19
func (g *GroupedDataTable) Describe(options ...DescribeOptions) *DataTable
Describe returns one summary row per group. Key columns are emitted first, followed by flattened summary columns named "<source>_<stat>".
func (*GroupedDataTable) DiffCol ¶ added in v0.2.18
func (g *GroupedDataTable) DiffCol(col string, periods int) *GroupedColumnTransform
DiffCol applies Diff per group.
func (*GroupedDataTable) ExpandingCol ¶ added in v0.2.18
func (g *GroupedDataTable) ExpandingCol(col string, minObs int) *GroupedExpandingCol
ExpandingCol builds an expanding-window view over the given column, scoped to each group separately.
func (*GroupedDataTable) PctChangeCol ¶ added in v0.2.18
func (g *GroupedDataTable) PctChangeCol(col string, periods int) *GroupedColumnTransform
PctChangeCol applies PctChange per group.
func (*GroupedDataTable) RollingCol ¶ added in v0.2.18
func (g *GroupedDataTable) RollingCol(col string, opts RollingOptions) *GroupedRollingCol
RollingCol builds a rolling-window view over the given column, scoped to each group separately.
func (*GroupedDataTable) ShiftCol ¶ added in v0.2.18
func (g *GroupedDataTable) ShiftCol(col string, periods int, fill ...any) *GroupedColumnTransform
ShiftCol applies Shift per group.
type GroupedExpandingCol ¶ added in v0.2.18
type GroupedExpandingCol struct {
// contains filtered or unexported fields
}
GroupedExpandingCol is the intermediate produced by GroupedDataTable.ExpandingCol. Each terminal reducer returns a GroupedColumnTransform whose .As(name) materializes the column.
func (*GroupedExpandingCol) Max ¶ added in v0.2.18
func (ge *GroupedExpandingCol) Max() *GroupedColumnTransform
Max executes a per-group expanding max.
func (*GroupedExpandingCol) Mean ¶ added in v0.2.18
func (ge *GroupedExpandingCol) Mean() *GroupedColumnTransform
Mean executes a per-group expanding mean.
func (*GroupedExpandingCol) Median ¶ added in v0.2.18
func (ge *GroupedExpandingCol) Median() *GroupedColumnTransform
Median executes a per-group expanding median.
func (*GroupedExpandingCol) Min ¶ added in v0.2.18
func (ge *GroupedExpandingCol) Min() *GroupedColumnTransform
Min executes a per-group expanding min.
func (*GroupedExpandingCol) Std ¶ added in v0.2.18
func (ge *GroupedExpandingCol) Std() *GroupedColumnTransform
Std executes a per-group expanding sample std.
func (*GroupedExpandingCol) Sum ¶ added in v0.2.18
func (ge *GroupedExpandingCol) Sum() *GroupedColumnTransform
Sum executes a per-group expanding sum.
func (*GroupedExpandingCol) Var ¶ added in v0.2.18
func (ge *GroupedExpandingCol) Var() *GroupedColumnTransform
Var executes a per-group expanding sample variance.
type GroupedRollingCol ¶ added in v0.2.18
type GroupedRollingCol struct {
// contains filtered or unexported fields
}
GroupedRollingCol is the intermediate produced by GroupedDataTable.RollingCol. Its terminal reducers (Sum / Mean / Min / Max / Median / Std / Var / Apply) each return a GroupedColumnTransform whose .As(name) materializes the per-group rolling column.
func (*GroupedRollingCol) Apply ¶ added in v0.2.18
func (gr *GroupedRollingCol) Apply(fn func(window []any) any) *GroupedColumnTransform
Apply executes a per-group rolling custom reducer.
func (*GroupedRollingCol) Max ¶ added in v0.2.18
func (gr *GroupedRollingCol) Max() *GroupedColumnTransform
Max executes a per-group rolling max.
func (*GroupedRollingCol) Mean ¶ added in v0.2.18
func (gr *GroupedRollingCol) Mean() *GroupedColumnTransform
Mean executes a per-group rolling mean.
func (*GroupedRollingCol) Median ¶ added in v0.2.18
func (gr *GroupedRollingCol) Median() *GroupedColumnTransform
Median executes a per-group rolling median.
func (*GroupedRollingCol) Min ¶ added in v0.2.18
func (gr *GroupedRollingCol) Min() *GroupedColumnTransform
Min executes a per-group rolling min.
func (*GroupedRollingCol) Std ¶ added in v0.2.18
func (gr *GroupedRollingCol) Std() *GroupedColumnTransform
Std executes a per-group rolling sample std.
func (*GroupedRollingCol) Sum ¶ added in v0.2.18
func (gr *GroupedRollingCol) Sum() *GroupedColumnTransform
Sum executes a per-group rolling sum.
func (*GroupedRollingCol) Var ¶ added in v0.2.18
func (gr *GroupedRollingCol) Var() *GroupedColumnTransform
Var executes a per-group rolling sample variance.
type IDataList ¶
type IDataList interface {
AtomicDo(func(*DataList))
GetCreationTimestamp() int64
GetLastModifiedTimestamp() int64
GetName() string
SetName(string) *DataList
Data() []any
Append(values ...any) *DataList
Concat(other IDataList) *DataList
AppendDataList(other IDataList) *DataList
Get(index int) any
Clone() *DataList
Count(value any) int
Counter() map[any]int
Update(index int, value any) *DataList
InsertAt(index int, value any) *DataList
FindFirst(any) any
FindLast(any) any
FindAll(any) []int
Filter(func(any) bool) *DataList
ReplaceFirst(any, any) *DataList
ReplaceLast(any, any) *DataList
ReplaceAll(any, any) *DataList
ReplaceOutliers(float64, float64) *DataList
Pop() any
Drop(index int) *DataList
DropAll(...any) *DataList
DropIfContains(string) *DataList
Clear() *DataList
ClearStrings() *DataList
ClearNumbers() *DataList
ClearNaNs() *DataList
ClearNils() *DataList
ClearNilsAndNaNs() *DataList
ClearOutliers(float64) *DataList
ReplaceNaNsWith(any) *DataList
ReplaceNilsWith(any) *DataList
ReplaceNaNsAndNilsWith(any) *DataList
Normalize() *DataList
Standardize() *DataList
FillNaNWithMean() *DataList
FillWithMean() *DataList
FillForward(limit ...int) *DataList
FillBackward(limit ...int) *DataList
FillWithMedian() *DataList
FillWithMode() *DataList
FillByInterpolation(extrapolate ...bool) *DataList
MovingAverage(int) *DataList
WeightedMovingAverage(int, any) *DataList
ExponentialSmoothing(float64) *DataList
DoubleExponentialSmoothing(float64, float64) *DataList
MovingStdev(int) *DataList
Len() int
Sample(n int, withReplacement bool, options ...SamplingOptions) *DataList
SampleFrac(frac float64, withReplacement bool, options ...SamplingOptions) *DataList
Shuffle(options ...SamplingOptions) *DataList
Sort(ascending ...bool) *DataList
Map(mapFunc func(int, any) any) *DataList
Rank(ascending ...bool) *DataList
Reverse() *DataList
Upper() *DataList
Lower() *DataList
Capitalize() *DataList
// Statistics
Sum() float64
Max() float64
Min() float64
Mean() float64
WeightedMean(weights any) float64
GMean() float64
Median() float64
Mode() []float64
MAD() float64
Stdev() float64
StdevP() float64
Var() float64
VarP() float64
Range() float64
Quartile(int) float64
IQR() float64
Percentile(float64) float64
Difference() *DataList
Describe(...DescribeOptions) *DataTable
Summary()
// Error handling (instance-level)
Err() *ErrorInfo
ClearErr() *DataList
// comparison
IsEqualTo(*DataList) bool
IsTheSameAs(*DataList) bool
Show()
ShowRange(startEnd ...any)
ShowTypes()
ShowTypesRange(startEnd ...any)
// conversion
ParseNumbers() *DataList
ParseStrings() *DataList
ToF64Slice() []float64
ToStringSlice() []string
// Interpolation
LinearInterpolation(float64) float64
QuadraticInterpolation(float64) float64
LagrangeInterpolation(float64) float64
NearestNeighborInterpolation(float64) float64
NewtonInterpolation(float64) float64
HermiteInterpolation(float64, []float64) float64
// contains filtered or unexported methods
}
IDataList defines the behavior expected from a DataList.
type IDataTable ¶ added in v0.0.1
type IDataTable interface {
AtomicDo(func(*DataTable))
AppendCols(columns ...*DataList) *DataTable
AppendRowsFromDataList(rowsData ...*DataList) *DataTable
AppendRowsByColIndex(rowsData ...map[string]any) *DataTable
AppendRowsByColName(rowsData ...map[string]any) *DataTable
GetElement(rowIndex int, columnIndex string) any
GetElementByNumberIndex(rowIndex int, columnIndex int) any
GetCol(index string) *DataList
GetColByNumber(index int) *DataList
GetColByName(name string) *DataList
GetRow(index int) *DataList
GetRowByName(name string) *DataList
UpdateElement(rowIndex int, columnIndex string, value any) *DataTable
UpdateCol(index string, dl *DataList) *DataTable
UpdateColByNumber(index int, dl *DataList) *DataTable
UpdateRow(index int, dl *DataList) *DataTable
SetColToRowNames(columnIndex string) *DataTable
SetRowToColNames(rowIndex int) *DataTable
ChangeColName(oldName, newName string) *DataTable
GetColNameByNumber(index int) string
GetColIndexByName(name string) string
GetColIndexByNumber(number int) string
GetColNumberByName(name string) int
SetColNameByIndex(index string, name string) *DataTable
SetColNameByNumber(numberIndex int, name string) *DataTable
ColNamesToFirstRow() *DataTable
DropColNames() *DataTable
ColNames() []string
Headers() []string
SetColNames(colNames []string) *DataTable
SetHeaders(headers []string) *DataTable
FindRowsIfContains(value any) []int
FindRowsIfContainsAll(values ...any) []int
FindRowsIfAnyElementContainsSubstring(substring string) []int
FindRowsIfAllElementsContainSubstring(substring string) []int
FindColsIfContains(value any) []string
FindColsIfContainsAll(values ...any) []string
FindColsIfAnyElementContainsSubstring(substring string) []string
FindColsIfAllElementsContainSubstring(substring string) []string
DropColsByName(columnNames ...string) *DataTable
DropColsByIndex(columnIndices ...string) *DataTable
DropColsByNumber(columnIndices ...int) *DataTable
DropColsContainString() *DataTable
DropColsContainNumber() *DataTable
DropColsContainNil() *DataTable
DropColsContainNaN() *DataTable
DropColsContain(value ...any) *DataTable
DropColsContainExcelNA() *DataTable
DropRowsByIndex(rowIndices ...int) *DataTable
DropRowsByName(rowNames ...string) *DataTable
DropRowsContainString() *DataTable
DropRowsContainNumber() *DataTable
DropRowsContainNil() *DataTable
DropRowsContainNaN() *DataTable
DropRowsContain(value ...any) *DataTable
DropRowsContainExcelNA() *DataTable
Data(useNamesAsKeys ...bool) map[string][]any
// ToMap is the alias for Data().
// It returns a map[string][]any representation of the DataTable.
// Parameters:
// - useNamesAsKeys: Whether to use column names as keys in the returned map
// Returns:
// - map[string][]any: DataTable represented as a map of columns
ToMap(useNamesAsKeys ...bool) map[string][]any
Show()
ShowTypes()
ShowRange(startEnd ...any)
ShowTypesRange(startEnd ...any)
// GetRowIndexByName returns the index of a row by its name.
// Returns -1 and false if the row name does not exist.
// Always check the boolean return value to distinguish between "name not found" and "last row",
// since -1 typically represents the last element in Insyra's Get methods.
GetRowIndexByName(name string) (int, bool)
// GetRowNameByIndex returns the name of a row at the given index.
// Returns empty string and false if no name is set for the row.
GetRowNameByIndex(index int) (string, bool)
SetRowNameByIndex(index int, name string) *DataTable
ChangeRowName(oldName, newName string) *DataTable
RowNamesToFirstCol() *DataTable
DropRowNames() *DataTable
RowNames() []string
SetRowNames(rowNames []string) *DataTable
GetCreationTimestamp() int64
GetLastModifiedTimestamp() int64
// name
GetName() string
SetName(name string) *DataTable
// Statistics
Size() (numRows int, numCols int)
NumRows() int
NumCols() int
Count(value any) int
Mean() any
Describe(...DescribeOptions) *DataTable
Summary()
// Error handling (instance-level)
Err() *ErrorInfo
ClearErr() *DataTable
// Operations
Transpose() *DataTable
Clone() *DataTable
To2DSlice() [][]any
SimpleRandomSample(sampleSize int) *DataTable
Sample(n int, withReplacement bool, options ...SamplingOptions) *DataTable
SampleFrac(frac float64, withReplacement bool, options ...SamplingOptions) *DataTable
Shuffle(options ...SamplingOptions) *DataTable
TrainTestSplit(trainFrac float64, options ...SamplingOptions) (*DataTable, *DataTable)
Map(mapFunc func(rowIndex int, colIndex string, element any) any) *DataTable
SortBy(configs ...DataTableSortConfig) *DataTable
// Filters
Filter(filterFunc func(rowIndex int, columnIndex string, value any) bool) *DataTable
FilterByCustomElement(f func(value any) bool) *DataTable
FilterRows(filterFunc func(colIndex, colName string, x any) bool) *DataTable
FilterCols(filterFunc func(rowIndex int, rowName string, x any) bool) *DataTable
FilterColsByColIndexGreaterThan(threshold string) *DataTable
FilterColsByColIndexGreaterThanOrEqualTo(threshold string) *DataTable
FilterColsByColIndexLessThan(threshold string) *DataTable
FilterColsByColIndexLessThanOrEqualTo(threshold string) *DataTable
FilterColsByColIndexEqualTo(index string) *DataTable
FilterColsByColNameEqualTo(name string) *DataTable
FilterColsByColNameContains(substring string) *DataTable
FilterRowsByRowNameEqualTo(name string) *DataTable
FilterRowsByRowNameContains(substring string) *DataTable
FilterRowsByRowIndexGreaterThan(threshold int) *DataTable
FilterRowsByRowIndexGreaterThanOrEqualTo(threshold int) *DataTable
FilterRowsByRowIndexLessThan(threshold int) *DataTable
FilterRowsByRowIndexLessThanOrEqualTo(threshold int) *DataTable
FilterRowsByRowIndexEqualTo(index int) *DataTable
// Swap
SwapColsByName(columnName1 string, columnName2 string) *DataTable
SwapColsByIndex(columnIndex1 string, columnIndex2 string) *DataTable
SwapColsByNumber(columnNumber1 int, columnNumber2 int) *DataTable
SwapRowsByIndex(rowIndex1 int, rowIndex2 int) *DataTable
SwapRowsByName(rowName1 string, rowName2 string) *DataTable
// CSV
ToCSV(filePath string, setRowNamesToFirstCol bool, setColNamesToFirstRow bool, includeBOM bool) error
// JSON
// ToJSON saves the DataTable as a JSON file.
// Parameters:
// - filePath: Output JSON file path
// - useColNames: Whether to use column names as keys in JSON objects
// Returns:
// - error: Error information, returns nil if successful
ToJSON(filePath string, useColNames bool) error
// ToJSON_Bytes converts the DataTable to JSON format and returns it as a byte slice.
// Parameters:
// - useColNames: Whether to use column names as keys in JSON objects
// Returns:
// - []byte: JSON data as byte slice
ToJSON_Bytes(useColNames bool) []byte
// ToJSON_String converts the DataTable to JSON format and returns it as a string.
// Parameters:
// - useColNames: Whether to use column names as keys in JSON objects
// Returns:
// - string: JSON data as a string
ToJSON_String(useColNames bool) string
ToSQL(db *gorm.DB, tableName string, options ...ToSQLOptions) error
Merge(other IDataTable, direction MergeDirection, mode MergeMode, on ...string) (*DataTable, error)
AddColUsingCCL(newColName, ccl string) *DataTable
// Replace
Replace(oldValue, newValue any) *DataTable
ReplaceNaNsWith(newValue any) *DataTable
ReplaceNilsWith(newValue any) *DataTable
ReplaceNaNsAndNilsWith(newValue any) *DataTable
FillForward(int, ...string) *DataTable
FillBackward(int, ...string) *DataTable
FillWithMean(...string) *DataTable
FillWithMedian(...string) *DataTable
FillWithMode(...string) *DataTable
FillByInterpolation(...string) *DataTable
// Encoding
OneHotEncode(opts OneHotOptions) (*DataTable, *OneHotEncoder, error)
LabelEncode(opts LabelEncodeOptions) (*DataTable, *LabelEncoder, error)
OrdinalEncode(opts OrdinalEncodeOptions) (*DataTable, *OrdinalEncoder, error)
// Scaling
StandardScale(cols ...string) (*DataTable, *StandardScaler, error)
MinMaxScale(featureMin, featureMax float64, cols ...string) (*DataTable, *MinMaxScaler, error)
RobustScale(cols ...string) (*DataTable, *RobustScaler, error)
MaxAbsScale(cols ...string) (*DataTable, *MaxAbsScaler, error)
ReplaceInRow(rowIndex int, oldValue, newValue any, mode ...int) *DataTable
ReplaceNaNsInRow(rowIndex int, newValue any, mode ...int) *DataTable
ReplaceNilsInRow(rowIndex int, newValue any, mode ...int) *DataTable
ReplaceNaNsAndNilsInRow(rowIndex int, newValue any, mode ...int) *DataTable
ReplaceInCol(colIndex string, oldValue, newValue any, mode ...int) *DataTable
ReplaceNaNsInCol(colIndex string, newValue any, mode ...int) *DataTable
ReplaceNilsInCol(colIndex string, newValue any, mode ...int) *DataTable
ReplaceNaNsAndNilsInCol(colIndex string, newValue any, mode ...int) *DataTable
// contains filtered or unexported methods
}
IDataTable defines the behavior expected from a DataTable.
func ConvertLongDataToWide ¶ deprecated, added in v0.0.8
func ConvertLongDataToWide(data, factor IDataList, independents []IDataList, aggFunc func([]float64) float64) IDataTable
Deprecated: ConvertLongDataToWide is misleadingly named — it does not produce a true wide-form pivot. It groups rows by the factor column and aggregates every independent column and the observation column into a single row per factor (i.e. one cell per (factor, column) pair, never spreading the factor values into new column headers).
For an actual long-to-wide reshape (where unique values of one column become new column headers), use (*DataTable).Pivot. For the group-and-summarise behaviour this function provides, use (*DataTable).GroupBy followed by Aggregate, which is type-safe, surfaces errors via Err(), and supports the full AggregateOp set:
dt.GroupBy("factor").Aggregate(
insyra.AggregateConfig{SourceCol: "ind1", Op: insyra.OpMean},
insyra.AggregateConfig{SourceCol: "ind2", Op: insyra.OpMean},
insyra.AggregateConfig{SourceCol: "data", Op: insyra.OpMean},
)
This function will be removed in a future release.
type LabelEncodeOptions ¶ added in v0.2.19
type LabelEncodeOptions struct {
Column string
NewColumn string
SortBy LabelSort
HandleNaN NaNPolicy
Unknown UnknownPolicy
KeepOriginal bool
}
LabelEncodeOptions configures DataTable label encoding.
type LabelEncoder ¶ added in v0.2.19
type LabelEncoder struct {
// contains filtered or unexported fields
}
LabelEncoder stores a fitted label encoding.
func (*LabelEncoder) Classes ¶ added in v0.2.19
func (e *LabelEncoder) Classes() []any
Classes returns category values by id.
func (*LabelEncoder) Inverse ¶ added in v0.2.19
func (e *LabelEncoder) Inverse(values ...any) ([]any, error)
Inverse maps label ids back to category values.
func (*LabelEncoder) InverseTransform ¶ added in v0.2.19
func (e *LabelEncoder) InverseTransform(dt *DataTable) (*DataTable, error)
InverseTransform decodes the label column back to its source values.
func (*LabelEncoder) Kind ¶ added in v0.2.19
func (e *LabelEncoder) Kind() string
Kind returns the encoder family name.
type LabelSort ¶ added in v0.2.19
type LabelSort int
LabelSort controls id assignment order for LabelEncode.
type LogLevel ¶
type LogLevel int
const (
// LogLevelDebug is the log level for debug messages.
LogLevelDebug LogLevel = iota
// LogLevelInfo is the log level for info messages.
LogLevelInfo
// LogLevelWarning is the log level for warning messages.
LogLevelWarning
// LogLevelFatal is the log level for fatal messages.
LogLevelFatal
)
func PopError ¶ added in v0.2.2
func PopError(mode ErrPoppingMode) (LogLevel, string)
PopError retrieves and removes the first error from the buffer. If the buffer is empty, it returns LogLevelInfo and an empty string.
func PopErrorByFuncName ¶ added in v0.2.2
func PopErrorByFuncName(packageName, funcName string, mode ErrPoppingMode) (LogLevel, string)
func PopErrorByPackageName ¶ added in v0.2.2
func PopErrorByPackageName(packageName string, mode ErrPoppingMode) (LogLevel, string)
type MaxAbsScaler ¶ added in v0.2.19
type MaxAbsScaler struct {
// contains filtered or unexported fields
}
MaxAbsScaler scales each column by its maximum absolute value, preserving sign and mapping data into [-1, 1].
func NewMaxAbsScaler ¶ added in v0.2.19
func NewMaxAbsScaler() *MaxAbsScaler
NewMaxAbsScaler returns an unfitted max-abs scaler.
func (*MaxAbsScaler) Fit ¶ added in v0.2.19
Fit learns scaling parameters from the given columns without modifying dt. cols is required; pass at least one column reference (name or Excel-style index such as "A").
func (*MaxAbsScaler) FitDataList ¶ added in v0.2.19
FitDataList learns scaling parameters from a single DataList.
func (*MaxAbsScaler) FitTransform ¶ added in v0.2.19
FitTransform fits on cols and immediately returns the scaled table.
func (*MaxAbsScaler) FitTransformDataList ¶ added in v0.2.19
FitTransformDataList fits on dl and returns a new scaled DataList.
func (*MaxAbsScaler) InverseTransform ¶ added in v0.2.19
InverseTransform restores the original scale of fitted columns and returns a new table. Unfitted columns pass through unchanged; fitted columns absent from dt are simply skipped (so predictions covering a subset still work).
func (*MaxAbsScaler) InverseTransformDataList ¶ added in v0.2.19
InverseTransformDataList restores the original scale of dl, returning a new DataList.
func (*MaxAbsScaler) Kind ¶ added in v0.2.19
func (s *MaxAbsScaler) Kind() string
Kind returns the scaler family name ("standard", "minmax", "robust", "maxabs").
func (*MaxAbsScaler) Params ¶ added in v0.2.19
func (s *MaxAbsScaler) Params() map[string]ScalerParams
Params returns the fitted parameters keyed by output column name.
func (*MaxAbsScaler) Transform ¶ added in v0.2.19
Transform applies the fitted parameters to dt and returns a new table. The original table is not modified. Unfitted columns pass through unchanged. A fitted column missing from dt is an error.
func (*MaxAbsScaler) TransformDataList ¶ added in v0.2.19
TransformDataList scales dl using the fitted parameters, returning a new DataList. The original list is not modified.
type MergeDirection ¶ added in v0.2.11
type MergeDirection int
const (
MergeDirectionHorizontal MergeDirection = iota
MergeDirectionVertical
)
type MinMaxScaler ¶ added in v0.2.19
type MinMaxScaler struct {
// contains filtered or unexported fields
}
MinMaxScaler scales columns into a [featureMin, featureMax] range.
func NewDefaultMinMaxScaler ¶ added in v0.2.19
func NewDefaultMinMaxScaler() *MinMaxScaler
NewDefaultMinMaxScaler returns a min-max scaler targeting [0, 1].
func NewMinMaxScaler ¶ added in v0.2.19
func NewMinMaxScaler(featureMin, featureMax float64) *MinMaxScaler
NewMinMaxScaler returns an unfitted min-max scaler targeting the given range.
func (*MinMaxScaler) Fit ¶ added in v0.2.19
Fit learns scaling parameters from the given columns without modifying dt. cols is required; pass at least one column reference (name or Excel-style index such as "A").
func (*MinMaxScaler) FitDataList ¶ added in v0.2.19
FitDataList learns scaling parameters from a single DataList.
func (*MinMaxScaler) FitTransform ¶ added in v0.2.19
FitTransform fits on cols and immediately returns the scaled table.
func (*MinMaxScaler) FitTransformDataList ¶ added in v0.2.19
FitTransformDataList fits on dl and returns a new scaled DataList.
func (*MinMaxScaler) InverseTransform ¶ added in v0.2.19
InverseTransform restores the original scale of fitted columns and returns a new table. Unfitted columns pass through unchanged; fitted columns absent from dt are simply skipped (so predictions covering a subset still work).
func (*MinMaxScaler) InverseTransformDataList ¶ added in v0.2.19
InverseTransformDataList restores the original scale of dl, returning a new DataList.
func (*MinMaxScaler) Kind ¶ added in v0.2.19
func (s *MinMaxScaler) Kind() string
Kind returns the scaler family name ("standard", "minmax", "robust", "maxabs").
func (*MinMaxScaler) Params ¶ added in v0.2.19
func (s *MinMaxScaler) Params() map[string]ScalerParams
Params returns the fitted parameters keyed by output column name.
func (*MinMaxScaler) Transform ¶ added in v0.2.19
Transform applies the fitted parameters to dt and returns a new table. The original table is not modified. Unfitted columns pass through unchanged. A fitted column missing from dt is an error.
func (*MinMaxScaler) TransformDataList ¶ added in v0.2.19
TransformDataList scales dl using the fitted parameters, returning a new DataList. The original list is not modified.
type NaNPolicy ¶ added in v0.2.19
type NaNPolicy int
NaNPolicy controls how missing (nil or NaN) source values are encoded.
type OneHotEncoder ¶ added in v0.2.19
type OneHotEncoder struct {
// contains filtered or unexported fields
}
OneHotEncoder stores fitted one-hot category mappings.
func (*OneHotEncoder) Categories ¶ added in v0.2.19
func (e *OneHotEncoder) Categories() map[string][]any
Categories returns source-column categories in encoded order.
func (*OneHotEncoder) InverseTransform ¶ added in v0.2.19
func (e *OneHotEncoder) InverseTransform(dt *DataTable) (*DataTable, error)
InverseTransform rebuilds source columns from one-hot indicators.
func (*OneHotEncoder) Kind ¶ added in v0.2.19
func (e *OneHotEncoder) Kind() string
Kind returns the encoder family name.
func (*OneHotEncoder) OutputColumns ¶ added in v0.2.19
func (e *OneHotEncoder) OutputColumns() []string
OutputColumns returns generated indicator column names.
type OneHotOptions ¶ added in v0.2.19
type OneHotOptions struct {
Columns []string
DropFirst bool
HandleNaN NaNPolicy
Unknown UnknownPolicy
Prefix string
Separator string
KeepOriginal bool
SortCategories bool
}
OneHotOptions configures DataTable one-hot encoding.
type OrdinalEncodeOptions ¶ added in v0.2.19
type OrdinalEncodeOptions struct {
Column string
Order []any
NewColumn string
HandleNaN NaNPolicy
Unknown UnknownPolicy
KeepOriginal bool
}
OrdinalEncodeOptions configures DataTable ordinal encoding.
type OrdinalEncoder ¶ added in v0.2.19
type OrdinalEncoder struct {
// contains filtered or unexported fields
}
OrdinalEncoder stores a fitted ordinal encoding.
func (*OrdinalEncoder) Classes ¶ added in v0.2.19
func (e *OrdinalEncoder) Classes() []any
Classes returns category values by id.
func (*OrdinalEncoder) Inverse ¶ added in v0.2.19
func (e *OrdinalEncoder) Inverse(values ...any) ([]any, error)
Inverse maps ordinal ids back to category values.
func (*OrdinalEncoder) InverseTransform ¶ added in v0.2.19
func (e *OrdinalEncoder) InverseTransform(dt *DataTable) (*DataTable, error)
InverseTransform decodes the ordinal column back to its source values.
func (*OrdinalEncoder) Kind ¶ added in v0.2.19
func (e *OrdinalEncoder) Kind() string
Kind returns the encoder family name.
type PivotConfig ¶ added in v0.2.18
type PivotConfig struct {
// Index lists the columns kept as identifiers; their unique combinations
// form the row keys of the output. At least one entry is required. Each
// entry is resolved by name first, then as an Excel-style index.
Index []string
// Columns names the column whose unique values become new column headers
// in the output. Required. Resolved by name first, then as an Excel-style
// index.
Columns string
// Values names the column whose cell values fill the new (Index, Columns)
// cells. Required. Resolved by name first, then as an Excel-style index.
Values string
// AggFunc is the aggregator applied when an (Index, Columns) combination
// occurs more than once. Recognised names: "sum", "mean" (alias "avg"),
// "median", "min", "max", "count" (non-nil), "countall" (group size),
// "stdev" (alias "std"), "stdevp" (alias "stdp"), "var", "varp", "first",
// "last", "nunique", "custom" (requires Custom). When empty, Pivot
// returns an error if any (Index, Columns) combination has duplicates.
AggFunc string
// Custom is required when AggFunc == "custom". It receives the values
// belonging to the (Index, Columns) cell as a *DataList in original row
// order, including nil entries.
Custom func(group *DataList) any
// FillNA fills cells where no input row matched the (Index, Columns)
// combination. Default nil.
FillNA any
// SortCols controls whether generated columns are emitted in sorted
// order of their key value (true) or first-seen order (false, default).
SortCols bool
}
PivotConfig describes a long-to-wide reshape produced by (*DataTable).Pivot.
Given a long-form table, Pivot keeps the rows identified by Index, spreads the unique values of Columns into new column headers, and fills the new cells with values drawn from the Values column. When the same (Index, Columns) combination appears in more than one input row, the duplicate values are reduced via AggFunc.
Column reference resolution: every column-name field below (Index, Columns, Values) is matched against column.name first; if no column has that name, it falls back to the Excel-style alphabetic index ("A" → column 0, "B" → column 1, ..., "AA" → column 26). The first row of data is never consulted. Tokens that match neither a name nor a valid alphabetic index produce an error.
type ReadSQLChunk ¶ added in v0.2.18
ReadSQLChunk is a streamed slice of rows produced by ReadSQLStream. Exactly one of Table or Err is set per chunk.
type ReadSQLOptions ¶ added in v0.2.0
type ReadSQLOptions struct {
// RowNameColumn names the column whose values should become DataTable row
// names. If empty, no column is treated as the row-name column. Defaults
// to "row_name" when neither RowNameColumn nor IndexCol is set.
RowNameColumn string
// IndexCol is an alias for RowNameColumn. When non-empty it overrides
// RowNameColumn, mirroring pandas' read_sql(index_col=...).
IndexCol string
// Query is a custom SQL query. When set, all other query-shape options
// (Columns, WhereClause, OrderBy, Limit, Offset, Schema) are ignored.
Query string
// Params binds positional parameters to Query (or to the auto-built
// query when supported). Pandas' read_sql(params=...) equivalent.
Params []any
// Columns restricts the auto-built SELECT to these columns. Ignored when
// Query is set.
Columns []string
// Schema is an optional schema (PostgreSQL) or database (MySQL) prefix
// for the auto-built query. SQLite ignores this.
Schema string
Limit int // Limit the number of rows to read
Offset int // Starting position for reading rows
WhereClause string // WHERE clause body (without the "WHERE" keyword)
OrderBy string // ORDER BY clause body (without the "ORDER BY" keyword)
// ParseDates names columns whose string/[]byte values should be parsed
// as time.Time. Several common ISO-style layouts are tried.
ParseDates []string
// DType forces the resulting Go type for the named columns. Recognized
// targets are the reflect.Type values for int64, float64, bool, string,
// time.Time, and []byte (for example, reflect.TypeFor[int64]()). Unknown
// targets fall back to default handling.
DType map[string]reflect.Type
// ChunkSize is the per-chunk row count used by ReadSQLStream. Zero
// falls back to defaultStreamChunkSize. Ignored by ReadSQL/ReadSQLContext.
ChunkSize int
}
type RobustScaler ¶ added in v0.2.19
type RobustScaler struct {
// contains filtered or unexported fields
}
RobustScaler centers on the median and scales by the IQR, making it robust to outliers.
func NewRobustScaler ¶ added in v0.2.19
func NewRobustScaler() *RobustScaler
NewRobustScaler returns an unfitted robust scaler.
func (*RobustScaler) Fit ¶ added in v0.2.19
Fit learns scaling parameters from the given columns without modifying dt. cols is required; pass at least one column reference (name or Excel-style index such as "A").
func (*RobustScaler) FitDataList ¶ added in v0.2.19
FitDataList learns scaling parameters from a single DataList.
func (*RobustScaler) FitTransform ¶ added in v0.2.19
FitTransform fits on cols and immediately returns the scaled table.
func (*RobustScaler) FitTransformDataList ¶ added in v0.2.19
FitTransformDataList fits on dl and returns a new scaled DataList.
func (*RobustScaler) InverseTransform ¶ added in v0.2.19
InverseTransform restores the original scale of fitted columns and returns a new table. Unfitted columns pass through unchanged; fitted columns absent from dt are simply skipped (so predictions covering a subset still work).
func (*RobustScaler) InverseTransformDataList ¶ added in v0.2.19
InverseTransformDataList restores the original scale of dl, returning a new DataList.
func (*RobustScaler) Kind ¶ added in v0.2.19
func (s *RobustScaler) Kind() string
Kind returns the scaler family name ("standard", "minmax", "robust", "maxabs").
func (*RobustScaler) Params ¶ added in v0.2.19
func (s *RobustScaler) Params() map[string]ScalerParams
Params returns the fitted parameters keyed by output column name.
func (*RobustScaler) Transform ¶ added in v0.2.19
Transform applies the fitted parameters to dt and returns a new table. The original table is not modified. Unfitted columns pass through unchanged. A fitted column missing from dt is an error.
func (*RobustScaler) TransformDataList ¶ added in v0.2.19
TransformDataList scales dl using the fitted parameters, returning a new DataList. The original list is not modified.
type RollingDataList ¶ added in v0.2.18
type RollingDataList struct {
// contains filtered or unexported fields
}
RollingDataList is the intermediate produced by DataList.Rolling. The terminal reducers (Sum / Mean / Min / Max / Median / Std / Var / Apply / Corr) each return a new DataList of the same length as the source. A RollingDataList carries a snapshot of the source data; the source itself is not held under lock while reducers run.
func (*RollingDataList) Apply ¶ added in v0.2.18
func (r *RollingDataList) Apply(fn func(window []any) any) *DataList
Apply runs fn over each window and writes its return value to the output column. fn receives the raw window slice (with the original any values, nils preserved), letting callers implement custom reducers. MinObs is counted on numeric values, but the slice passed to fn covers the full in-range window including nils.
func (*RollingDataList) Corr ¶ added in v0.2.18
func (r *RollingDataList) Corr(other *DataList) *DataList
Corr returns the rolling Pearson correlation against other. The two DataLists are aligned by index; pairs where either side is non-numeric or nil are skipped within each window. Windows with fewer than 2 valid pairs emit nil.
func (*RollingDataList) Max ¶ added in v0.2.18
func (r *RollingDataList) Max() *DataList
Max returns the rolling maximum.
func (*RollingDataList) Mean ¶ added in v0.2.18
func (r *RollingDataList) Mean() *DataList
Mean returns the rolling mean. When Weights are set the result is the weighted mean (sum of v_i * w_i divided by sum of w_i).
func (*RollingDataList) Median ¶ added in v0.2.18
func (r *RollingDataList) Median() *DataList
Median returns the rolling median (linear interpolation of the two middle values when the window has an even count of valid observations).
func (*RollingDataList) Min ¶ added in v0.2.18
func (r *RollingDataList) Min() *DataList
Min returns the rolling minimum.
func (*RollingDataList) Std ¶ added in v0.2.18
func (r *RollingDataList) Std() *DataList
Std returns the rolling sample (n-1) standard deviation. Windows with fewer than 2 valid values emit nil regardless of MinObs.
func (*RollingDataList) Sum ¶ added in v0.2.18
func (r *RollingDataList) Sum() *DataList
Sum returns the rolling sum. Weights, when set, multiply each value element-wise (length must equal Window).
func (*RollingDataList) Var ¶ added in v0.2.18
func (r *RollingDataList) Var() *DataList
Var returns the rolling sample (n-1) variance. Windows with fewer than 2 valid values emit nil regardless of MinObs.
type RollingOptions ¶ added in v0.2.18
RollingOptions configures a rolling-window computation. Window is required and must be positive. MinObs is the minimum number of valid (non-nil, numeric) observations a window must contain for the reducer to emit a value; when fewer valid observations are available the output is nil. MinObs defaults to Window when zero. Center, when true, anchors the window at the central index following pandas conventions (window covers [i-(w-1)/2, i+w/2], clipped to [0, n-1]). Weights, when set, must have length equal to Window and are used by Sum and Mean only.
type SQLActionIfTableExists ¶ added in v0.2.0
type SQLActionIfTableExists int
const (
SQLActionIfTableExistsFail SQLActionIfTableExists = iota
SQLActionIfTableExistsReplace
SQLActionIfTableExistsAppend
)
type SamplingOptions ¶ added in v0.2.19
SamplingOptions configures random sampling and train/test splitting.
type Scaler ¶ added in v0.2.19
type Scaler interface {
Fit(dt *DataTable, cols ...string) error
Transform(dt *DataTable) (*DataTable, error)
FitTransform(dt *DataTable, cols ...string) (*DataTable, error)
InverseTransform(dt *DataTable) (*DataTable, error)
Params() map[string]ScalerParams
Kind() string
}
Scaler is the shared surface for fitted, reusable feature scalers.
Unlike DataList.Normalize/Standardize (stateless, in-place), a Scaler fits parameters once and can Transform/InverseTransform new tables with the same parameters, which is the correct way to scale a test set with statistics learned from the training set (no data leakage).
type ScalerParams ¶ added in v0.2.19
type ScalerParams struct {
Column string
Kind string
Mean float64
Std float64
Min float64
Max float64
Median float64
Q1 float64
Q3 float64
IQR float64
MaxAbs float64
OutputMin float64
OutputMax float64
}
ScalerParams reports the fitted parameters for a single scaled column. Only the fields relevant to the scaler kind are populated; the rest stay at their zero value.
type StandardScaler ¶ added in v0.2.19
type StandardScaler struct {
// contains filtered or unexported fields
}
StandardScaler scales columns to zero mean and unit (sample) standard deviation, matching DataList.Standardize's use of the sample stdev.
func NewStandardScaler ¶ added in v0.2.19
func NewStandardScaler() *StandardScaler
NewStandardScaler returns an unfitted standard scaler.
func (*StandardScaler) Fit ¶ added in v0.2.19
Fit learns scaling parameters from the given columns without modifying dt. cols is required; pass at least one column reference (name or Excel-style index such as "A").
func (*StandardScaler) FitDataList ¶ added in v0.2.19
FitDataList learns scaling parameters from a single DataList.
func (*StandardScaler) FitTransform ¶ added in v0.2.19
FitTransform fits on cols and immediately returns the scaled table.
func (*StandardScaler) FitTransformDataList ¶ added in v0.2.19
FitTransformDataList fits on dl and returns a new scaled DataList.
func (*StandardScaler) InverseTransform ¶ added in v0.2.19
InverseTransform restores the original scale of fitted columns and returns a new table. Unfitted columns pass through unchanged; fitted columns absent from dt are simply skipped (so predictions covering a subset still work).
func (*StandardScaler) InverseTransformDataList ¶ added in v0.2.19
InverseTransformDataList restores the original scale of dl, returning a new DataList.
func (*StandardScaler) Kind ¶ added in v0.2.19
func (s *StandardScaler) Kind() string
Kind returns the scaler family name ("standard", "minmax", "robust", "maxabs").
func (*StandardScaler) Params ¶ added in v0.2.19
func (s *StandardScaler) Params() map[string]ScalerParams
Params returns the fitted parameters keyed by output column name.
func (*StandardScaler) Transform ¶ added in v0.2.19
Transform applies the fitted parameters to dt and returns a new table. The original table is not modified. Unfitted columns pass through unchanged. A fitted column missing from dt is an error.
func (*StandardScaler) TransformDataList ¶ added in v0.2.19
TransformDataList scales dl using the fitted parameters, returning a new DataList. The original list is not modified.
type ToSQLOptions ¶ added in v0.2.0
type ToSQLOptions struct {
IfExists SQLActionIfTableExists // "fail", "replace", "append"
RowNames bool
ColumnTypes map[string]string // custom types
// Schema is an optional schema (PostgreSQL) or database (MySQL) name to
// prefix the table reference with. SQLite ignores this. The caller is
// responsible for any required quoting; the value is passed through as-is.
Schema string
// BatchSize controls how many rows are bundled into a single multi-row
// INSERT. Zero falls back to defaultBatchSize. Note that the total number
// of bind parameters per batch (BatchSize * column-count) must stay below
// the driver limit (PostgreSQL/MySQL: 65535).
BatchSize int
}
type UnknownPolicy ¶ added in v0.2.19
type UnknownPolicy int
UnknownPolicy controls unseen categories encountered during Transform.
const (
// UnknownIgnore emits all-zero indicators or nil cells for unseen values.
UnknownIgnore UnknownPolicy = iota
// UnknownError returns an error for the first unseen value.
UnknownError
// UnknownAsNew extends the encoder with unseen values.
UnknownAsNew
)
type UnpivotConfig ¶ added in v0.2.18
type UnpivotConfig struct {
// IDVars lists the columns kept as-is (identifier columns). Each entry
// is resolved by name first, then as an Excel-style index.
IDVars []string
// ValueVars lists the columns to unpivot. Each entry is resolved by name
// first, then as an Excel-style index. When empty, all columns not
// listed in IDVars are unpivoted.
ValueVars []string
// VarName is the name of the new "variable" column in the output. When
// empty it defaults to "variable".
VarName string
// ValueName is the name of the new "value" column in the output. When
// empty it defaults to "value".
ValueName string
// DropNA, when true, omits rows whose value is nil or NaN.
DropNA bool
}
UnpivotConfig describes a wide-to-long reshape produced by (*DataTable).Unpivot.
Column reference resolution: every column-name field below (IDVars, ValueVars) is matched against column.name first; if no column has that name, it falls back to the Excel-style alphabetic index ("A" → column 0, "B" → column 1, ..., "AA" → column 26). The first row of data is never consulted. Tokens that match neither a name nor a valid alphabetic index produce an error.
Source Files
¶
- atomic.go
- ccl.go
- config.go
- datalist.go
- datalist_describe.go
- datalist_impute.go
- datalist_interpolation.go
- datalist_map.go
- datalist_notatomic.go
- datalist_sampling.go
- datalist_summary.go
- datalist_window.go
- datatable.go
- datatable_ccl.go
- datatable_colindex.go
- datatable_colname.go
- datatable_csv.go
- datatable_describe.go
- datatable_encode.go
- datatable_filters.go
- datatable_from_sql.go
- datatable_groupby.go
- datatable_groupby_describe.go
- datatable_groupby_window.go
- datatable_impute.go
- datatable_json.go
- datatable_map.go
- datatable_merge.go
- datatable_name.go
- datatable_pivot.go
- datatable_preprocess.go
- datatable_replace.go
- datatable_rowname.go
- datatable_sampling.go
- datatable_scale.go
- datatable_sort.go
- datatable_summary.go
- datatable_swap.go
- datatable_to_sql.go
- datatable_window.go
- describe_options.go
- describe_stats.go
- error_buffer.go
- init.go
- interfaces.go
- logger.go
- read.go
- show.go
- utils.go
- version.go
Directories
¶
| Path | Synopsis |
|---|---|
| allpkgs | `allpkgs` package imports all Insyra packages. |
| cmd | |
| cmd/insyra (command) | |
| csvxl | `csvxl` package provides functions for converting between CSV and Excel formats. |
| datafetch | file: insyra/datafetch/yfinance_errors.go |
| engine | Package engine exposes reusable core utilities for external projects. |
| finance | Package finance provides high-precision financial calculations (annuities, loans, NPV/IRR, rate conversions, amortization schedules) built on top of github.com/TimLai666/go-decimal. |
| gplot | `gplot` package provides functions for data visualization using gonum/plot. |
| internal | |
| isr | `isr` package is the syntactic sugar of insyra. |
| lp | `lp` package provides linear programming functionalities using GLPK. |
| lpgen | `lpgen` package provides functions for generating linear programming models. |
| mkt | `mkt` package provides marketing-related data analysis functions. |
| parallel | `parallel` package provides functions for parallel processing. |
| plot | `plot` package provides functions for creating and manipulating plots. |
| py | `py` package provides functions for working with Python. |
| stats | `stats` package provides functions for statistical analysis. |
| stats/internal/fa | fa/GPArotation_GPFoblq.go |
| stats/internal/parutil | Package parutil provides lightweight parallel-for primitives used inside the stats package's internal compute paths. |
| tools | |
| tools/gendocs (command) | |
