overview

package module
v0.0.0-...-98ae782 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 20, 2025 License: MIT Imports: 18 Imported by: 2

README

nntp-overview

nntp-overview generates .overview files per group from incoming usenet headers (POST, IHAVE, TAKETHIS).

Generation runs concurrently, and overview files are memory-mapped (mmap) while they are open.

Overview file content is human-readable and is based on the RFC overview.FMT field list:

	"MSG_NUM:",     // MSG_NUM field is not present in output to overview.FMT command but when requesting XOVER we output it
	"Subject:",
	"From:",
	"Date:",
	"Message-ID:",
	"References:",
	"Bytes:",
	"Lines:",
	"Xref:full",

OV_Handler

OV_Handler processes MMAP open/retrieve/park/close requests and schedules workers for writing overview data.

The system keeps track of the last message number per group when adding a new overview entry to a group.

When integrated into a Usenet server, it works as a central message-numbering station per group.

USAGE

import (
	"github.com/go-while/go-utils"
	"github.com/go-while/nntp-overview"
)

Example integration in repo: nntp-overview_test

Contributing

Pull requests are welcome.

For major changes, please open an issue first to discuss what you would like to change.

License

MIT

Author

go-while

Documentation

Index

Constants

View Source
const (
	XREF_PREFIX = "nntp" // todo: hardcoded fix: should be a variable hostname?

	MAX_FLUSH int64 = 5 // flush mmaps every N seconds

	LIMIT_SPLITMAX_NEWSGROUPS int = 25
	MAX_REF                   int = 30

	SIZEOF_FOOT     int = 7
	OVL_CHECKSUM    int = 5
	OVERVIEW_FIELDS int = 9 // minimum. with latest version is 10
	OVERVIEW_TABS   int = OVERVIEW_FIELDS - 1

	ZERO_PATTERN  string = "zerofill" // replace this pattern with zero padding
	ZERO_FILL_STR string = "\x00"     // zero pad with nul
	FILE_DELIM    string = "\n"       // string used as file delimiter

	OV_RESERVE_BEG = 128 // initially reserve n bytes in overview file
	OV_RESERVE_END = 128 // finally reserve n bytes in overview file

	HEADER_BEG       string = string("#ov_init=")
	HEADER_END       string = string("EOH" + FILE_DELIM)
	BODY_END         string = string(FILE_DELIM + "EOV" + FILE_DELIM)
	FOOTER_BEG       string = string(BODY_END + "time=")
	FOOTER_END       string = string(FILE_DELIM + "EOF" + FILE_DELIM)
	ZERO_PATTERN_LEN int    = len(ZERO_PATTERN) // the len of the pattern
	ERR_OV_OVERFLOW  string = "ef01"            // write overview throws errstr 'ef01' when buffer is full and needs to grow

	CHECK_CHARS_LC string = "abcdefghijklmnopqrstuvwxyz0123456789"
)

Variables

View Source
var (
	BAD_FROM_MATCH = []string{
		"FBInCIAnNSATerroristSlayer <FBInCIAnNSATerroristSlayer@yahoo.com",
	}

	BAD_FROM_CONTAINS = []string{
		"root@127.0.0.1",
		"_@_.__",
		"dmbtimesinc@aol.com",
		"@jbinup",
		"CPP-Gebruiker",
		"HeartDoc Andrew",
		"forger@",
		"forgeries@",
		"forgery@",
		"forgers@",
		"forger@",
		"paypal",
		"nforystek@sosouix.net",
		"kill@llspammers.dead",
		"front@556184.net",
		"disciple@T3WiJ.com",
		"loandbehold8434@hotmail.com",
		"SummersTrees@hotmail.com",
		"e447560@rppkn.com",
		"xyz91987@gmail.com",
		"grigiox4@gmail.com",
		"r.c.bates@btinternet.com",
		"@getfucked.com",
		"indiabusinesszone954@gmail.com",
		"habicahidi@gmail.com",
		"l4mttdnwdm@gmail.com",
		"fkhall@gmail.com",
		"00@derbyshire.kooks.out",
		"laughing@u.kook",
		"jovuli.elbahja@gmail.com",
		"thrinaxodon.fan.club512@gmail.com",
		"FNVWe@altusenetkooks.xxx",
		"maxcell9999@gmail.com",
		"@fuckhoward.net",
		"dudumacudu@mail.com",
		"nad318b404@gmail.invalid",
		"rahul.rwaltz@gmail.com",
		"enometh@meer.net",
		"pramod@confluxsystems.com",
		"nomesh.usithr@gmail.com",
		"kesava1.conflux@gmail.com",
		"xyz91987@google.com",
		"killvirus@coronavirus.com",
		"@NoReply.Invalid.com",
		"@RubyRidge.COM",
		"@XXX999.net",
		"@670iybn.com",
		"@dhp.com",
		"jd6471836@gmail.com",
		"theusenet@mail.com",
		"haya0626@mxb.meshnet.or.jp",
		"noreply@breaka.net",
		"marwa.kotb2@mediu.ws",
		"ladypilot7@gmail.com",
		"discounts@iphone",
		"ast.ahad@gmail.com",
		"jerrycalzado@gmx.com",
		"pippobattipaglia@yahoo.es",
		"ginobusciarello@outlook.com",
		"thehighestgod@gmail.com",
		"gburnore@databasix.com",
		"arcturianone@earthlink.net",
		"nostalkingme@rocketmail.com",
		"nanaestalkers@yahoo.ca",
		"stopstalking@gmx.com",
		"daniellamirande45@gmail.com",
		"nomesh.usithr@gmail.com",
		"Exoticpsychstore",
		"exoticpsychstore@gmail.com",
		"FBInCIAnNSATerroristSlayer",
		"penner@dfsadfsdfsdf.de",
		"FreeBsd@StoleMySoul.com",
		"exposed4@all.2c",
		"nforystek@outlook.com",
		"@bigfoot.com",
		"newsserver@freedyn.net",
		"danijela_milosevic@hotmail.com",
		"ooxfxs@hornysex.com",
		"make@money.com",
		"pma4jobs",
		"anabana1233@hotmail.com",
		"crisisacris@terra.es",
		"verdad@mac.com",
	}

	BAD_FROM_SUFFIX = []string{}

	BAD_SUBJ_SUFFIX = []string{
		"$$$",
		"!!!",
	}

	BAD_SUBJ_PREFIX = []string{
		"AD: ~~**",
		"TURN $",
		"Buy ",
		" Buy ",
		"•••",
		">>>",
		"~~>",
		"~~~",
		"~~*",
		"$$$",
		"!!!",
		"�ï",
	}

	BAD_SUBJ_MATCH = []string{
		".",
		",",
		";",
		"-",
		"+",
	}

	BAD_SUBJ_CONTAINS = []string{}/* 137 elements not displayed */

)
View Source
var (
	Psql     = 256  // parallel sql threads
	Flushmax = 4096 // * 16^idx = max cached to flush

)
View Source
var (
	AUTOINDEX         bool   = true
	DEBUG_OV          bool   = false
	CR                string = "\r"
	LF                string = "\n"
	CRLF              string = CR + LF
	DOT               string = "."
	DOTCRLF           string = DOT + CRLF
	PRELOAD_ZERO_1K   string
	PRELOAD_ZERO_4K   string
	PRELOAD_ZERO_128K string
	PRELOAD_ZERO_1M   string

	Known_msgids Known_MessageIDs
	OV_handler   OV_Handler
	Overview     OV
	OVIndex      OverviewIndex

	MAX_Open_overviews_chan chan struct{} // locking queue prevents opening of more overview files
	OV_AUTOINDEX_CHAN       = make(chan *NEWOVI, 1)
	// The date format layouts to try
	NNTPDateLayoutsExtended = []string{}/* 167 elements not displayed */

	NNTPDateLayouts = NNTPDateLayoutsExtended
)

Functions

func CMD_NewOverviewIndex

func CMD_NewOverviewIndex(file string, group string) bool

func Clean_Headline

func Clean_Headline(msgid string, line string, debug bool) string

func Cleanup_NewsGroups_String

func Cleanup_NewsGroups_String(newsgroup string) string

func ClearStat

func ClearStat(stat string, db *sql.DB) error

func Close_ov

func Close_ov(who string, ovfh *OVFH, update_footer bool, force_close bool) error

func ConnSQL

func ConnSQL(username string, password string, hostname string, database string) (*sql.DB, error)

func ConstructHeader

func ConstructHeader(headermap map[string][]string, keysorder *[]string) (headlines *[]string)

func Construct_OVL

func Construct_OVL(ovl OVL) string

func Create_ov

func Create_ov(who string, File_path string, hash string, pages int) error

func FilterMessageID

func FilterMessageID(messageid string) bool

func Flush_ov

func Flush_ov(who string, ovfh *OVFH) error

func GetMessageID

func GetMessageID(amsgid string, laxmid bool) (string, error)

func IsMsgidHashSQL

func IsMsgidHashSQL(messageidhash string, db *sql.DB) (bool, bool, string, error)

func IsValidGroupName

func IsValidGroupName(group string) bool

func MsgIDhash2mysql

func MsgIDhash2mysql(messageidhash string, size int, db *sql.DB) (bool, error)

func MsgIDhash2mysqlMany

func MsgIDhash2mysqlMany(key string, list []Msgidhash_item, db *sql.DB, tried int) (bool, error)

func MsgIDhash2mysqlStat

func MsgIDhash2mysqlStat(messageidhash string, stat string, db *sql.DB) (bool, error)

func OV_AutoIndex

func OV_AutoIndex()

func ParseDate

func ParseDate(dv string) (unixepoch int64, err error)

func ParseHeaderKeys

func ParseHeaderKeys(head []string, laxmid bool) (headermap map[string][]string, keysorder []string, msgid string, err error)

func PrintHashMySQL

func PrintHashMySQL(printrocksdb bool)
func Print_lines(lines []string)

func ProcessHash2sql

func ProcessHash2sql(dbh *sql.DB, hash2sql *chan map[string][]Msgidhash_item, donechan *chan struct{}, sqldonechan *chan struct{}, sqlparchan *chan struct{}, wg *sync.WaitGroup)

func ReOrderOverview

func ReOrderOverview(file string, group string, doWritestamps bool, hashdb *sql.DB) bool

func Read_Foot_ov

func Read_Foot_ov(who string, ovfh *OVFH) (string, error)

func Read_Head_ov

func Read_Head_ov(who string, ovfh *OVFH) (string, error)
func Replay_Footer(who string, ovfh *OVFH) bool

func Rescan_Overview

func Rescan_Overview(who string, file_path string, group string, mode int, DEBUG bool, db *sql.DB, hash2sql *chan map[string][]Msgidhash_item) (bool, uint64)

Rescan_Overview returns: true|false, last_msgnum

func Rescan_help

func Rescan_help()

func Scan_Overview

func Scan_Overview(file string, group string, a uint64, b uint64, fields string, conn net.Conn, initline string, txb *int) ([]string, error)

func ShortMsgIDhash2mysql

func ShortMsgIDhash2mysql(shorthash string, offset int, db *sql.DB) (bool, error)

func Split_NewsGroups

func Split_NewsGroups(msgid string, newsgroups_str string) []string

func Split_References

func Split_References(astring string) []string

func Test_Overview

func Test_Overview(who string, file_path string, DEBUG bool) bool

func Watch_overview_Workers

func Watch_overview_Workers(maxworkers int)

func WriteOverviewIndex

func WriteOverviewIndex(file string, msgnums []uint64, offsets map[uint64]int64)

func WriteOverviewIndex_INDEX

func WriteOverviewIndex_INDEX(file string, data *IndexLine)

Types

type AsortFuncInt64

type AsortFuncInt64 []int64

func (AsortFuncInt64) Len

func (nf AsortFuncInt64) Len() int

func (AsortFuncInt64) Less

func (nf AsortFuncInt64) Less(i, j int) bool

func (AsortFuncInt64) Swap

func (nf AsortFuncInt64) Swap(i, j int)

type CHECK_GROUPS

type CHECK_GROUPS struct {
	// contains filtered or unexported fields
}

type CachedOffset

type CachedOffset struct {
	Offset  int64
	Created int64
}

type IndexLine

type IndexLine struct {
	// contains filtered or unexported fields
}

type Known_MessageIDs

type Known_MessageIDs struct {
	Debug      bool // print debug messages
	MAP_MSGIDS int  // capacity
	// contains filtered or unexported fields
}

func (*Known_MessageIDs) ExpireThread

func (km *Known_MessageIDs) ExpireThread()

func (*Known_MessageIDs) SetKnown

func (km *Known_MessageIDs) SetKnown(msgidhash string) bool

func (*Known_MessageIDs) UnsetKnown

func (km *Known_MessageIDs) UnsetKnown(msgidhash string)

func (*Known_MessageIDs) UnsetKnownQuick

func (km *Known_MessageIDs) UnsetKnownQuick(msgidhash string)

type Msgidhash_item

type Msgidhash_item struct {
	Hash string
	Size int
}

type NEWOVI

type NEWOVI struct {
	// contains filtered or unexported fields
}

type OV

type OV struct {
	OVIC chan OVL
	// contains filtered or unexported fields
}

func (*OV) GO_pi_ov

func (ov *OV) GO_pi_ov(who string, overviewline string, newsgroup string, hash string, cachedir string, retchan chan ReturnChannelData) ReturnChannelData

func (*OV) Load_Overview

func (ov *OV) Load_Overview(maxworkers int, max_queue_size int, max_open_mmaps int, known_messageids int, ov_opener int, ov_closer int, close_always bool, more_parallel bool, stop_server_chan chan bool, debug_OV_handler bool) bool

type OVFH

type OVFH struct {
	File_path   string
	File_handle *os.File
	Mmap_handle mmap.MMap
	Mmap_size   int
	Mmap_range  int
	Time_open   int64
	Time_flush  int64
	Written     int
	Findex      int
	Last        uint64
	Hash        string
}

func Grow_ov

func Grow_ov(who string, ovfh *OVFH, pages int, blocksize string, mode int, delete bool) (*OVFH, error)

func Open_ov

func Open_ov(who string, file_path string) (*OVFH, error)
func Update_Footer(who string, ovfh *OVFH, src string) (*OVFH, error)

func Write_ov

func Write_ov(who string, ovfh *OVFH, data string, is_head bool, is_foot bool, grow bool, delete bool) (*OVFH, error, string)

type OVL

type OVL struct {
	// stores extracted overview line values
	/*
		"Subject:",
		"From:",
		"Date:",
		"Message-ID:",
		"References:",
		"Bytes:",
		"Lines:",
		"Xref:full",
	*/
	Subject        string
	From           string
	Date           string
	Messageid      string
	Messageidhash  string
	References     []string
	Bytes          int
	Lines          int
	Xref           string
	Newsgroups     []string
	Grouphashs     map[string]string // key group, val hash
	Checksum       int               // has to match OVL_CHECKSUM
	Retchan        chan []*ReturnChannelData
	ReaderCachedir string
}

func Extract_overview

func Extract_overview(msgid string, header []string) OVL

type OV_Handler

type OV_Handler struct {
	V     map[string]OV_Handler_data
	Debug bool

	STOP           chan bool
	MAX_OPEN_MMAPS int
	CLOSE_ALWAYS   bool
	// contains filtered or unexported fields
}

func (*OV_Handler) Check_idle

func (oh *OV_Handler) Check_idle()

func (*OV_Handler) DelHandle

func (oh *OV_Handler) DelHandle(hash string)

func (*OV_Handler) GetOpen

func (oh *OV_Handler) GetOpen(hid int, who string, file_path string, hash string) (*OVFH, bool)

func (*OV_Handler) Is_assigned

func (oh *OV_Handler) Is_assigned(hash string) bool

func (*OV_Handler) Is_open

func (oh *OV_Handler) Is_open(hash string) bool

func (*OV_Handler) KILL

func (oh *OV_Handler) KILL(who string)

func (*OV_Handler) Overview_handler_CLOSER

func (oh *OV_Handler) Overview_handler_CLOSER(hid int, close_request_chan chan Overview_Close_Request)

func (*OV_Handler) Overview_handler_OPENER

func (oh *OV_Handler) Overview_handler_OPENER(hid int, open_request_chan chan Overview_Open_Request)

func (*OV_Handler) Park

func (oh *OV_Handler) Park(hid int, who string, ovfh *OVFH) bool

func (*OV_Handler) SetOpen

func (oh *OV_Handler) SetOpen(hid int, who string, ovfh *OVFH) bool

type OV_Handler_data

type OV_Handler_data struct {
	// contains filtered or unexported fields
}

type Open_MMAP_Overviews

type Open_MMAP_Overviews struct {
	// contains filtered or unexported fields
}

Every worker checks against Open_MMAP_Overviews whether any other worker is processing this group right now. If map[group]bool returns true, the waiting worker places a channel in ch and waits for the return signal.

type OverviewIndex

type OverviewIndex struct {
	IndexMap          map[string]map[uint64]CachedOffset // data[group][msgnum]offset
	IndexCache        []string                           // rotating list with cached index groups
	IndexCacheSize    int                                // number of groups we cache an index for
	IndexSubCacheSize int                                // number of offsets we cache for groups
	// contains filtered or unexported fields
}

func (*OverviewIndex) GetOVIndexCacheOffset

func (ovi *OverviewIndex) GetOVIndexCacheOffset(group string, a uint64) (offset int64)

func (*OverviewIndex) MemDropIndexCache

func (ovi *OverviewIndex) MemDropIndexCache(group string, fnum uint64)

func (*OverviewIndex) ReadOverviewIndex

func (ovi *OverviewIndex) ReadOverviewIndex(file string, group string, a uint64, b uint64) int64

func (*OverviewIndex) SetOVIndexCacheOffset

func (ovi *OverviewIndex) SetOVIndexCacheOffset(group string, fnum uint64, offset int64)

type Overview_Close_Request

type Overview_Close_Request struct {
	// contains filtered or unexported fields
}

type Overview_Open_Request

type Overview_Open_Request struct {
	// contains filtered or unexported fields
}

type Overview_Reply

type Overview_Reply struct {
	// contains filtered or unexported fields
}

type ReturnChannelData

type ReturnChannelData struct {
	Retbool   bool
	Msgnum    uint64
	Newsgroup string
	Grouphash string
}

type SPAMFILTER

type SPAMFILTER struct {
	// contains filtered or unexported fields
}

func (*SPAMFILTER) Spamfilter

func (s *SPAMFILTER) Spamfilter(input string, spamtype string, msgid string) bool

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL