Documentation
¶
Overview ¶
Package fullfeed converts a partial feed into a full-text feed.
Index ¶
- Variables
- func ContentCacheLength() int
- func GetFullContent(config Config, link string) (fullContent string, err error)
- func GetFullFeed(config Config) (feed *feeds.Feed, errors []error)
- func GetURL(url string) (io.Reader, error)
- func InitContentCache(n int) (err error)
- func LoadSourceFeed(config Config) (feed *feeds.Feed, err error)
- type Config
- type ExtractMethod
Constants ¶
This section is empty.
Variables ¶
View Source
// UserAgent is the User-Agent header value; it defaults to "fullfeed/1.0".
var UserAgent = "fullfeed/1.0"
UserAgent is the User-Agent header value.
Functions ¶
func ContentCacheLength ¶
func ContentCacheLength() int
ContentCacheLength returns the number of cached entries
func GetFullContent ¶
GetFullContent returns the full content for the specified link.
func GetFullFeed ¶
GetFullFeed returns the feed with full-text content.
func InitContentCache ¶
InitContentCache sets up the optional download cache.
Types ¶
type Config ¶
// Config holds the settings for a feed: the link to the original feed, the
// full text extraction method, and the cleaning filters.
type Config struct {
// BaseHref is the base URL for all relative URLs.
// It must be specified if it differs from the feed domain.
BaseHref string `json:"base_href" yaml:"base_href"`
// Description is the feed description.
Description string `json:"description" yaml:"description"`
// Filters groups the feed cleaning filters.
Filters struct {
// Descriptions lists words; articles with these words in the
// description are skipped.
Descriptions []string `json:"descriptions" yaml:"descriptions"`
// Selectors lists selectors that are removed from the content.
Selectors []string `json:"selectors" yaml:"selectors"`
// Text lists words; blocks of text that contain these words are removed.
Text []string `json:"text" yaml:"text"`
// Titles lists words; articles with these words in the title are skipped.
Titles []string `json:"titles" yaml:"titles"`
} `json:"filters" yaml:"filters"`
// MaxWorkers is the maximum number of processing workers (default 10).
MaxWorkers uint `json:"max_workers" yaml:"max_workers"`
// Method is the full text extraction method.
// Supported methods: query (like jquery), xpath, readability (default).
Method ExtractMethod `json:"method" yaml:"method"`
// MethodRequest is the full text extraction request.
// NOTE(review): presumably the selector or XPath expression evaluated by
// Method — confirm against the extraction code.
MethodRequest string `json:"method_request" yaml:"method_request"`
// URL is the link to the original feed.
URL string `json:"url" yaml:"url"`
}
Config holds the settings for a feed.
type ExtractMethod ¶
// ExtractMethod identifies the method used to extract the full text.
type ExtractMethod string
ExtractMethod is the method used to extract the full text.
var ( // QueryMethod with goquery QueryMethod ExtractMethod = "query" // ReadabilityMethod by default ReadabilityMethod ExtractMethod = "readability" // XPathMethod with XML Path Language XPathMethod ExtractMethod = "xpath" )
Click to show internal directories.
Click to hide internal directories.