Documentation
¶
Index ¶
- func IsProbablyReaderable(htmlSource string, opts ...Option) bool
- type Node
- func (n *Node) AppendChild(child *Node)
- func (n *Node) FirstChild() *Node
- func (n *Node) FirstElementChild() *Node
- func (n *Node) GetAttribute(name string) string
- func (n *Node) GetAttributeByIndex(idx int) *attribute
- func (n *Node) GetAttributeLen() int
- func (n *Node) GetClassName() string
- func (n *Node) GetElementById(id string) *Node
- func (n *Node) GetId() string
- func (n *Node) GetInnerHTML() string
- func (n *Node) GetNodeName() string
- func (n *Node) GetSrc() string
- func (n *Node) GetSrcset() string
- func (n *Node) GetTextContent() string
- func (n *Node) HasAttribute(name string) bool
- func (n *Node) LastChild() *Node
- func (n *Node) RemoveAttribute(name string)
- func (n *Node) RemoveChild(child *Node) (*Node, error)
- func (n *Node) ReplaceChild(newNode, oldNode *Node) *Node
- func (n *Node) SetAttribute(name, value string)
- func (n *Node) SetClassName(str string)
- func (n *Node) SetId(str string)
- func (n *Node) SetInnerHTML(html string)
- func (n *Node) SetTextContent(text string)
- type Option
- func AllowedVideoRegex(rgx *regexp.Regexp) Option
- func CharThreshold(n int) Option
- func ClassesToPreserve(classes ...string) Option
- func DisableJSONLD(b bool) Option
- func Html2Text(f func(string) string) Option
- func KeepClasses(b bool) Option
- func MaxElemsToParse(n int) Option
- func MinContentLength(len int) Option
- func MinScore(score float64) Option
- func NTopCandidates(n int) Option
- func Serializer(f func(*Node) string) Option
- func VisibilityChecker(f func(*html.Node) bool) Option
- type Options
- type Readability
- type Result
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func IsProbablyReaderable ¶
Decides whether or not the document is reader-able without parsing the whole thing. Options:
- options.minContentLength (default 140), the minimum node content length used to decide if the document is readerable
- options.minScore (default 20), the minimum cumulative 'score' used to determine if the document is readerable
- options.visibilityChecker (default isNodeVisible), the function used to determine if a node is visible
Types ¶
type Node ¶ added in v0.1.2
// Node is a single node of the document tree. It carries its own identity
// (type, names, attributes), links to neighbouring nodes, and a set of
// document-level fields.
type Node struct {
NodeType uint
LocalName string
TagName string
Attributes []*attribute
// Links to neighbouring nodes in the tree.
// NOTE(review): ChildNodes vs. Children and the *ElementSibling variants
// presumably mirror the DOM (all nodes vs. element nodes only) — confirm.
ParentNode *Node
NextSibling *Node
PreviousSibling *Node
PreviousElementSibling *Node
NextElementSibling *Node
ChildNodes []*Node
Children []*Node
// Document-level fields.
// NOTE(review): presumably only populated on the document root — confirm.
DocumentURI string
Body *Node
DocumentElement *Node
ReadabilityNode *readabilityNode
ReadabilityDataTable *readabilityDataTable
// contains filtered or unexported fields
}
func (*Node) AppendChild ¶ added in v0.1.2
func (*Node) FirstChild ¶ added in v0.1.2
func (*Node) FirstElementChild ¶ added in v0.1.2
func (*Node) GetAttribute ¶ added in v0.1.2
func (*Node) GetAttributeByIndex ¶ added in v0.1.2
func (*Node) GetAttributeLen ¶ added in v0.1.2
func (*Node) GetClassName ¶ added in v0.1.2
// setStyle sets a single CSS property in the node's inline "style" attribute.
//
// jsName is looked up in styleMap to obtain the CSS property name. Any
// existing declaration for that property is removed and a fresh
// "name: value;" declaration is appended at the end. The remaining
// declarations keep their relative order and are re-joined as
// "prop: value;" items separated by a single space.
//
// The attribute is split on every ';', so a value that itself contains a
// semicolon (for example inside url(...) or a quoted string) is not
// supported.
func (s *style) setStyle(jsName, styleValue string) {
	cssName := styleMap[jsName]
	current := s.node.getAttribute("style")

	parts := strings.Split(current, ";")
	kept := make([]string, 0, len(parts)+1)
	for _, decl := range parts {
		decl = strings.TrimSpace(decl)
		if decl == "" {
			// Empty segments come from a trailing ';' or from ";;".
			continue
		}
		// Drop the previous declaration of the property being set; a segment
		// without ':' cannot name a property and is kept untouched.
		if colon := strings.IndexByte(decl, ':'); colon >= 0 && strings.TrimSpace(decl[:colon]) == cssName {
			continue
		}
		// Re-terminate every kept declaration so the appended one can never
		// be glued onto a predecessor that lacked a trailing ';'.
		kept = append(kept, decl+";")
	}
	kept = append(kept, cssName+": "+styleValue+";")

	s.node.setAttribute("style", strings.Join(kept, " "))
}
func (*Node) GetElementById ¶ added in v0.1.2
func (*Node) GetInnerHTML ¶ added in v0.1.2
func (*Node) GetNodeName ¶ added in v0.1.2
// setSrcset stores str as the value of the node's "srcset" attribute.
func (n *node) setSrcset(str string) {
	const attrName = "srcset"
	n.setAttribute(attrName, str)
}
func (*Node) GetTextContent ¶ added in v0.1.2
func (*Node) HasAttribute ¶ added in v0.1.2
func (*Node) RemoveAttribute ¶ added in v0.1.2
func (*Node) ReplaceChild ¶ added in v0.1.2
func (*Node) SetAttribute ¶ added in v0.1.2
func (*Node) SetClassName ¶ added in v0.1.2
func (*Node) SetInnerHTML ¶ added in v0.1.2
func (*Node) SetTextContent ¶ added in v0.1.2
type Option ¶
// Option is a function that receives a pointer to Options. New and
// IsProbablyReaderable accept any number of Option values.
type Option func(*Options)
func AllowedVideoRegex ¶
func CharThreshold ¶
func ClassesToPreserve ¶
func DisableJSONLD ¶
func KeepClasses ¶
func MaxElemsToParse ¶
func MinContentLength ¶
func NTopCandidates ¶
func Serializer ¶
type Readability ¶
// Readability is the value returned by New; call its Parse method to obtain
// a Result.
type Readability struct {
// contains filtered or unexported fields
}
func New ¶
func New(htmlSource, uri string, opts ...Option) (*Readability, error)
New is the public constructor of Readability and it supports the following options:
- options.debug
- options.maxElemsToParse
- options.nbTopCandidates
- options.charThreshold
- options.classesToPreserve
- options.keepClasses
- options.serializer
func (*Readability) Parse ¶
func (r *Readability) Parse() (*Result, error)
Runs readability. Workflow:
- Prep the document by removing script tags, css, etc.
- Build readability's DOM tree.
- Grab the article content from the current dom tree.
- Replace the current DOM tree with the new one.
- Read peacefully.
type Result ¶
// Result is the value returned by Readability.Parse. It holds the article
// content together with its metadata.
type Result struct {
// article title
Title string
// HTML string of processed article HTMLContent
HTMLContent string
// text content of the article, with all the HTML tags removed
TextContent string
// length of an article, in characters (runes)
Length int
// article description, or short excerpt from the content
Excerpt string
// author metadata
Byline string
// content direction
Dir string
// name of the site
SiteName string
// content language
Lang string
// published time
PublishedTime string
}
Source Files
¶
Click to show internal directories.
Click to hide internal directories.