-
Notifications
You must be signed in to change notification settings - Fork 7
/
opengraph.go
110 lines (96 loc) · 2.46 KB
/
opengraph.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
// Package opengraph extracts Open Graph metadata from HTML documents.
// See http://ogp.me/ for more information about the Open Graph protocol.
//
// Usage:
//
//	import "github.com/johnreutersward/opengraph"
//
// To extract Open Graph metadata from a movie on IMDb (sans error handling):
//
//	res, _ := http.Get("http://www.imdb.com/title/tt0118715/")
//	md, _ := opengraph.Extract(res.Body)
//	for i := range md {
//		fmt.Printf("%s = %s\n", md[i].Property, md[i].Content)
//	}
//
// Which will output:
//
//	url = http://www.imdb.com/title/tt0118715/
//	type = video.movie
//	title = The Big Lebowski (1998)
//	site_name = IMDb
//	description = Directed by Joel Coen, Ethan Coen. With Jeff Bridges ...
//	...
//
package opengraph
import (
"io"
"strings"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
const (
// defaultPrefix is the namespace prefix that Extract passes to ExtractPrefix.
defaultPrefix = "og"
)
// MetaData is a single prefix:property/content pair extracted from a <meta> tag.
type MetaData struct {
Property string // Property attribute (or, if that is absent, the name attribute) without prefix.
Content string // Content attribute. See http://ogp.me/#data_types for a list of content attribute types.
Prefix string // Prefix that preceded the property, e.g. "og"; the separating ":" is not included.
}
// Extract extracts Open Graph metadata from an HTML document.
// It is shorthand for calling ExtractPrefix with the default "og" prefix.
// If no relevant metadata is found the result will be empty.
// The input is assumed to be UTF-8 encoded.
func Extract(doc io.Reader) ([]MetaData, error) {
return ExtractPrefix(doc, defaultPrefix)
}
// ExtractPrefix extracts metadata from an HTML document in the same way as
// Extract, but only keeps properties whose name starts with the given prefix
// followed by a colon, e.g. "fb" for Facebook.
//
// If prefix is empty, every property of the form "<prefix>:<name>" is
// extracted regardless of its prefix. The property name is read from a <meta>
// tag's "property" attribute, falling back to its "name" attribute; tags with
// no property name or no content are skipped.
//
// Scanning stops at the closing </head> tag, or at the end of the input if
// that tag never appears. Any tokenizer error other than io.EOF is returned
// together with a nil slice.
func ExtractPrefix(doc io.Reader, prefix string) ([]MetaData, error) {
	var tags []MetaData
	z := html.NewTokenizer(doc)
	for {
		switch z.Next() {
		case html.ErrorToken:
			if err := z.Err(); err != io.EOF {
				return nil, err
			}
			return tags, nil
		case html.StartTagToken, html.SelfClosingTagToken, html.EndTagToken:
			// Only tags can be <meta> or </head>; they are inspected below.
		default:
			// Text, comments and doctypes can never match, so skip them
			// without paying for a materialized Token.
			continue
		}

		t := z.Token()
		if t.Type == html.EndTagToken && t.DataAtom == atom.Head {
			return tags, nil
		}
		if t.DataAtom != atom.Meta {
			continue
		}

		var prop, name, content string
		for _, a := range t.Attr {
			switch a.Key {
			case "property":
				prop = a.Val
			case "name":
				name = a.Val
			case "content":
				content = a.Val
			}
		}
		// Fall back to the name attribute when no property attribute is set.
		if prop == "" {
			prop = name
		}
		if prop == "" || content == "" {
			continue
		}

		tagPrefix := prefix
		if prefix != "" {
			if !strings.HasPrefix(prop, prefix+":") {
				continue
			}
		} else {
			// No prefix requested: accept any "<prefix>:<name>" property and
			// report whatever precedes the first colon as its prefix.
			idx := strings.Index(prop, ":")
			if idx == -1 {
				continue
			}
			tagPrefix = prop[:idx]
		}

		tags = append(tags, MetaData{
			Property: prop[len(tagPrefix)+1:], // drop "<prefix>:"
			Content:  content,
			Prefix:   tagPrefix,
		})
	}
}