forked from johnreutersward/opengraph
-
Notifications
You must be signed in to change notification settings - Fork 0
/
opengraph.go
82 lines (72 loc) · 2.01 KB
/
opengraph.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
// Package opengraph extracts Open Graph metadata from HTML documents.
// See http://ogp.me/ for more information about the Open Graph protocol.
//
// Usage:
// import "github.com/rojters/opengraph"
//
// To extract Open Graph metadata from a movie on IMDb:
//
// res, _ := http.Get("http://www.imdb.com/title/tt0118715/")
// og, _ := opengraph.Extract(res.Body)
// for _, md := range og {
// fmt.Printf("%s = %s\n", md.Property, md.Content)
// }
//
// Which will output:
//
// url = http://www.imdb.com/title/tt0118715/
// type = video.movie
// title = The Big Lebowski (1998)
// site_name = IMDb
// description = Directed by Joel Coen, Ethan Coen. With Jeff Bridges ...
// ...
//
package opengraph
import (
"io"
"strings"
"golang.org/x/net/html"
)
// MetaData is a single metadata entry: the value of a meta element's
// property attribute (with the namespace prefix removed) paired with the
// value of its content attribute.
type MetaData struct {
Property string // Property attribute without namespace prefix.
Content string // See http://ogp.me/#data_types for a list of content attribute types.
}
// Namespace is the property prefix that Extract matches against.
// By default Extract will only return metadata in the Open Graph namespace ("og:").
// This variable can be changed to get data from other namespaces,
// e.g. "fb:" for Facebook. To get all metadata regardless of namespace,
// set it to the empty string.
// The matched prefix is removed from the Property of each returned entry.
var Namespace = "og:"
// Extract extracts Open Graph metadata from an HTML document.
//
// It tokenizes doc and collects every meta element that has a property
// attribute starting with Namespace and a non-empty content attribute.
// The Namespace prefix is stripped from the returned Property. Meta elements
// without a property attribute are always skipped, even when Namespace is
// the empty string, so no entry with a missing property name is produced.
//
// Scanning stops at the closing head tag or at the end of the input,
// whichever comes first. If no relevant metadata is found the result will be
// empty. A tokenizer error other than io.EOF is returned together with a nil
// slice.
//
// The input is assumed to be UTF-8 encoded.
func Extract(doc io.Reader) ([]MetaData, error) {
	var tags []MetaData
	z := html.NewTokenizer(doc)
	for {
		switch z.Next() {
		case html.ErrorToken:
			// io.EOF is the normal end of input; anything else is a real failure.
			if err := z.Err(); err != io.EOF {
				return nil, err
			}
			return tags, nil

		case html.EndTagToken:
			// Only the head section is scanned; stop as soon as it is closed.
			if z.Token().Data == "head" {
				return tags, nil
			}

		case html.StartTagToken, html.SelfClosingTagToken:
			// Only real tags can carry attributes, so the token is built
			// for these types only and not for text, comments or doctypes.
			t := z.Token()
			if t.Data != "meta" {
				continue
			}

			var prop, cont string
			for _, a := range t.Attr {
				switch a.Key {
				case "property":
					prop = a.Val
				case "content":
					cont = a.Val
				}
			}

			// The explicit prop check matters when Namespace is "": every
			// string has the empty prefix, which would otherwise let meta
			// elements without a property attribute through.
			if prop == "" || cont == "" || !strings.HasPrefix(prop, Namespace) {
				continue
			}
			tags = append(tags, MetaData{Property: prop[len(Namespace):], Content: cont})
		}
	}
}