Skip to content

Commit

Permalink
add 69xinshu handler WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
missdeer committed Feb 26, 2024
1 parent 277f6f2 commit 10a3c43
Showing 1 changed file with 99 additions and 0 deletions.
99 changes: 99 additions & 0 deletions handler/69xinshu.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package handler

import (
"bytes"
"fmt"
"log"
"regexp"
"strings"
"unicode/utf8"

"github.com/PuerkitoBio/goquery"
"github.com/missdeer/getnovel/config"
"github.com/missdeer/golib/ic"
)

// xinshuBookPageRe matches the single-page form of a 69xinshu book URL
// (".../book/<id>.htm") and captures the numeric book ID. The pattern is not
// anchored, so a ".html" URL matches as well. It is compiled once at package
// initialization rather than on every call.
var xinshuBookPageRe = regexp.MustCompile(`https://www\.69xinshu\.com/book/([0-9]+)\.htm`)

// preprocess69xinshuChapterListURL rewrites a book page URL of the form
// https://www.69xinshu.com/book/<id>.htm into the directory-style URL
// https://www.69xinshu.com/book/<id>/. Only the first match in u is used.
// A URL that does not match the pattern is returned unchanged.
func preprocess69xinshuChapterListURL(u string) string {
	m := xinshuBookPageRe.FindStringSubmatch(u)
	if m == nil {
		return u
	}
	// m[1] is the captured book ID.
	return fmt.Sprintf("https://www.69xinshu.com/book/%s/", m[1])
}

// extract69xinshuChapterList parses a chapter list page and returns the book
// title together with one entry per chapter link found under "#catalog li".
//
// The title is the text of the last anchor inside "div.bread"; a trailing
// `章节列表` is stripped from it. Each chapter's Index is the 1-based position
// of its <li> within the matched list, so list items without an href are
// skipped but still consume an index. The u parameter is not used.
//
// If the HTML cannot be parsed, log.Fatal is called, which terminates the
// process.
func extract69xinshuChapterList(u string, rawPageContent []byte) (title string, chapters []*config.NovelChapterInfo) {
	// Transcode the page (presumably GBK to UTF-8, going by the arguments),
	// then drop every line break before handing it to the HTML parser.
	page := ic.Convert("gbk", "utf-8", rawPageContent)
	for _, lineBreak := range []string{"\r\n", "\r", "\n"} {
		page = bytes.Replace(page, []byte(lineBreak), []byte(""), -1)
	}

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(page))
	if err != nil {
		log.Fatal(err)
	}

	// The last breadcrumb link carries the title.
	title = doc.Find("div.bread a").Last().Text()

	// Strip the suffix one rune at a time so multi-byte characters are
	// removed whole.
	const listSuffix = `章节列表`
	if strings.HasSuffix(title, listSuffix) {
		for remaining := utf8.RuneCountInString(listSuffix); remaining > 0; remaining-- {
			_, width := utf8.DecodeLastRuneInString(title)
			title = title[:len(title)-width]
		}
	}

	doc.Find("#catalog li").Each(func(pos int, li *goquery.Selection) {
		link := li.Find("a")
		href, ok := link.Attr("href")
		if !ok {
			return
		}
		chapters = append(chapters, &config.NovelChapterInfo{
			Index: pos + 1,
			Title: link.Text(),
			URL:   href,
		})
	})

	return title, chapters
}

// extract69xinshuChapterContent returns the text content of the "div.txtnav"
// element of a chapter page as a byte slice.
//
// The raw page is first passed through ic.Convert("gbk", "utf-8", ...) and all
// "\r\n", "\r" and "\n" sequences are removed before parsing. If the HTML
// cannot be parsed, log.Fatal is called, which terminates the process.
func extract69xinshuChapterContent(rawPageContent []byte) (c []byte) {
	page := ic.Convert("gbk", "utf-8", rawPageContent)
	for _, lineBreak := range []string{"\r\n", "\r", "\n"} {
		page = bytes.Replace(page, []byte(lineBreak), []byte(""), -1)
	}

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(page))
	if err != nil {
		log.Fatal(err)
	}

	body := doc.Find("div.txtnav").Text()
	return []byte(body)
}

// init registers the 69xinshu site handler: its site metadata, a URL matcher,
// and the chapter list / chapter content callbacks defined in this file.
func init() {
	// Compile the URL patterns once here instead of on every CanHandle call.
	// A *regexp.Regexp is safe for concurrent use, so the closure below can
	// share these.
	matchers := []*regexp.Regexp{
		// Directory-style book URL; not anchored, so it matches anywhere in u.
		regexp.MustCompile(`https://www\.69xinshu\.com/book/[0-9]+/`),
		// Single-page book URL ending in .htm or .html; must match all of u.
		regexp.MustCompile(`^https://www\.69xinshu\.com/book/[0-9]+\.html?$`),
	}

	registerNovelSiteHandler(&config.NovelSiteHandler{
		Sites: []config.NovelSite{
			{
				Title: `69书吧`,
				Urls:  []string{`https://www.69xinshu.com/`},
			},
		},
		// CanHandle reports whether u matches any of the patterns above.
		CanHandle: func(u string) bool {
			for _, re := range matchers {
				if re.MatchString(u) {
					return true
				}
			}
			return false
		},
		PreprocessChapterListURL: preprocess69xinshuChapterListURL,
		ExtractChapterList:       extract69xinshuChapterList,
		ExtractChapterContent:    extract69xinshuChapterContent,
	})
}

0 comments on commit 10a3c43

Please sign in to comment.