From 10a3c43140a41f4f7f520598bc19a6a0aafc9bb2 Mon Sep 17 00:00:00 2001 From: Fan Yang Date: Mon, 26 Feb 2024 22:43:10 +0800 Subject: [PATCH] add 69xinshu handler WIP --- handler/69xinshu.go | 99 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 handler/69xinshu.go diff --git a/handler/69xinshu.go b/handler/69xinshu.go new file mode 100644 index 0000000..222ec30 --- /dev/null +++ b/handler/69xinshu.go @@ -0,0 +1,99 @@ +package handler + +import ( + "bytes" + "fmt" + "log" + "regexp" + "strings" + "unicode/utf8" + + "github.com/PuerkitoBio/goquery" + "github.com/missdeer/getnovel/config" + "github.com/missdeer/golib/ic" +) + +func preprocess69xinshuChapterListURL(u string) string { + reg := regexp.MustCompile(`https://www\.69xinshu\.com/book/([0-9]+)\.htm`) + if reg.MatchString(u) { + ss := reg.FindAllStringSubmatch(u, -1) + s := ss[0] + return fmt.Sprintf("https://www.69xinshu.com/book/%s/", s[1]) + } + return u +} + +func extract69xinshuChapterList(u string, rawPageContent []byte) (title string, chapters []*config.NovelChapterInfo) { + c := ic.Convert("gbk", "utf-8", rawPageContent) + c = bytes.Replace(c, []byte("\r\n"), []byte(""), -1) + c = bytes.Replace(c, []byte("\r"), []byte(""), -1) + c = bytes.Replace(c, []byte("\n"), []byte(""), -1) + + doc, err := goquery.NewDocumentFromReader(bytes.NewReader(c)) + if err != nil { + log.Fatal(err) + } + doc.Find("div.bread a").Each(func(index int, item *goquery.Selection) { + title = item.Text() + }) + if strings.HasSuffix(title, `章节列表`) { + for i := 0; i < 4; i++ { + _, size := utf8.DecodeLastRuneInString(title) + title = title[:len(title)-size] + } + } + doc.Find("#catalog li").Each(func(i int, s *goquery.Selection) { + if a := s.Find("a"); a != nil { + if href, exists := a.Attr("href"); exists { + chapters = append(chapters, &config.NovelChapterInfo{ + Index: i + 1, + Title: a.Text(), + URL: href, + }) + } + } + }) + + return +} + +func extract69xinshuChapterContent(rawPageContent []byte) (c []byte) { + c = ic.Convert("gbk", "utf-8", rawPageContent) + c = bytes.Replace(c, []byte("\r\n"), []byte(""), -1) + c = bytes.Replace(c, []byte("\r"), []byte(""), -1) + c = bytes.Replace(c, []byte("\n"), []byte(""), -1) + + doc, err := goquery.NewDocumentFromReader(bytes.NewReader(c)) + if err != nil { + log.Fatal(err) + } + c = []byte(doc.Find("div.txtnav").Text()) + return +} + +func init() { + registerNovelSiteHandler(&config.NovelSiteHandler{ + Sites: []config.NovelSite{ + { + Title: `69书吧`, + Urls: []string{`https://www.69xinshu.com/`}, + }, + }, + CanHandle: func(u string) bool { + patterns := []string{ + `https://www\.69xinshu\.com/book/[0-9]+/`, + `^https://www\.69xinshu\.com/book/[0-9]+\.html?$`, + } + for _, pattern := range patterns { + reg := regexp.MustCompile(pattern) + if reg.MatchString(u) { + return true + } + } + return false + }, + PreprocessChapterListURL: preprocess69xinshuChapterListURL, + ExtractChapterList: extract69xinshuChapterList, + ExtractChapterContent: extract69xinshuChapterContent, + }) +}