add 7mao handler WIP

missdeer · Mar 27, 2024 · 16d3910 · 16d3910
1 parent dd76dc7
commit 16d3910
Show file tree

Hide file tree

Showing 8 changed files with 282 additions and 38 deletions.
diff --git a/config/novel.go b/config/novel.go
@@ -1,6 +1,10 @@
 package config
 
-import "github.com/missdeer/getnovel/ebook"
+import (
+	"net/http"
+
+	"github.com/missdeer/getnovel/ebook"
+)
 
 type TOCPattern struct {
 	Host            string
@@ -34,7 +38,8 @@ type NovelSiteHandler struct {
 	CanHandle                func(string) bool                                  // (url) -> can handle
 	PreprocessChapterListURL func(string) string                                // (original url) -> final url
 	ExtractChapterList       func(string, []byte) (string, []*NovelChapterInfo) // (url, raw page content) (title, chapters)
-	ExtractChapterContent    func([]byte) []byte                                // (raw page content) -> cleanup content
+	ExtractChapterContent    func(string, []byte) []byte                        // (url, raw page content) -> cleanup content
+	PreprocessContentLink    func(string) (string, http.Header)                 // (url) -> (final url, headers)
 	Download                 func(string, ebook.IBook)
 	Begin                    func()
 	End                      func()

diff --git a/dlutil.go b/dlutil.go
@@ -26,28 +26,30 @@ type ContentInfo struct {
 	ContentFilePath string
 }
 type DownloadUtil struct {
-	ContentExtractor func([]byte) []byte
-	Generator        ebook.IBook
-	TempDir          string
-	CurrentPage      int32
-	MaxPage          int32
-	Quit             chan bool
-	Content          chan ContentInfo
-	Buffer           []ContentInfo
-	StartContent     *ContentInfo
-	EndContent       *ContentInfo
-	Ctx              context.Context
-	Semaphore        *semaphore.Weighted
+	ContentExtractor        func(string, []byte) []byte
+	ContentLinkPreprocessor func(string) (string, http.Header)
+	Generator               ebook.IBook
+	TempDir                 string
+	CurrentPage             int32
+	MaxPage                 int32
+	Quit                    chan bool
+	Content                 chan ContentInfo
+	Buffer                  []ContentInfo
+	StartContent            *ContentInfo
+	EndContent              *ContentInfo
+	Ctx                     context.Context
+	Semaphore               *semaphore.Weighted
 }
 
-func NewDownloadUtil(extractor func([]byte) []byte, generator ebook.IBook) (dlutil *DownloadUtil) {
+func NewDownloadUtil(contentExtractor func(string, []byte) []byte, contentLinkPreprocessor func(string) (string, http.Header), generator ebook.IBook) (dlutil *DownloadUtil) {
 	dlutil = &DownloadUtil{
-		ContentExtractor: extractor,
-		Generator:        generator,
-		Quit:             make(chan bool),
-		Ctx:              context.TODO(),
-		Semaphore:        semaphore.NewWeighted(config.Opts.ParallelCount),
-		Content:          make(chan ContentInfo),
+		ContentExtractor:        contentExtractor,
+		ContentLinkPreprocessor: contentLinkPreprocessor,
+		Generator:               generator,
+		Quit:                    make(chan bool),
+		Ctx:                     context.TODO(),
+		Semaphore:               semaphore.NewWeighted(config.Opts.ParallelCount),
+		Content:                 make(chan ContentInfo),
 	}
 	if config.Opts.FromChapter != 0 {
 		dlutil.StartContent = &ContentInfo{Index: config.Opts.FromChapter}
@@ -132,12 +134,15 @@ func (dlutil *DownloadUtil) AddURL(index int, title string, link string) (reachE
 			"Accept-Language":           []string{`en-US,en;q=0.8`},
 			"Upgrade-Insecure-Requests": []string{"1"},
 		}
+		if dlutil.ContentLinkPreprocessor != nil {
+			link, headers = dlutil.ContentLinkPreprocessor(link)
+		}
 		rawPageContent, err := httputil.GetBytes(link, headers, time.Duration(config.Opts.Timeout)*time.Second, config.Opts.RetryCount)
 		if err != nil {
 			log.Println("getting chapter content from", link, "failed ", err)
 			return
 		}
-		contentFd.Write(dlutil.ContentExtractor(rawPageContent))
+		contentFd.Write(dlutil.ContentExtractor(link, rawPageContent))
 		contentFd.Close()
 
 		dlutil.Content <- ContentInfo{

diff --git a/handler/69xinshu.go b/handler/69xinshu.go
@@ -57,7 +57,7 @@ func extract69xinshuChapterList(u string, rawPageContent []byte) (title string,
 	return
 }
 
-func extract69xinshuChapterContent(rawPageContent []byte) (c []byte) {
+func extract69xinshuChapterContent(u string, rawPageContent []byte) (c []byte) {
 	c = ic.Convert("gbk", "utf-8", rawPageContent)
 	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(c))
 	if err != nil {

diff --git a/handler/7mao.go b/handler/7mao.go
@@ -0,0 +1,230 @@
+package handler
+
+import (
+	"bytes"
+	"crypto/aes"
+	"crypto/cipher"
+	"crypto/md5"
+	"encoding/hex"
+	"encoding/json"
+	"fmt"
+	"log"
+	"net/http"
+	"net/url"
+	"regexp"
+	"sort"
+	"strings"
+	"time"
+
+	"github.com/PuerkitoBio/goquery"
+	"github.com/missdeer/getnovel/config"
+	"github.com/missdeer/golib/httputil"
+)
+
+// init registers the Qimao (七猫) site handler. The handler accepts URLs that
+// contain https://www.qimao.com/shuku/<digits-and-dashes>/ and wires up the
+// chapter-list, chapter-content and content-link callbacks defined in this
+// file.
+func init() {
+	// Compile the pattern once at registration time rather than on every
+	// CanHandle call.
+	bookURLPattern := regexp.MustCompile(`https://www\.qimao\.com/shuku/[0-9\-]+/`)
+
+	registerNovelSiteHandler(&config.NovelSiteHandler{
+		Sites: []config.NovelSite{
+			{
+				Title: `七猫`,
+				Urls:  []string{`https://www.qimao.com/`},
+			},
+		},
+		CanHandle: func(u string) bool {
+			return bookURLPattern.MatchString(u)
+		},
+		ExtractChapterList:    extractQimaoChapterList,
+		ExtractChapterContent: extractQimaoChapterContent,
+		PreprocessContentLink: preprocessQimaoChapterLink,
+	})
+}
+
+// preprocessQimaoChapterLink turns a chapter page URL whose last path segment
+// has the form "<bookID>-<chapterID>" into the signed chapter-content API URL
+// and returns it together with the signed request headers.
+//
+// When u cannot be parsed, or its last path segment does not carry both ids,
+// u is returned unchanged together with an empty header set.
+func preprocessQimaoChapterLink(u string) (string, http.Header) {
+	parsed, err := url.Parse(u)
+	if err != nil {
+		// Without this check a malformed URL would dereference a nil *url.URL.
+		log.Println("parsing chapter URL", u, "failed", err)
+		return u, http.Header{}
+	}
+
+	// Extract the book id and the chapter id from the last path segment.
+	// strings.Split always yields at least one element, so the index is safe.
+	segments := strings.Split(strings.Trim(parsed.Path, "/"), "/")
+	ids := strings.Split(segments[len(segments)-1], "-")
+	if len(ids) < 2 || ids[0] == "" || ids[1] == "" {
+		log.Println("URL does not contain expected ids")
+		return u, http.Header{}
+	}
+
+	// Build the query parameters; the signature covers id and chapterId.
+	params := map[string]string{
+		"id":        ids[0],
+		"chapterId": ids[1],
+	}
+
+	signKey := "d3dGiJc651gSQ8w1"
+	params["sign"] = generateMD5Sign(params, signKey)
+
+	// Build the request headers; the signature is computed over the keys
+	// exactly as written here, before "sign" itself is added.
+	headers := map[string]string{
+		"app-version":    "51110",
+		"platform":       "android",
+		"reg":            "0",
+		"AUTHORIZATION":  "",
+		"application-id": "com.****.reader",
+		"net-env":        "1",
+		"channel":        "unknown",
+		"qm-params":      "",
+	}
+	headers["sign"] = generateMD5Sign(headers, signKey)
+
+	// Convert to http.Header. Set canonicalizes each key (for example
+	// "app-version" is stored as "App-Version").
+	header := http.Header{}
+	for key, value := range headers {
+		header.Set(key, value)
+	}
+
+	// Build the final request URL.
+	finalURL := "https://api-ks.wtzw.com/api/v1/chapter/content?" + toParams(params)
+	return finalURL, header
+}
+
+// extractQimaoChapterList returns the book title and the chapter list for the
+// book page at u.
+//
+// The book id is taken from u, the chapters are fetched from the
+// https://www.qimao.com/api/book/chapter-list endpoint, and the title is read
+// from the <title> tag of rawPageContent, cut at the first occurrence of
+// `免费阅读`. If u does not match the expected pattern, or fetching or decoding
+// the chapter list fails, both results are left empty. If only parsing
+// rawPageContent fails, the chapters are still returned with an empty title.
+func extractQimaoChapterList(u string, rawPageContent []byte) (title string, chapters []*config.NovelChapterInfo) {
+	reg := regexp.MustCompile(`https://www\.qimao\.com/shuku/([0-9\-]+)/`)
+	// Extract the book id from the URL. FindStringSubmatch returns nil when
+	// the URL does not match, so guard before indexing.
+	match := reg.FindStringSubmatch(u)
+	if len(match) < 2 {
+		log.Println("extract book id from", u, "failed")
+		return
+	}
+	// If the id has the form xxxx-yyyy, keep only xxxx. Split returns the
+	// whole string as the single element when there is no "-".
+	bookID := strings.Split(match[1], "-")[0]
+
+	// Fetch the chapter list, e.g.
+	// https://www.qimao.com/api/book/chapter-list?book_id=1710753
+	chapterListURL := "https://www.qimao.com/api/book/chapter-list?book_id=" + bookID
+	chapterListResp, err := httputil.GetBytes(chapterListURL, http.Header{}, 60*time.Second, 3)
+	if err != nil {
+		log.Println("get chapter list failed", err)
+		return
+	}
+
+	// Decode the JSON chapter list.
+	var chapterList struct {
+		Data struct {
+			Chapters []struct {
+				Id    string `json:"id"`
+				Title string `json:"title"`
+				Index string `json:"index"`
+			} `json:"chapters"`
+		} `json:"data"`
+	}
+	if err = json.Unmarshal(chapterListResp, &chapterList); err != nil {
+		log.Println("unmarshal chapter list failed", err)
+		return
+	}
+	for _, chapter := range chapterList.Data.Chapters {
+		chapters = append(chapters, &config.NovelChapterInfo{
+			Index: len(chapters),
+			Title: chapter.Title,
+			URL:   "https://www.qimao.com/shuku/" + bookID + "-" + chapter.Id,
+		})
+	}
+
+	// Use the page's <title> tag as the book title.
+	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(rawPageContent))
+	if err != nil {
+		log.Println("parse page content failed", err)
+		return
+	}
+	title = doc.Find("title").Text()
+	if index := strings.Index(title, `免费阅读`); index > 0 {
+		title = title[:index]
+	}
+	return
+}
+
+// extractQimaoChapterContent decodes the chapter-content API response in
+// rawPageContent and returns the decrypted chapter text with every "<br>"
+// replaced by a newline.
+//
+// data.content is read as 32 hex characters of IV followed by the hex-encoded
+// ciphertext. It returns nil when the response cannot be decoded, the content
+// is too short to hold an IV plus ciphertext, or decryption fails; u is used
+// only in the log messages.
+func extractQimaoChapterContent(u string, rawPageContent []byte) (c []byte) {
+	var response QimaoArticleContentResponse
+	if err := json.Unmarshal(rawPageContent, &response); err != nil {
+		log.Println("unmarshal chapter content from", u, "failed", err)
+		return nil
+	}
+
+	// Split the payload into IV and ciphertext. Guard the slice bounds so a
+	// short or empty payload is reported instead of panicking.
+	const ivHexLen = 32
+	txt := response.Data.Content
+	if len(txt) <= ivHexLen {
+		log.Println("chapter content from", u, "is too short:", len(txt))
+		return nil
+	}
+	iv, content := txt[:ivHexLen], txt[ivHexLen:]
+
+	// Decrypt.
+	decryptedContent, err := decrypt(content, iv)
+	if err != nil {
+		log.Println("decrypt chapter content from", u, "failed", err)
+		return nil
+	}
+
+	// Replace HTML line breaks with newlines.
+	return []byte(strings.ReplaceAll(decryptedContent, "<br>", "\n"))
+}
+
+// toParams encodes params as a URL query string ("k1=v1&k2=v2") with keys and
+// values query-escaped. Pairs are emitted sorted by key, so the result is
+// deterministic regardless of map iteration order. An empty or nil map yields
+// the empty string.
+func toParams(params map[string]string) string {
+	values := make(url.Values, len(params))
+	for key, value := range params {
+		values.Set(key, value)
+	}
+	return values.Encode()
+}
+
+// generateMD5Sign returns the lowercase hex MD5 digest of the string formed by
+// concatenating "key=value" for every entry of params in ascending key order
+// (with no separator between entries), followed by signKey.
+func generateMD5Sign(params map[string]string, signKey string) string {
+	names := make([]string, 0, len(params))
+	for name := range params {
+		names = append(names, name)
+	}
+	// Map iteration order is random; sorting makes the signature stable.
+	sort.Strings(names)
+
+	var payload []byte
+	for i := range names {
+		payload = append(payload, names[i]...)
+		payload = append(payload, '=')
+		payload = append(payload, params[names[i]]...)
+	}
+	payload = append(payload, signKey...)
+
+	digest := md5.Sum(payload)
+	return fmt.Sprintf("%x", digest)
+}
+
+// QimaoArticleContentResponse is the JSON shape that
+// extractQimaoChapterContent decodes the chapter-content response into; only
+// data.content is read. Presumably this mirrors the content API's response
+// format (an assumption carried over from the original note; verify against
+// the API).
+type QimaoArticleContentResponse struct {
+	Data struct {
+		Content string `json:"content"`
+	} `json:"data"`
+}
+
+// decrypt decrypts data with AES in CBC mode using the fixed key embedded in
+// this function and returns the plaintext with its PKCS#7 padding removed.
+//
+// data is the hex-encoded ciphertext and ivString is the hex-encoded IV. An
+// error is returned when either value is not valid hex, when the IV is not
+// exactly one AES block long, when the ciphertext is empty or not a multiple
+// of the block size, or when the padding is malformed.
+func decrypt(data, ivString string) (string, error) {
+	key, err := hex.DecodeString("32343263636238323330643730396531")
+	if err != nil {
+		return "", fmt.Errorf("decoding key: %w", err)
+	}
+	iv, err := hex.DecodeString(ivString)
+	if err != nil {
+		return "", fmt.Errorf("decoding iv: %w", err)
+	}
+	// NewCBCDecrypter panics when the IV length differs from the block size.
+	if len(iv) != aes.BlockSize {
+		return "", fmt.Errorf("invalid iv length %d, want %d", len(iv), aes.BlockSize)
+	}
+
+	cipherText, err := hex.DecodeString(data)
+	if err != nil {
+		return "", fmt.Errorf("decoding cipher text: %w", err)
+	}
+	// CryptBlocks panics when the input is not a multiple of the block size.
+	if len(cipherText) == 0 || len(cipherText)%aes.BlockSize != 0 {
+		return "", fmt.Errorf("invalid cipher text length %d, want a positive multiple of %d", len(cipherText), aes.BlockSize)
+	}
+
+	block, err := aes.NewCipher(key)
+	if err != nil {
+		return "", fmt.Errorf("creating cipher: %w", err)
+	}
+
+	// Decrypt in place.
+	cipher.NewCBCDecrypter(block, iv).CryptBlocks(cipherText, cipherText)
+
+	// Validate and strip the PKCS#7 padding: the last byte gives the padding
+	// length, and every padding byte must carry that same value.
+	padLen := int(cipherText[len(cipherText)-1])
+	if padLen == 0 || padLen > aes.BlockSize {
+		return "", fmt.Errorf("invalid padding length %d", padLen)
+	}
+	bodyLen := len(cipherText) - padLen
+	for _, b := range cipherText[bodyLen:] {
+		if int(b) != padLen {
+			return "", fmt.Errorf("malformed padding")
+		}
+	}
+
+	return string(cipherText[:bodyLen]), nil
+}