Skip to content

Commit

Permalink
Merge pull request #20 from planetary-social/causes
Browse files Browse the repository at this point in the history
Add causes support to rsslay
  • Loading branch information
boreq authored Aug 31, 2023
2 parents a7817dd + 2627b30 commit f9c763c
Show file tree
Hide file tree
Showing 2 changed files with 248 additions and 2 deletions.
38 changes: 38 additions & 0 deletions pkg/feed/downloader.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package feed

import (
"context"
"fmt"
"io"
"net/http"
"time"
)

type Downloader struct {
}

func NewDownloader() *Downloader {
return &Downloader{}
}

func (*Downloader) Download(url string) (io.ReadCloser, error) {
client := http.Client{Timeout: 30 * time.Second}

req, err := http.NewRequestWithContext(context.Background(), "GET", url, nil)
if err != nil {
return nil, err
}
req.Header.Set("User-Agent", "rsslay")

resp, err := client.Do(req)
if err != nil {
return nil, err
}

if resp.StatusCode < 200 || resp.StatusCode >= 300 {
resp.Body.Close()
return nil, fmt.Errorf("http error %d", resp.StatusCode)
}

return resp.Body, nil
}
212 changes: 210 additions & 2 deletions pkg/feed/feed.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package feed

import (
"context"
"crypto/hmac"
"crypto/sha256"
"database/sql"
Expand All @@ -10,6 +11,7 @@ import (
"fmt"
"log"
"net/http"
"strconv"
"strings"
"time"

Expand All @@ -22,6 +24,11 @@ import (
"github.com/prometheus/client_golang/prometheus"
)

const (
	// causesLink is the one feed URL handled by the causes.com JSON parser
	// instead of the generic RSS/Atom path.
	causesLink = "https://www.causes.com/api/v2/articles?feed_id=recency"
	// causesNumWorkers bounds the number of concurrent page downloads when
	// fetching the paginated causes.com listing.
	causesNumWorkers = 10
)

var (
fp = gofeed.NewParser()
client = &http.Client{
Expand Down Expand Up @@ -51,6 +58,10 @@ var types = []string{
}

func GetFeedURL(url string) string {
if url == causesLink {
return causesLink
}

resp, err := client.Get(url)
if err != nil || resp.StatusCode >= 300 {
return ""
Expand Down Expand Up @@ -102,8 +113,9 @@ func ParseFeed(url string) (*gofeed.Feed, error) {
}

metrics.CacheMiss.Inc()
fp.RSSTranslator = NewCustomTranslator()
feed, err := fp.ParseURL(url)

parser := getFeedParser(url)
feed, err := parser.Parse()
if err != nil {
return nil, err
}
Expand All @@ -121,6 +133,17 @@ func ParseFeed(url string) (*gofeed.Feed, error) {
return feed, nil
}

// getFeedParser selects the FeedParser implementation for feedURL: the
// causes.com API endpoint gets a dedicated JSON parser, every other URL is
// handled by the generic RSS/Atom parser.
func getFeedParser(feedURL string) FeedParser {
	downloader := NewDownloader()

	if feedURL == causesLink {
		return NewCausesFeedParser(downloader, feedURL)
	}
	return NewDefaultFeedParser(downloader, feedURL)
}

func EntryFeedToSetMetadata(pubkey string, feed *gofeed.Feed, originalUrl string, enableAutoRegistration bool, defaultProfilePictureUrl string, mainDomainName string) nostr.Event {
// Handle Nitter special cases (http schema)
if strings.Contains(feed.Description, "Twitter feed") {
Expand Down Expand Up @@ -193,3 +216,188 @@ func DeleteInvalidFeed(url string, db *sql.DB) {
log.Printf("[DEBUG] deleted invalid feed with url %q", url)
}
}

// FeedParser produces a parsed feed from some source. Implementations in
// this file: DefaultFeedParser (RSS/Atom via gofeed) and CausesFeedParser
// (causes.com JSON API).
type FeedParser interface {
	Parse() (*gofeed.Feed, error)
}

// DefaultFeedParser downloads a feed URL and parses it with gofeed. It is
// used for every feed except the causes.com special case.
type DefaultFeedParser struct {
	downloader *Downloader // fetches the raw feed bytes
	url        string      // feed URL to download
}

// NewDefaultFeedParser creates a DefaultFeedParser that fetches url using
// the given downloader.
func NewDefaultFeedParser(downloader *Downloader, url string) *DefaultFeedParser {
	return &DefaultFeedParser{downloader: downloader, url: url}
}

// Parse downloads the feed body and parses it as RSS/Atom with gofeed.
//
// A fresh gofeed parser is created per call: the previous implementation
// assigned RSSTranslator on the shared package-level parser on every call,
// which is a data race when multiple feeds are parsed concurrently.
func (d *DefaultFeedParser) Parse() (*gofeed.Feed, error) {
	body, err := d.downloader.Download(d.url)
	if err != nil {
		return nil, err
	}
	defer body.Close()

	parser := gofeed.NewParser()
	parser.RSSTranslator = NewCustomTranslator()
	return parser.Parse(body)
}

// causesResponseOrError carries either one page of API results or the error
// that occurred while fetching it, from a worker goroutine back to Parse.
// Exactly one of the two fields is meaningful.
type causesResponseOrError struct {
	Response causesResponse
	Err      error
}

// CausesFeedParser builds a synthetic feed from the causes.com JSON API,
// which is not an RSS/Atom document and therefore cannot go through gofeed.
type CausesFeedParser struct {
	downloader *Downloader // fetches each API page
	url        string      // base API URL; page numbers are appended as a query param
}

// NewCausesFeedParser creates a CausesFeedParser that fetches url using the
// given downloader.
func NewCausesFeedParser(downloader *Downloader, url string) *CausesFeedParser {
	return &CausesFeedParser{downloader: downloader, url: url}
}

// Parse fetches every page of the causes.com article listing concurrently
// and assembles all articles into a single synthetic gofeed.Feed.
func (d *CausesFeedParser) Parse() (*gofeed.Feed, error) {
	// Initial request discovers the total page count from the pagination
	// metadata. NOTE(review): page 1 is fetched again by a worker below —
	// its articles come from the worker result, not from this response.
	resp, err := d.get(d.url)
	if err != nil {
		return nil, err
	}

	// Cancelling the context on return (normal or error) shuts down the
	// producer goroutine and all workers.
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	chIn := make(chan int)
	chOut := make(chan causesResponseOrError)

	d.startWorkers(ctx, chIn, chOut, causesNumWorkers)

	// Producer: queue one job per page (pages are 1-based). Closing chIn
	// signals the workers that no more work is coming.
	go func() {
		defer close(chIn)

		for i := 1; i <= resp.Meta.Pagination.TotalPages; i++ {
			select {
			case chIn <- i:
				continue
			case <-ctx.Done():
				return
			}
		}
	}()

	// Skeleton feed; items are appended as pages arrive.
	feed := &gofeed.Feed{
		Title:       "causes.com",
		Description: "Causes - powered by Countable - makes it quick and easy to understand the laws Congress is considering.",
		Link:        "https://www.causes.com/",
		FeedLink:    causesLink,
		Links:       nil,
		Items:       nil,
	}

	// Collect exactly one result per page. Page (and therefore item) order
	// depends on worker scheduling. The first worker error aborts the whole
	// parse; deferred cancel then stops the remaining goroutines.
	for i := 1; i <= resp.Meta.Pagination.TotalPages; i++ {
		select {
		case result := <-chOut:
			if err := result.Err; err != nil {
				return nil, fmt.Errorf("worker error: %w", err)
			}

			for _, article := range result.Response.Articles {
				article := article // per-iteration copy (pre-Go 1.22 loop-variable idiom)
				item := d.itemFromArticle(article)
				feed.Items = append(feed.Items, item)
			}
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	}

	return feed, nil
}

// startWorkers launches n worker goroutines that consume page numbers from
// chIn and publish fetched pages (or errors) to chOut until ctx is
// cancelled.
func (d *CausesFeedParser) startWorkers(ctx context.Context, chIn <-chan int, chOut chan<- causesResponseOrError, n int) {
	for remaining := n; remaining > 0; remaining-- {
		go d.startWorker(ctx, chIn, chOut)
	}
}

// startWorker is the worker loop: it receives page numbers from chIn,
// downloads each page, and sends the result (or error) to chOut. It exits
// when chIn is closed or ctx is cancelled.
func (d *CausesFeedParser) startWorker(ctx context.Context, chIn <-chan int, chOut chan<- causesResponseOrError) {
	for {
		select {
		case in, ok := <-chIn:
			// chIn is closed by the producer once all pages are queued.
			// A receive from a closed channel yields the zero value, so
			// without this ok check the worker would busy-loop issuing
			// requests for page 0 until the context was cancelled.
			if !ok {
				return
			}

			result, err := d.work(in)
			if err != nil {
				select {
				case chOut <- causesResponseOrError{Err: err}:
					continue
				case <-ctx.Done():
					return
				}
			}

			select {
			case chOut <- causesResponseOrError{Response: result}:
				continue
			case <-ctx.Done():
				return
			}
		case <-ctx.Done():
			return
		}
	}
}

// work fetches a single page of the causes.com article listing.
func (d *CausesFeedParser) work(page int) (causesResponse, error) {
	pageURL := d.url + "&page=" + strconv.Itoa(page)
	return d.get(pageURL)
}

// get downloads url and JSON-decodes the response body into a
// causesResponse.
func (d *CausesFeedParser) get(url string) (causesResponse, error) {
	body, err := d.downloader.Download(url)
	if err != nil {
		return causesResponse{}, err
	}
	defer body.Close()

	var decoded causesResponse
	if err := json.NewDecoder(body).Decode(&decoded); err != nil {
		return decoded, err
	}

	return decoded, nil
}

// itemFromArticle converts a causes.com API article into a gofeed item so
// the rest of the pipeline can treat it like any other feed entry.
func (d *CausesFeedParser) itemFromArticle(article causesResponseArticle) *gofeed.Item {
	// Local copy so the PublishedParsed pointer does not alias the caller's
	// argument beyond this value.
	createdAt := article.CreatedAt

	return &gofeed.Item{
		GUID:            strconv.Itoa(article.Id),
		Title:           article.Title,
		Content:         article.HtmlContent,
		Link:            article.Links.Self,
		Published:       createdAt.Format(time.RFC3339),
		PublishedParsed: &createdAt,
	}
}

// causesResponse mirrors one page of the causes.com articles API response.
type causesResponse struct {
	Articles []causesResponseArticle `json:"articles"`
	Meta     causesResponseMeta      `json:"meta"`
}

// causesResponseArticle is a single article in the API listing.
type causesResponseArticle struct {
	Id          int                        `json:"id"`
	Title       string                     `json:"title"`
	CreatedAt   time.Time                  `json:"created_at"`
	HtmlContent string                     `json:"html_content"`
	Links       causesResponseArticleLinks `json:"links"`
}

// causesResponseArticleLinks holds the hyperlinks attached to an article.
type causesResponseArticleLinks struct {
	// Self is the article's own URL, used as the feed item link.
	Self string `json:"self"`
}

// causesResponseMeta wraps the response metadata.
type causesResponseMeta struct {
	Pagination causesResponseMetaPagination `json:"pagination"`
}

// causesResponseMetaPagination describes the paging of the listing;
// CausesFeedParser.Parse uses TotalPages to schedule one download per page.
type causesResponseMetaPagination struct {
	CurrentPage int `json:"current_page"`
	TotalPages  int `json:"total_pages"`
	TotalCount  int `json:"total_count"`
}

0 comments on commit f9c763c

Please sign in to comment.