From e8d7233ad98f4f50b73ed2e2bc9ab8415a95f315 Mon Sep 17 00:00:00 2001
From: Jim Geurts
Date: Wed, 19 Mar 2014 12:01:18 -0500
Subject: [PATCH] Add in RSS 1.0 (rdf) support

---
 RssToEmail/Formatters/Rss10FeedFormatter.cs | 327 ++++++++++++++++++++
 RssToEmail/Program.cs                       |  36 ++-
 RssToEmail/RssToEmail.csproj                |   1 +
 3 files changed, 357 insertions(+), 7 deletions(-)
 create mode 100644 RssToEmail/Formatters/Rss10FeedFormatter.cs

diff --git a/RssToEmail/Formatters/Rss10FeedFormatter.cs b/RssToEmail/Formatters/Rss10FeedFormatter.cs
new file mode 100644
index 0000000..938b5b7
--- /dev/null
+++ b/RssToEmail/Formatters/Rss10FeedFormatter.cs
@@ -0,0 +1,327 @@
+using System;
+using System.Collections.ObjectModel;
+using System.Xml;
+using System.Collections.Generic;
+using System.ServiceModel.Syndication;
+
+namespace RssToEmail.Formatters
+{
+    /// <summary>
+    /// Reads and writes <see cref="SyndicationFeed"/> instances as RSS 1.0 (RDF) documents.
+    /// Only the title, link and description of the channel and of each item are mapped.
+    /// </summary>
+    // From: http://www.4guysfromrolla.com/articles/031809-1.aspx
+    public class Rss10FeedFormatter : SyndicationFeedFormatter
+    {
+        #region Constructors
+        /// <summary>Creates a formatter with no feed attached.</summary>
+        public Rss10FeedFormatter() { }
+
+        /// <summary>Creates a formatter for the given feed.</summary>
+        public Rss10FeedFormatter(SyndicationFeed feed) : base(feed) { }
+        #endregion
+
+        #region Properties
+        /// <summary>Gets the version string of this formatter, "Rss10".</summary>
+        public override string Version
+        {
+            get { return "Rss10"; }
+        }
+
+        /// <summary>Gets the namespace of the RDF root element and of the other rdf names that are written.</summary>
+        public virtual string RdfNamespaceUri
+        {
+            get { return "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; }
+        }
+
+        /// <summary>Gets the RSS 1.0 namespace that <see cref="WriteTo"/> declares as the default namespace.</summary>
+        public virtual string NamespaceUri
+        {
+            get { return "http://purl.org/rss/1.0/"; }
+        }
+        #endregion
+
+        #region Methods
+        #region Read XML Methods
+        /// <summary>Returns true when the reader is on an RDF start element in <see cref="RdfNamespaceUri"/>.</summary>
+        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
+        public override bool CanRead(System.Xml.XmlReader reader)
+        {
+            if (reader == null)
+                throw new ArgumentNullException("reader");
+
+            return reader.IsStartElement("RDF", this.RdfNamespaceUri);
+        }
+
+        /// <summary>Creates the empty feed that <see cref="ReadFrom"/> fills in.</summary>
+        protected override SyndicationFeed CreateFeedInstance()
+        {
+            return new SyndicationFeed();
+        }
+
+        /// <summary>Reads an RSS 1.0 document from the reader into <see cref="SyndicationFeedFormatter.Feed"/>.</summary>
+        /// <exception cref="XmlException">The reader is not positioned on an RDF root element.</exception>
+        public override void ReadFrom(System.Xml.XmlReader reader)
+        {
+            if (!this.CanRead(reader))
+                throw new XmlException("Unknown RSS 1.0 feed format.");
+
+            this.ReadFeed(reader);
+        }
+
+        /// <summary>Creates the feed instance and populates it from the reader.</summary>
+        private void ReadFeed(XmlReader reader)
+        {
+            this.SetFeed(this.CreateFeedInstance());
+            this.ReadXml(reader, base.Feed);
+        }
+
+        /// <summary>Fills <paramref name="result"/> from the channel and item elements of the document.</summary>
+        /// <param name="reader">Reader positioned on the RDF root element.</param>
+        /// <param name="result">Feed that receives the title, links, description and items.</param>
+        /// <exception cref="ArgumentNullException">An argument is null.</exception>
+        protected virtual void ReadXml(XmlReader reader, SyndicationFeed result)
+        {
+            if (result == null)
+                throw new ArgumentNullException("result");
+            else if (reader == null)
+                throw new ArgumentNullException("reader");
+
+            reader.ReadStartElement(); // Read in <rdf:RDF>
+            reader.ReadStartElement("channel"); // Read in <channel>
+            while (reader.IsStartElement()) // Process <channel> children
+            {
+                if (reader.IsStartElement("title"))
+                    result.Title = new TextSyndicationContent(reader.ReadElementString());
+                else if (reader.IsStartElement("link"))
+                    AddLink(result.Links, reader.ReadElementString());
+                else if (reader.IsStartElement("description"))
+                    result.Description = new TextSyndicationContent(reader.ReadElementString());
+                else
+                    reader.Skip();
+            }
+            reader.ReadEndElement(); // Read in </channel>
+
+            // Items are siblings of <channel>; keep scanning after each run of items so
+            // that items following another element (e.g. <image>) are not dropped.
+            List<SyndicationItem> items = new List<SyndicationItem>();
+            while (reader.IsStartElement())
+            {
+                if (reader.IsStartElement("item"))
+                    items.AddRange(this.ReadItems(reader, result));
+                else
+                    reader.Skip();
+            }
+            result.Items = items;
+        }
+
+        /// <summary>Reads consecutive item elements starting at the reader's current position.</summary>
+        /// <exception cref="ArgumentNullException">An argument is null.</exception>
+        protected virtual IEnumerable<SyndicationItem> ReadItems(XmlReader reader, SyndicationFeed feed)
+        {
+            if (feed == null)
+                throw new ArgumentNullException("feed");
+            else if (reader == null)
+                throw new ArgumentNullException("reader");
+
+            Collection<SyndicationItem> items = new Collection<SyndicationItem>();
+            while (reader.IsStartElement("item"))
+            {
+                items.Add(this.ReadItem(reader, feed));
+            }
+
+            return items;
+        }
+
+        /// <summary>Reads one item element into a new <see cref="SyndicationItem"/>.</summary>
+        /// <exception cref="ArgumentNullException">An argument is null.</exception>
+        protected virtual SyndicationItem ReadItem(XmlReader reader, SyndicationFeed feed)
+        {
+            if (feed == null)
+                throw new ArgumentNullException("feed");
+            else if (reader == null)
+                throw new ArgumentNullException("reader");
+
+            SyndicationItem result = new SyndicationItem();
+            this.ReadItemFrom(reader, result);
+
+            return result;
+        }
+
+        /// <summary>Copies the title, link and description children of the current element into <paramref name="result"/>.</summary>
+        /// <exception cref="ArgumentNullException">An argument is null.</exception>
+        protected virtual void ReadItemFrom(XmlReader reader, SyndicationItem result)
+        {
+            if (result == null)
+                throw new ArgumentNullException("result");
+            else if (reader == null)
+                throw new ArgumentNullException("reader");
+
+            reader.ReadStartElement();
+            while (reader.IsStartElement())
+            {
+                if (reader.IsStartElement("title"))
+                    result.Title = new TextSyndicationContent(reader.ReadElementString());
+                else if (reader.IsStartElement("link"))
+                    AddLink(result.Links, reader.ReadElementString());
+                else if (reader.IsStartElement("description"))
+                    result.Summary = new TextSyndicationContent(reader.ReadElementString());
+                else
+                    reader.Skip();
+            }
+            reader.ReadEndElement();
+        }
+
+        /// <summary>Adds <paramref name="text"/> to <paramref name="links"/> as an alternate link if it parses as a URI.</summary>
+        private static void AddLink(Collection<SyndicationLink> links, string text)
+        {
+            // A blank or malformed <link> is skipped instead of aborting the whole feed; accepted
+            // links are marked "alternate", the relationship the write methods look for.
+            Uri uri;
+            if (!string.IsNullOrWhiteSpace(text) && Uri.TryCreate(text.Trim(), UriKind.RelativeOrAbsolute, out uri))
+                links.Add(SyndicationLink.CreateAlternateLink(uri));
+        }
+        #endregion
+
+        #region Write XML Methods
+        /// <summary>Writes the feed as an RSS 1.0 document with an rdf:RDF root element.</summary>
+        /// <exception cref="ArgumentNullException"><paramref name="writer"/> is null.</exception>
+        public override void WriteTo(System.Xml.XmlWriter writer)
+        {
+            if (writer == null)
+                throw new ArgumentNullException("writer");
+
+            writer.WriteStartElement("rdf", "RDF", this.RdfNamespaceUri); // Write <rdf:RDF>
+            writer.WriteAttributeString("xmlns", this.NamespaceUri);
+            this.WriteFeed(writer);
+            writer.WriteEndElement();
+        }
+
+        /// <summary>Writes the channel element, including its rdf:Seq of item links, followed by the item elements.</summary>
+        /// <exception cref="ArgumentNullException"><paramref name="writer"/> is null.</exception>
+        /// <exception cref="InvalidOperationException">No feed has been assigned to the formatter.</exception>
+        /// <exception cref="ArgumentException">The feed has no title, no link or no description.</exception>
+        protected virtual void WriteFeed(XmlWriter writer)
+        {
+            if (writer == null)
+                throw new ArgumentNullException("writer");
+            if (base.Feed == null)
+                throw new InvalidOperationException("Feed formatter does not have a feed.");
+
+            string alternateLink = string.Empty, selfLink = string.Empty;
+            foreach (SyndicationLink lnk in base.Feed.Links)
+            {
+                if (alternateLink.Length == 0 && lnk.RelationshipType == "alternate")
+                    alternateLink = lnk.Uri.IsAbsoluteUri ? lnk.Uri.AbsoluteUri : lnk.Uri.ToString();
+                else if (selfLink.Length == 0 && lnk.RelationshipType == "self")
+                    selfLink = lnk.Uri.IsAbsoluteUri ? lnk.Uri.AbsoluteUri : lnk.Uri.ToString();
+            }
+
+            // Each of the two links falls back to the other when only one is present.
+            if (selfLink.Length == 0 && alternateLink.Length > 0)
+                selfLink = alternateLink;
+            else if (alternateLink.Length == 0 && selfLink.Length > 0)
+                alternateLink = selfLink;
+
+            writer.WriteStartElement("channel"); // Write <channel>
+            writer.WriteAttributeString("about", this.RdfNamespaceUri, selfLink);
+
+            if (base.Feed.Title == null || string.IsNullOrEmpty(base.Feed.Title.Text))
+                throw new ArgumentException("Feed title required for RSS 1.0 feeds.");
+            writer.WriteElementString("title", base.Feed.Title.Text); // Write <title>
+
+            if (alternateLink.Length == 0)
+                throw new ArgumentException("Feed link required for RSS 1.0 feeds.");
+            writer.WriteElementString("link", alternateLink); // Write <link>
+
+            if (base.Feed.Description == null || string.IsNullOrEmpty(base.Feed.Description.Text))
+                throw new ArgumentException("Feed description required for RSS 1.0 feeds.");
+            writer.WriteElementString("description", base.Feed.Description.Text); // Write <description>
+
+            writer.WriteStartElement("items"); // Write <items>
+            writer.WriteStartElement("Seq", this.RdfNamespaceUri);
+            foreach (SyndicationItem item in base.Feed.Items)
+            {
+                string itemAlternateLink = GetAlternateLinkForItem(item);
+
+                writer.WriteStartElement("li", this.RdfNamespaceUri);
+                writer.WriteAttributeString("resource", this.RdfNamespaceUri, itemAlternateLink);
+                writer.WriteEndElement();
+            }
+            writer.WriteEndElement();
+            writer.WriteEndElement();
+
+            writer.WriteEndElement(); // Write </channel>
+
+            // Write the <item> elements
+            this.WriteItems(writer, base.Feed.Items);
+        }
+
+        /// <summary>Writes one item element per entry of <paramref name="items"/>; a null sequence writes nothing.</summary>
+        /// <exception cref="ArgumentNullException"><paramref name="writer"/> is null.</exception>
+        protected virtual void WriteItems(XmlWriter writer, IEnumerable<SyndicationItem> items)
+        {
+            if (writer == null)
+                throw new ArgumentNullException("writer");
+
+            if (items != null)
+                foreach (SyndicationItem item in items)
+                    this.WriteItem(writer, item);
+        }
+
+        /// <summary>Writes a single item element whose rdf:about attribute is the item's alternate link.</summary>
+        /// <exception cref="ArgumentNullException">An argument is null.</exception>
+        /// <exception cref="ArgumentException">The item has no title or no alternate link.</exception>
+        protected virtual void WriteItem(XmlWriter writer, SyndicationItem item)
+        {
+            if (writer == null)
+                throw new ArgumentNullException("writer");
+            if (item == null)
+                throw new ArgumentNullException("item");
+
+            string alternateLink = GetAlternateLinkForItem(item);
+
+            writer.WriteStartElement("item"); // Write <item>
+            writer.WriteAttributeString("about", this.RdfNamespaceUri, alternateLink);
+            this.WriteItemContents(writer, item);
+            writer.WriteEndElement();
+        }
+
+        /// <summary>Writes the title, link and optional description of <paramref name="item"/>.</summary>
+        /// <exception cref="ArgumentNullException">An argument is null.</exception>
+        /// <exception cref="ArgumentException">The item has no title or no alternate link.</exception>
+        protected virtual void WriteItemContents(XmlWriter writer, SyndicationItem item)
+        {
+            if (writer == null)
+                throw new ArgumentNullException("writer");
+            if (item == null)
+                throw new ArgumentNullException("item");
+
+            if (item.Title == null || string.IsNullOrEmpty(item.Title.Text))
+                throw new ArgumentException("Item title required for RSS 1.0 feeds.");
+            writer.WriteElementString("title", item.Title.Text); // Write <title>
+
+            string alternateLink = GetAlternateLinkForItem(item);
+            if (string.IsNullOrEmpty(alternateLink))
+                throw new ArgumentException("Item link required for RSS 1.0 feeds.");
+            writer.WriteElementString("link", alternateLink); // Write <link>
+
+            // The description depends only on the item's own summary, which may be absent.
+            if (item.Summary != null && !string.IsNullOrEmpty(item.Summary.Text))
+                writer.WriteElementString("description", item.Summary.Text); // Write the optional <description>
+        }
+
+        /// <summary>Returns the URI of the item's first "alternate" link, or an empty string when it has none.</summary>
+        private string GetAlternateLinkForItem(SyndicationItem item)
+        {
+            foreach (SyndicationLink lnk in item.Links)
+                if (lnk.RelationshipType == "alternate")
+                    return lnk.Uri.IsAbsoluteUri ? lnk.Uri.AbsoluteUri : lnk.Uri.ToString();
+
+            // If we reach here, return an empty string
+            return string.Empty;
+        }
+        #endregion
+        #endregion
+    }
+}
diff --git a/RssToEmail/Program.cs b/RssToEmail/Program.cs index 172c172..49a1cdd 100644 --- a/RssToEmail/Program.cs +++ b/RssToEmail/Program.cs @@ -10,6 +10,7 @@ using Raven.Client.Embedded; using Raven.Database.Server; using RssToEmail.Extensions; +using RssToEmail.Formatters; using RssToEmail.Models; using log4net; @@ -60,8 +61,29 @@ static void Main(string[] args) stopwatch.Start(); using (var f = XmlReader.Create(url)) { - var feed = SyndicationFeed.Load(f); - var from = new MailAddress(ConfigurationManager.AppSettings["from"], feed.Title.Text); + SyndicationFeed feed; + + // Try rdf first + var rss10FeedParser = new Rss10FeedFormatter(); + if (rss10FeedParser.CanRead(f)) + { + rss10FeedParser.ReadFrom(f); + feed = rss10FeedParser.Feed; + } + else + { + feed = SyndicationFeed.Load(f); + } + var fromTitle = feed.Title.Text; + if (string.IsNullOrWhiteSpace(fromTitle)) + { + var author = feed.Authors.FirstOrDefault(); + if (author != null) + { + fromTitle = author.Name; + } + } + var from = new MailAddress(ConfigurationManager.AppSettings["from"], fromTitle); bool? supportsContentEncoding = null; foreach (var item in feed.Items.Reverse()) @@ -71,16 +93,16 @@ static void Main(string[] args) if (linkUri != null) link = linkUri.Uri.ToString().Split('?').First(); - var id = item.Id; - if (item.Id.Contains("?key=")) + var id = item.Id ?? link; + if (id.Contains("?key=")) { - id = item.Id.Split(new [] { "?key=" }, StringSplitOptions.RemoveEmptyEntries)[1]; + id = id.Split(new [] { "?key=" }, StringSplitOptions.RemoveEmptyEntries)[1]; } // Ignore previously processed items if (savedFeed.SentItems.Any(x => x.Id.Equals(id, StringComparison.OrdinalIgnoreCase) || - x.Id.Equals(item.Id, StringComparison.OrdinalIgnoreCase) || + x.Id.Equals(item.Id ?? 
"ignore in this case", StringComparison.OrdinalIgnoreCase) || (!string.IsNullOrEmpty(x.Url) && x.Url == link))) continue; @@ -121,7 +143,7 @@ static void Main(string[] args) } var message = new MailMessage(from, to) { - Subject = "[New Post] " + item.Title.Text, + Subject = "[New Post] " + HttpUtility.HtmlDecode(item.Title.Text), Body = content + string.Format("<p style=\"font-size:12px;line-height:1.4em;margin:10px 0px 10px 0px\">View the original article: <a href=\"{0}\">{0}</a></p>", link), IsBodyHtml = true }; diff --git a/RssToEmail/RssToEmail.csproj b/RssToEmail/RssToEmail.csproj index 1a2dc9a..2b1ce7d 100644 --- a/RssToEmail/RssToEmail.csproj +++ b/RssToEmail/RssToEmail.csproj @@ -92,6 +92,7 @@ </ItemGroup> <ItemGroup> <Compile Include="Extensions\TimeSpanExtensions.cs" /> + <Compile Include="Formatters\Rss10FeedFormatter.cs" /> <Compile Include="Models\FeedItem.cs" /> <Compile Include="Models\Feed.cs" /> <Compile Include="Program.cs" />