From 65b164fa244e5438d66836a1bf8ee1d1e2f5e6ed Mon Sep 17 00:00:00 2001 From: jadkins89 Date: Wed, 13 Jan 2021 10:17:18 -0700 Subject: [PATCH 01/11] async index.js --- scrapers/index.js | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/scrapers/index.js b/scrapers/index.js index 9be5c15..3af002d 100644 --- a/scrapers/index.js +++ b/scrapers/index.js @@ -45,20 +45,18 @@ const domains = { yummly: require("./yummly") }; -const recipeScraper = url => { - return new Promise((resolve, reject) => { - let parse = parseDomain(url); - if (parse) { - let domain = parse.domain; - if (domains[domain] !== undefined) { - resolve(domains[domain](url)); - } else { - reject(new Error("Site not yet supported")); - } +const recipeScraper = async url => { + let parse = parseDomain(url); + if (parse) { + let domain = parse.domain; + if (domains[domain] !== undefined) { + return await domains[domain](url); } else { - reject(new Error("Failed to parse domain")); + throw new Error("Site not yet supported"); } - }); + } else { + throw new Error("Failed to parse domain"); + } }; module.exports = recipeScraper; From d87fa2f1646a039e792ee0abb95977769a4fa376 Mon Sep 17 00:00:00 2001 From: jadkins89 Date: Wed, 13 Jan 2021 11:43:42 -0700 Subject: [PATCH 02/11] adding base / factory class and first sub class --- helpers/BaseScraper.js | 83 +++++++++++++++++++++++++++++ helpers/ScraperFactory.js | 77 ++++++++++++++++++++++++++ package.json | 1 + scrapers/AmbitiousKitchenScraper.js | 56 +++++++++++++++++++ scrapers/index.js | 64 +++------------------- 5 files changed, 225 insertions(+), 56 deletions(-) create mode 100644 helpers/BaseScraper.js create mode 100644 helpers/ScraperFactory.js create mode 100644 scrapers/AmbitiousKitchenScraper.js diff --git a/helpers/BaseScraper.js b/helpers/BaseScraper.js new file mode 100644 index 0000000..c81b283 --- /dev/null +++ b/helpers/BaseScraper.js @@ -0,0 +1,83 @@ +"use strict"; + +const fetch = require("node-fetch"); 
+const cheerio = require("cheerio"); + +const RecipeSchema = require("./recipe-schema"); + +/** + * + */ +class BaseScraper { + constructor(url) { + this.url = url; + this.subUrl = ""; + this.recipe = new RecipeSchema(); + } + + /** + * + */ + checkUrl() { + if (!this.url.includes(this.subUrl)) { + throw new Error(`url provided must includes '${this.subUrl}'`); + } + } + + /** + * Fetches html from url + * @returns {object} - Cheerio instance + */ + async fetchDOMModel() { + try { + const res = await fetch(this.url); + const html = await res.text(); + return cheerio.load(html); + } catch (err) { + throw new Error("No recide found on page"); + } + } + + /** + * Abstract method + * @param {object} $ - cheerio instance + * @returns {object} - an object representing the recipe + */ + scrape($) { + throw new Error("scrape is not defined in BaseScraper"); + } + + /** + * + */ + async fetchRecipe() { + this.checkUrl(); + const $ = await this.fetchDOMModel(); + this.scrape($); + return this.validateRecipe(); + } + + /** + * + */ + setImage($) { + this.recipe.image = $("meta[property='og:image']").attr("content"); + } + + /** + * + */ + // TODO build recipe schema + validateRecipe() { + if ( + !this.recipe.name || + !this.recipe.ingredients.length || + !this.recipe.instructions.length + ) { + throw new Error("No recipe found on page"); + } + return this.recipe; + } +} + +module.exports = BaseScraper; diff --git a/helpers/ScraperFactory.js b/helpers/ScraperFactory.js new file mode 100644 index 0000000..126a21a --- /dev/null +++ b/helpers/ScraperFactory.js @@ -0,0 +1,77 @@ +"use strict"; + +const parseDomain = require("parse-domain"); + +const domains = { + "101cookbooks": require("../scrapers/101cookbooks"), + allrecipes: require("../scrapers/allrecipes"), + ambitiouskitchen: require("../scrapers/AmbitiousKitchenScraper"), + averiecooks: require("../scrapers/averiecooks"), + bbc: require("../scrapers/bbc"), + bbcgoodfood: require("../scrapers/bbcgoodfood"), + bonappetit: 
require("../scrapers/bonappetit"), + budgetbytes: require("../scrapers/budgetbytes"), + centraltexasfoodbank: require("../scrapers/centraltexasfoodbank"), + closetcooking: require("../scrapers/closetcooking"), + cookieandkate: require("../scrapers/cookieandkate"), + copykat: require("../scrapers/copykat"), + damndelicious: require("../scrapers/damndelicious"), + eatingwell: require("../scrapers/eatingwell"), + epicurious: require("../scrapers/epicurious"), + food: require("../scrapers/food"), + foodandwine: require("../scrapers/foodandwine"), + foodnetwork: require("../scrapers/foodnetwork"), + gimmedelicious: require("../scrapers/gimmedelicious"), + gimmesomeoven: require("../scrapers/gimmesomeoven"), + julieblanner: require("../scrapers/julieblanner"), + kitchenstories: require("../scrapers/kitchenstories"), + melskitchencafe: require("../scrapers/melskitchencafe"), + minimalistbaker: require("../scrapers/minimalistbaker"), + myrecipes: require("../scrapers/myrecipes"), + nomnompaleo: require("../scrapers/nomnompaleo"), + omnivorescookbook: require("../scrapers/omnivorescookbook"), + pinchofyum: require("../scrapers/pinchofyum"), + recipetineats: require("../scrapers/recipetineats"), + seriouseats: require("../scrapers/seriouseats"), + simplyrecipes: require("../scrapers/simplyrecipes"), + smittenkitchen: require("../scrapers/smittenkitchen"), + tastesbetterfromscratch: require("../scrapers/tastesbetterfromscratch"), + tasteofhome: require("../scrapers/tasteofhome"), + theblackpeppercorn: require("../scrapers/theblackpeppercorn"), + therecipecritic: require("../scrapers/therecipecritic"), + thepioneerwoman: require("../scrapers/thepioneerwoman"), + therealfoodrds: require("../scrapers/therealfoodrds"), + thespruceeats: require("../scrapers/thespruceeats"), + whatsgabycooking: require("../scrapers/whatsgabycooking"), + woolworths: require("../scrapers/woolworths"), + yummly: require("../scrapers/yummly") +}; + +const instances = {}; + +/** + * A Singleton Factory 
that supplies an instance of a scraper based on a given URL + */ +class ScraperFactory { + getScraper(url) { + let parse = parseDomain(url); + if (parse) { + let domain = parse.domain; + if (!instances[domain]) { + if (domains[domain] !== undefined) { + instances[domain] = new domains[domain](url); + } else { + throw new Error("Site not yet supported"); + } + } + return instances[domain]; + } else { + throw new Error("Failed to parse domain"); + } + } +} + +const singletonFactory = new ScraperFactory(); +Object.freeze(singletonFactory); + +module.exports = singletonFactory; diff --git a/package.json b/package.json index 3a4545a..2617b8b 100644 --- a/package.json +++ b/package.json @@ -33,6 +33,7 @@ "homepage": "https://github.com/jadkins89/Recipe-Scraper#readme", "dependencies": { "cheerio": "^1.0.0-rc.3", + "node-fetch": "^2.6.1", "parse-domain": "^2.3.2", "puppeteer": "^1.20.0", "request": "^2.88.0" diff --git a/scrapers/AmbitiousKitchenScraper.js b/scrapers/AmbitiousKitchenScraper.js new file mode 100644 index 0000000..386af26 --- /dev/null +++ b/scrapers/AmbitiousKitchenScraper.js @@ -0,0 +1,56 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +class AmbitiousKitchenScraper extends BaseScraper { + constructor(url) { + super(url); + this.subUrl = "ambitiouskitchen.com/"; + } + + scrape($) { + this.setImage($); + this.recipe.name = $(".wprm-recipe-name").text(); + const { ingredients, instructions, time } = this.recipe; + + $(".wprm-recipe-ingredient").each((i, el) => { + let amount = $(el) + .find(".wprm-recipe-ingredient-amount") + .text(); + let unit = $(el) + .find(".wprm-recipe-ingredient-unit") + .text(); + let name = $(el) + .find(".wprm-recipe-ingredient-name") + .text(); + let ingredient = `${amount} ${unit} ${name}` + .replace(/\s\s+/g, " ") + .trim(); + ingredients.push(ingredient); + }); + + $(".wprm-recipe-instruction").each((i, el) => { + instructions.push( + $(el) + .text() + .trim() + ); + }); + + time.prep = + 
`${$(".wprm-recipe-prep_time").text()} ${$( + ".wprm-recipe-prep_time-unit" + ).text()}` || ""; + time.cook = + `${$(".wprm-recipe-cook_time").text()} ${$( + ".wprm-recipe-cook_time-unit" + ).text()}` || ""; + time.total = + `${$(".wprm-recipe-total_time").text()} ${$( + ".wprm-recipe-total_time-unit" + ).text()}` || ""; + this.recipe.servings = $(".wprm-recipe-servings").text() || ""; + } +} + +module.exports = AmbitiousKitchenScraper; diff --git a/scrapers/index.js b/scrapers/index.js index 3af002d..b8759de 100644 --- a/scrapers/index.js +++ b/scrapers/index.js @@ -1,62 +1,14 @@ -const parseDomain = require("parse-domain"); +"use strict"; -const domains = { - "101cookbooks": require("./101cookbooks"), - allrecipes: require("./allrecipes"), - ambitiouskitchen: require("./ambitiouskitchen"), - averiecooks: require("./averiecooks"), - bbc: require("./bbc"), - bbcgoodfood: require("./bbcgoodfood"), - bonappetit: require("./bonappetit"), - budgetbytes: require("./budgetbytes"), - centraltexasfoodbank: require("./centraltexasfoodbank"), - closetcooking: require("./closetcooking"), - cookieandkate: require("./cookieandkate"), - copykat: require("./copykat"), - damndelicious: require("./damndelicious"), - eatingwell: require("./eatingwell"), - epicurious: require("./epicurious"), - food: require("./food"), - foodandwine: require("./foodandwine"), - foodnetwork: require("./foodnetwork"), - gimmedelicious: require("./gimmedelicious"), - gimmesomeoven: require("./gimmesomeoven"), - julieblanner: require("./julieblanner"), - kitchenstories: require("./kitchenstories"), - melskitchencafe: require("./melskitchencafe"), - minimalistbaker: require("./minimalistbaker"), - myrecipes: require("./myrecipes"), - nomnompaleo: require("./nomnompaleo"), - omnivorescookbook: require("./omnivorescookbook"), - pinchofyum: require("./pinchofyum"), - recipetineats: require("./recipetineats"), - seriouseats: require("./seriouseats"), - simplyrecipes: require("./simplyrecipes"), - 
smittenkitchen: require("./smittenkitchen"), - tastesbetterfromscratch: require("./tastesbetterfromscratch"), - tasteofhome: require("./tasteofhome"), - theblackpeppercorn: require("./theblackpeppercorn"), - therecipecritic: require("./therecipecritic"), - thepioneerwoman: require("./thepioneerwoman"), - therealfoodrds: require("./therealfoodrds"), - thespruceeats: require("./thespruceeats"), - whatsgabycooking: require("./whatsgabycooking"), - woolworths: require("./woolworths"), - yummly: require("./yummly") -}; +const ScraperFactory = require("../helpers/ScraperFactory"); const recipeScraper = async url => { - let parse = parseDomain(url); - if (parse) { - let domain = parse.domain; - if (domains[domain] !== undefined) { - return await domains[domain](url); - } else { - throw new Error("Site not yet supported"); - } - } else { - throw new Error("Failed to parse domain"); - } + let klass = ScraperFactory.getScraper(url); + return await klass.fetchRecipe(); }; +recipeScraper( + "https://www.ambitiouskitchen.com/street-corn-pasta-salad-with-cilantro-pesto-goat-cheese/" +).then(recipe => console.log(recipe)); + module.exports = recipeScraper; From 94d8963be6fcf1c85d067367a95bdd4f7a40af95 Mon Sep 17 00:00:00 2001 From: jadkins89 Date: Wed, 13 Jan 2021 12:32:31 -0700 Subject: [PATCH 03/11] converting 3 more scrapes / updating testing logic --- helpers/BaseScraper.js | 36 +++++----- helpers/ScraperFactory.js | 6 +- scrapers/101cookbooks.js | 69 ------------------ scrapers/AmbitiousKitchenScraper.js | 5 +- scrapers/allrecipes.js | 105 ---------------------------- scrapers/ambitiouskitchen.js | 73 ------------------- scrapers/averiecooks.js | 85 ---------------------- scrapers/index.js | 4 -- test/101cookbooks.test.js | 10 +-- test/allRecipes.test.js | 34 +++++---- test/ambitiouskitchen.test.js | 10 +-- test/averiecooks.test.js | 5 +- test/helpers/commonRecipeTest.js | 28 +++++--- 13 files changed, 72 insertions(+), 398 deletions(-) delete mode 100644 
scrapers/101cookbooks.js delete mode 100644 scrapers/allrecipes.js delete mode 100644 scrapers/ambitiouskitchen.js delete mode 100644 scrapers/averiecooks.js diff --git a/helpers/BaseScraper.js b/helpers/BaseScraper.js index c81b283..d6ff3e5 100644 --- a/helpers/BaseScraper.js +++ b/helpers/BaseScraper.js @@ -9,10 +9,9 @@ const RecipeSchema = require("./recipe-schema"); * */ class BaseScraper { - constructor(url) { + constructor(url, subUrl = "") { this.url = url; - this.subUrl = ""; - this.recipe = new RecipeSchema(); + this.subUrl = subUrl; } /** @@ -20,10 +19,21 @@ class BaseScraper { */ checkUrl() { if (!this.url.includes(this.subUrl)) { - throw new Error(`url provided must includes '${this.subUrl}'`); + throw new Error(`url provided must include '${this.subUrl}'`); } } + createRecipeObject() { + this.recipe = new RecipeSchema(); + } + + /** + * + */ + defaultSetImage($) { + this.recipe.image = $("meta[property='og:image']").attr("content"); + } + /** * Fetches html from url * @returns {object} - Cheerio instance @@ -38,30 +48,24 @@ class BaseScraper { } } - /** - * Abstract method - * @param {object} $ - cheerio instance - * @returns {object} - an object representing the recipe - */ - scrape($) { - throw new Error("scrape is not defined in BaseScraper"); - } - /** * */ async fetchRecipe() { this.checkUrl(); const $ = await this.fetchDOMModel(); + this.createRecipeObject(); this.scrape($); return this.validateRecipe(); } /** - * + * Abstract method + * @param {object} $ - cheerio instance + * @returns {object} - an object representing the recipe */ - setImage($) { - this.recipe.image = $("meta[property='og:image']").attr("content"); + scrape($) { + throw new Error("scrape is not defined in BaseScraper"); } /** diff --git a/helpers/ScraperFactory.js b/helpers/ScraperFactory.js index 126a21a..57b46ee 100644 --- a/helpers/ScraperFactory.js +++ b/helpers/ScraperFactory.js @@ -3,10 +3,10 @@ const parseDomain = require("parse-domain"); const domains = { - 
"101cookbooks": require("../scrapers/101cookbooks"), - allrecipes: require("../scrapers/allrecipes"), + "101cookbooks": require("../scrapers/101CookbooksScraper"), + allrecipes: require("../scrapers/AllRecipesScraper"), ambitiouskitchen: require("../scrapers/AmbitiousKitchenScraper"), - averiecooks: require("../scrapers/averiecooks"), + averiecooks: require("../scrapers/AverieCooksScraper"), bbc: require("../scrapers/bbc"), bbcgoodfood: require("../scrapers/bbcgoodfood"), bonappetit: require("../scrapers/bonappetit"), diff --git a/scrapers/101cookbooks.js b/scrapers/101cookbooks.js deleted file mode 100644 index bdca857..0000000 --- a/scrapers/101cookbooks.js +++ /dev/null @@ -1,69 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); - -const oneHundredAndOne = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("101cookbooks.com/")) { - reject(new Error("url provided must include '101cookbooks.com/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - const body = $(".wprm-recipe-container"); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = body.children("h2").text(); - - $(".wprm-recipe-ingredient").each((i, el) => { - Recipe.ingredients.push( - $(el) - .text() - .replace(/\s\s+/g, " ") - .trim() - ); - }); - - $(".wprm-recipe-instruction-group").each((i, el) => { - Recipe.instructions.push( - $(el) - .children(".wprm-recipe-group-name") - .text() - ); - $(el) - .find(".wprm-recipe-instruction-text") - .each((i, elChild) => { - Recipe.instructions.push($(elChild).text()); - }); - }); - - Recipe.time.prep = $($(".wprm-recipe-time").get(1)).text(); - Recipe.time.total = $(".wprm-recipe-time") - .last() - .text(); - - Recipe.servings = $(".wprm-recipe-time") - .first() - .text() - .trim(); - - if ( - 
!Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = oneHundredAndOne; diff --git a/scrapers/AmbitiousKitchenScraper.js b/scrapers/AmbitiousKitchenScraper.js index 386af26..fb93e65 100644 --- a/scrapers/AmbitiousKitchenScraper.js +++ b/scrapers/AmbitiousKitchenScraper.js @@ -4,12 +4,11 @@ const BaseScraper = require("../helpers/BaseScraper"); class AmbitiousKitchenScraper extends BaseScraper { constructor(url) { - super(url); - this.subUrl = "ambitiouskitchen.com/"; + super(url, "ambitiouskitchen.com/"); } scrape($) { - this.setImage($); + this.defaultSetImage($); this.recipe.name = $(".wprm-recipe-name").text(); const { ingredients, instructions, time } = this.recipe; diff --git a/scrapers/allrecipes.js b/scrapers/allrecipes.js deleted file mode 100644 index ce7b8e8..0000000 --- a/scrapers/allrecipes.js +++ /dev/null @@ -1,105 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); - -const allRecipes = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("allrecipes.com/recipe")) { - reject(new Error("url provided must include 'allrecipes.com/recipe'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - // Check if recipe is in new format - if ((Recipe.name = $(".intro").text())) { - newAllRecipes($, Recipe); - } else if ((Recipe.name = $("#recipe-main-content").text())) { - oldAllRecipes($, Recipe); - } else { - reject(new Error("No recipe found on page")); - } - resolve(Recipe); - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -const newAllRecipes = ($, Recipe) => { - Recipe.image = 
$("meta[property='og:image']").attr("content"); - Recipe.name = Recipe.name.replace(/\s\s+/g, ""); - - $(".recipe-meta-item").each((i, el) => { - const title = $(el) - .children(".recipe-meta-item-header") - .text() - .replace(/\s*:|\s+(?=\s*)/g, ""); - const value = $(el) - .children(".recipe-meta-item-body") - .text() - .replace(/\s\s+/g, ""); - switch (title) { - case "prep": - Recipe.time.prep = value; - break; - case "cook": - Recipe.time.cook = value; - break; - case "total": - Recipe.time.total = value; - break; - case "additional": - Recipe.time.inactive = value; - break; - case "Servings": - Recipe.servings = value; - break; - default: - break; - } - }); - - $(".ingredients-item").each((i, el) => { - const ingredient = $(el) - .text() - .replace(/\s\s+/g, " ") - .trim(); - Recipe.ingredients.push(ingredient); - }); - $($(".instructions-section-item").find("p")).each((i, el) => { - const instruction = $(el).text(); - Recipe.instructions.push(instruction); - }); -}; - -const oldAllRecipes = ($, Recipe) => { - Recipe.image = $("meta[property='og:image']").attr("content"); - - $("#polaris-app label").each((i, el) => { - const item = $(el) - .text() - .replace(/\s\s+/g, ""); - if (item != "Add all ingredients to list" && item != "") { - Recipe.ingredients.push(item); - } - }); - - $(".step").each((i, el) => { - const step = $(el) - .text() - .replace(/\s\s+/g, ""); - if (step != "") { - Recipe.instructions.push(step); - } - }); - Recipe.time.prep = $("time[itemprop=prepTime]").text(); - Recipe.time.cook = $("time[itemprop=cookTime]").text(); - Recipe.time.ready = $("time[itemprop=totalTime]").text(); - Recipe.servings = $("#metaRecipeServings").attr("content"); -}; - -module.exports = allRecipes; diff --git a/scrapers/ambitiouskitchen.js b/scrapers/ambitiouskitchen.js deleted file mode 100644 index a99776c..0000000 --- a/scrapers/ambitiouskitchen.js +++ /dev/null @@ -1,73 +0,0 @@ -const cheerio = require("cheerio"); - -const RecipeSchema = 
require("../helpers/recipe-schema"); -const puppeteerFetch = require("../helpers/puppeteerFetch"); - -const ambitiousKitchen = url => { - return new Promise(async (resolve, reject) => { - if (!url.includes("ambitiouskitchen.com")) { - reject(new Error("url provided must include 'ambitiouskitchen.com'")); - } else { - try { - const html = await puppeteerFetch(url); - const Recipe = new RecipeSchema(); - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".wprm-recipe-name").text(); - - $(".wprm-recipe-ingredient").each((i, el) => { - let amount = $(el) - .find(".wprm-recipe-ingredient-amount") - .text(); - let unit = $(el) - .find(".wprm-recipe-ingredient-unit") - .text(); - let name = $(el) - .find(".wprm-recipe-ingredient-name") - .text(); - let ingredient = `${amount} ${unit} ${name}` - .replace(/\s\s+/g, " ") - .trim(); - Recipe.ingredients.push(ingredient); - }); - - $(".wprm-recipe-instruction").each((i, el) => { - Recipe.instructions.push( - $(el) - .text() - .replace(/\s\s+/g, "") - ); - }); - - Recipe.time.prep = - $(".wprm-recipe-prep_time").text() + - " " + - $(".wprm-recipe-prep_time-unit").text() || ""; - Recipe.time.cook = - $(".wprm-recipe-cook_time").text() + - " " + - $(".wprm-recipe-cook_time-unit").text() || ""; - Recipe.time.total = - $(".wprm-recipe-total_time").text() + - " " + - $(".wprm-recipe-total_time-unit").text() || ""; - Recipe.servings = $(".wprm-recipe-servings").text() || ""; - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } catch (error) { - reject(new Error("No recipe found on page")); - } - } - }); -}; - -module.exports = ambitiousKitchen; diff --git a/scrapers/averiecooks.js b/scrapers/averiecooks.js deleted file mode 100644 index 7b0c5c6..0000000 --- a/scrapers/averiecooks.js +++ /dev/null @@ -1,85 +0,0 @@ -const request = 
require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); - -const averieCooks = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("averiecooks.com")) { - reject(new Error("url provided must include 'averiecooks.com'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".innerrecipe") - .children("h2") - .first() - .text(); - - $(".cookbook-ingredients-list") - .children("li") - .each((i, el) => { - Recipe.ingredients.push( - $(el) - .text() - .trim() - .replace(/\s\s+/g, " ") - ); - }); - - $(".instructions") - .find("li") - .each((i, el) => { - Recipe.instructions.push($(el).text()); - }); - - $(".recipe-meta") - .children("p") - .each((i, el) => { - const title = $(el) - .children("strong") - .text() - .replace(/\s*:|\s+(?=\s*)/g, ""); - const value = $(el) - .text() - .replace(/[^:]*:/, "") - .trim(); - switch (title) { - case "PrepTime": - Recipe.time.prep = value; - break; - case "CookTime": - Recipe.time.cook = value; - break; - case "TotalTime": - Recipe.time.total = value; - break; - case "Yield": - Recipe.servings = value; - break; - default: - break; - } - }); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = averieCooks; diff --git a/scrapers/index.js b/scrapers/index.js index b8759de..8e926c8 100644 --- a/scrapers/index.js +++ b/scrapers/index.js @@ -7,8 +7,4 @@ const recipeScraper = async url => { return await klass.fetchRecipe(); }; -recipeScraper( - "https://www.ambitiouskitchen.com/street-corn-pasta-salad-with-cilantro-pesto-goat-cheese/" 
-).then(recipe => console.log(recipe)); - module.exports = recipeScraper; diff --git a/test/101cookbooks.test.js b/test/101cookbooks.test.js index 1e716d0..6a1c946 100644 --- a/test/101cookbooks.test.js +++ b/test/101cookbooks.test.js @@ -1,11 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const oneHundredAndOne = require("../scrapers/101cookbooks"); -const Constants = require("./constants/101cookbooksConstants"); +const constants = require("./constants/101cookbooksConstants"); -commonRecipeTest( - "101cookbooks", - oneHundredAndOne, - Constants, - "101cookbooks.com/" -); +commonRecipeTest("101cookbooks", constants, "101cookbooks.com/"); diff --git a/test/allRecipes.test.js b/test/allRecipes.test.js index a39dfda..f92f79b 100644 --- a/test/allRecipes.test.js +++ b/test/allRecipes.test.js @@ -1,39 +1,48 @@ "use strict"; -const expect = require("chai").expect; -const assert = require("chai").assert; +const { assert, expect } = require("chai"); -const allRecipes = require("../scrapers/allrecipes"); -const Constants = require("./constants/allRecipesConstants"); +const Scraper = require("../scrapers/AllRecipesScraper"); +const constants = require("./constants/allRecipesConstants"); describe("allRecipes", () => { + let allRecipes; + + before(() => { + allRecipes = new Scraper(); + }); + it("should fetch the expected recipe (old style)", async () => { - let actualRecipe = await allRecipes(Constants.testUrlOld); - expect(JSON.stringify(Constants.expectedRecipeOld)).to.equal( + allRecipes.url = constants.testUrlOld; + const actualRecipe = await allRecipes.fetchRecipe(); + expect(JSON.stringify(constants.expectedRecipeOld)).to.equal( JSON.stringify(actualRecipe) ); }); it("should fetch the expected recipe (new style)", async () => { - let actualRecipe = await allRecipes(Constants.testUrlNew); - expect(JSON.stringify(Constants.expectedRecipeNew)).to.equal( + allRecipes.url = constants.testUrlNew; + const actualRecipe = await 
allRecipes.fetchRecipe(); + expect(JSON.stringify(constants.expectedRecipeNew)).to.equal( JSON.stringify(actualRecipe) ); }); it("should throw an error if invalid url is used", async () => { try { - await allRecipes(Constants.invalidDomainUrl); + allRecipes.url = constants.invalidDomainUrl; + await allRecipes.fetchRecipe(); assert.fail("was not supposed to succeed"); } catch (error) { expect(error.message).to.equal( - "url provided must include 'allrecipes.com/recipe'" + "url provided must include 'allrecipes.com/recipe/'" ); } }); it("should throw an error if a problem occurred during page retrieval", async () => { try { - await allRecipes(Constants.invalidUrl); + allRecipes.url = constants.invalidUrl; + await allRecipes.fetchRecipe(); assert.fail("was not supposed to succeed"); } catch (error) { expect(error.message).to.equal("No recipe found on page"); @@ -42,7 +51,8 @@ describe("allRecipes", () => { it("should throw an error if non-recipe page is used", async () => { try { - await allRecipes(Constants.nonRecipeUrl); + allRecipes.url = constants.nonRecipeUrl; + await allRecipes.fetchRecipe(); assert.fail("was not supposed to succeed"); } catch (error) { expect(error.message).to.equal("No recipe found on page"); diff --git a/test/ambitiouskitchen.test.js b/test/ambitiouskitchen.test.js index af6f860..d45fa36 100644 --- a/test/ambitiouskitchen.test.js +++ b/test/ambitiouskitchen.test.js @@ -1,11 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const ambitiousKitchen = require("../scrapers/ambitiouskitchen"); -const Constants = require("./constants/ambitiouskitchenConstants"); +const constants = require("./constants/ambitiouskitchenConstants"); -commonRecipeTest( - "ambitiousKitchen", - ambitiousKitchen, - Constants, - "ambitiouskitchen.com" -); +commonRecipeTest("ambitiousKitchen", constants, "ambitiouskitchen.com/"); diff --git a/test/averiecooks.test.js b/test/averiecooks.test.js index 6367b3d..f7e6a67 100644 --- 
a/test/averiecooks.test.js +++ b/test/averiecooks.test.js @@ -1,6 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const averieCooks = require("../scrapers/averiecooks"); -const Constants = require("./constants/averiecooksConstants"); +const constants = require("./constants/averiecooksConstants"); -commonRecipeTest("averieCooks", averieCooks, Constants, "averiecooks.com"); +commonRecipeTest("averieCooks", constants, "averiecooks.com/"); diff --git a/test/helpers/commonRecipeTest.js b/test/helpers/commonRecipeTest.js index e657b58..48bf6cb 100644 --- a/test/helpers/commonRecipeTest.js +++ b/test/helpers/commonRecipeTest.js @@ -1,16 +1,24 @@ -const expect = require("chai").expect; -const assert = require("chai").assert; +const { assert, expect } = require("chai"); +const ScraperFactory = require("../../helpers/ScraperFactory"); -function commonRecipeTest(name, scraper, Constants, url) { +const commonRecipeTest = (name, constants, url) => { describe(name, () => { + let scraper; + + before(() => { + scraper = ScraperFactory.getScraper(url); + }); + it("should fetch the expected recipe", async () => { - let actualRecipe = await scraper(Constants.testUrl); - expect(Constants.expectedRecipe).to.deep.equal(actualRecipe); + scraper.url = constants.testUrl; + let actualRecipe = await scraper.fetchRecipe(); + expect(constants.expectedRecipe).to.deep.equal(actualRecipe); }); it("should throw an error if a problem occurred during page retrieval", async () => { try { - await scraper(Constants.invalidUrl); + scraper.url = constants.invalidUrl; + await scraper.fetchRecipe(); assert.fail("was not supposed to succeed"); } catch (error) { expect(error.message).to.equal("No recipe found on page"); @@ -19,7 +27,8 @@ function commonRecipeTest(name, scraper, Constants, url) { it("should throw an error if the url doesn't contain required sub-url", async () => { try { - await scraper(Constants.invalidDomainUrl); + scraper.url = 
constants.invalidDomainUrl; + await scraper.fetchRecipe(); assert.fail("was not supposed to succeed"); } catch (error) { expect(error.message).to.equal(`url provided must include '${url}'`); @@ -28,13 +37,14 @@ function commonRecipeTest(name, scraper, Constants, url) { it("should throw an error if non-recipe page is used", async () => { try { - await scraper(Constants.nonRecipeUrl); + scraper.url = constants.nonRecipeUrl; + await scraper.fetchRecipe(constants.nonRecipeUrl); assert.fail("was not supposed to succeed"); } catch (error) { expect(error.message).to.equal("No recipe found on page"); } }); }); -} +}; module.exports = commonRecipeTest; From 611f3afbb260c61c20d567f4f9a68b0a0f3ee000 Mon Sep 17 00:00:00 2001 From: jadkins89 Date: Wed, 13 Jan 2021 13:43:43 -0700 Subject: [PATCH 04/11] more refactored scrapes --- helpers/BaseScraper.js | 4 +- helpers/ScraperFactory.js | 4 +- scrapers/101CookbooksScraper.js | 50 ++++++++++++++++++ scrapers/AllRecipesScraper.js | 93 +++++++++++++++++++++++++++++++++ scrapers/AverieCooksScraper.js | 66 +++++++++++++++++++++++ scrapers/BbcGoodFoodScraper.js | 57 ++++++++++++++++++++ scrapers/BbcScraper.js | 40 ++++++++++++++ scrapers/bbc.js | 55 ------------------- scrapers/bbcgoodfood.js | 72 ------------------------- scrapers/index.js | 4 ++ test/bbc.test.js | 5 +- test/bbcgoodfood.test.js | 10 +--- 12 files changed, 319 insertions(+), 141 deletions(-) create mode 100644 scrapers/101CookbooksScraper.js create mode 100644 scrapers/AllRecipesScraper.js create mode 100644 scrapers/AverieCooksScraper.js create mode 100644 scrapers/BbcGoodFoodScraper.js create mode 100644 scrapers/BbcScraper.js delete mode 100644 scrapers/bbc.js delete mode 100644 scrapers/bbcgoodfood.js diff --git a/helpers/BaseScraper.js b/helpers/BaseScraper.js index d6ff3e5..b158ec7 100644 --- a/helpers/BaseScraper.js +++ b/helpers/BaseScraper.js @@ -31,7 +31,9 @@ class BaseScraper { * */ defaultSetImage($) { - this.recipe.image = 
$("meta[property='og:image']").attr("content"); + this.recipe.image = + $("meta[property='og:image']").attr("content") || + $("meta[name='og:image']").attr("content"); } /** diff --git a/helpers/ScraperFactory.js b/helpers/ScraperFactory.js index 57b46ee..1787e41 100644 --- a/helpers/ScraperFactory.js +++ b/helpers/ScraperFactory.js @@ -7,8 +7,8 @@ const domains = { allrecipes: require("../scrapers/AllRecipesScraper"), ambitiouskitchen: require("../scrapers/AmbitiousKitchenScraper"), averiecooks: require("../scrapers/AverieCooksScraper"), - bbc: require("../scrapers/bbc"), - bbcgoodfood: require("../scrapers/bbcgoodfood"), + bbc: require("../scrapers/BbcScraper"), + bbcgoodfood: require("../scrapers/BbcGoodFoodScraper"), bonappetit: require("../scrapers/bonappetit"), budgetbytes: require("../scrapers/budgetbytes"), centraltexasfoodbank: require("../scrapers/centraltexasfoodbank"), diff --git a/scrapers/101CookbooksScraper.js b/scrapers/101CookbooksScraper.js new file mode 100644 index 0000000..59a044b --- /dev/null +++ b/scrapers/101CookbooksScraper.js @@ -0,0 +1,50 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +class OneOOneCookbooksScraper extends BaseScraper { + constructor(url) { + super(url, "101cookbooks.com/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + const body = $(".wprm-recipe-container"); + this.recipe.name = body.children("h2").text(); + + $(".wprm-recipe-ingredient").each((i, el) => { + ingredients.push( + $(el) + .text() + .replace(/\s\s+/g, " ") + .trim() + ); + }); + + $(".wprm-recipe-instruction-group").each((i, el) => { + instructions.push( + $(el) + .children(".wprm-recipe-group-name") + .text() + ); + $(el) + .find(".wprm-recipe-instruction-text") + .each((i, elChild) => { + instructions.push($(elChild).text()); + }); + }); + + time.prep = $($(".wprm-recipe-time").get(1)).text(); + time.total = $(".wprm-recipe-time") + .last() + .text(); + + 
this.recipe.servings = $(".wprm-recipe-time") + .first() + .text() + .trim(); + } +} + +module.exports = OneOOneCookbooksScraper; diff --git a/scrapers/AllRecipesScraper.js b/scrapers/AllRecipesScraper.js new file mode 100644 index 0000000..f5be337 --- /dev/null +++ b/scrapers/AllRecipesScraper.js @@ -0,0 +1,93 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +class AmbitiousKitchenScraper extends BaseScraper { + constructor(url) { + super(url, "allrecipes.com/recipe/"); + } + + newScrape($) { + this.recipe.name = this.recipe.name.replace(/\s\s+/g, ""); + const { ingredients, instructions, time } = this.recipe; + $(".recipe-meta-item").each((i, el) => { + const title = $(el) + .children(".recipe-meta-item-header") + .text() + .replace(/\s*:|\s+(?=\s*)/g, ""); + const value = $(el) + .children(".recipe-meta-item-body") + .text() + .replace(/\s\s+/g, ""); + switch (title) { + case "prep": + time.prep = value; + break; + case "cook": + time.cook = value; + break; + case "total": + time.total = value; + break; + case "additional": + time.inactive = value; + break; + case "Servings": + this.recipe.servings = value; + break; + default: + break; + } + }); + + $(".ingredients-item").each((i, el) => { + const ingredient = $(el) + .text() + .replace(/\s\s+/g, " ") + .trim(); + ingredients.push(ingredient); + }); + + $($(".instructions-section-item").find("p")).each((i, el) => { + const instruction = $(el).text(); + instructions.push(instruction); + }); + } + + oldScrape($) { + const { ingredients, instructions, time } = this.recipe; + $("#polaris-app label").each((i, el) => { + const item = $(el) + .text() + .replace(/\s\s+/g, ""); + if (item !== "Add all ingredients to list" && item !== "") { + ingredients.push(item); + } + }); + + $(".step").each((i, el) => { + const step = $(el) + .text() + .replace(/\s\s+/g, ""); + if (step !== "") { + instructions.push(step); + } + }); + time.prep = $("time[itemprop=prepTime]").text(); + time.cook = 
$("time[itemprop=cookTime]").text(); + time.ready = $("time[itemprop=totalTime]").text(); + this.recipe.servings = $("#metaRecipeServings").attr("content"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + if ((this.recipe.name = $(".intro").text())) { + this.newScrape($); + } else if ((this.recipe.name = $("#recipe-main-content").text())) { + this.oldScrape($); + } + } +} + +module.exports = AmbitiousKitchenScraper; diff --git a/scrapers/AverieCooksScraper.js b/scrapers/AverieCooksScraper.js new file mode 100644 index 0000000..24162e4 --- /dev/null +++ b/scrapers/AverieCooksScraper.js @@ -0,0 +1,66 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +class AverieCooksScraper extends BaseScraper { + constructor(url) { + super(url, "averiecooks.com/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = $(".innerrecipe") + .children("h2") + .first() + .text(); + + $(".cookbook-ingredients-list") + .children("li") + .each((i, el) => { + ingredients.push( + $(el) + .text() + .trim() + .replace(/\s\s+/g, " ") + ); + }); + + $(".instructions") + .find("li") + .each((i, el) => { + instructions.push($(el).text()); + }); + + $(".recipe-meta") + .children("p") + .each((i, el) => { + const title = $(el) + .children("strong") + .text() + .replace(/\s*:|\s+(?=\s*)/g, ""); + const value = $(el) + .text() + .replace(/[^:]*:/, "") + .trim(); + switch (title) { + case "PrepTime": + time.prep = value; + break; + case "CookTime": + time.cook = value; + break; + case "TotalTime": + time.total = value; + break; + case "Yield": + this.recipe.servings = value; + break; + default: + break; + } + }); + } +} + +module.exports = AverieCooksScraper; diff --git a/scrapers/BbcGoodFoodScraper.js b/scrapers/BbcGoodFoodScraper.js new file mode 100644 index 0000000..49016b4 --- /dev/null +++ b/scrapers/BbcGoodFoodScraper.js @@ -0,0 +1,57 @@ 
+"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping bbcgoodfood.com + * @extends BaseScraper + */ +class BbcGoodFoodScraper extends BaseScraper { + constructor(url) { + super(url, "bbcgoodfood.com/recipes/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = $("meta[name='og:title']").attr("content"); + + $(".recipe-template__ingredients") + .find(".list-item") + .each((i, el) => { + ingredients.push( + $(el) + .text() + .replace(" ,", ",") + ); + }); + + $(".recipe-template__method-steps") + .find(".list-item") + .children("div") + .each((i, el) => { + instructions.push($(el).text()); + }); + + $(".cook-and-prep-time") + .find(".list-item") + .each((i, el) => { + const text = $(el).text(); + if (text.includes("Prep")) { + time.prep = $(el) + .find("time") + .text(); + } else if (text.includes("Cook")) { + time.cook = $(el) + .find("time") + .text(); + } + }); + + this.recipe.servings = $(".masthead__servings") + .text() + .replace("Makes ", ""); + } +} + +module.exports = BbcGoodFoodScraper; diff --git a/scrapers/BbcScraper.js b/scrapers/BbcScraper.js new file mode 100644 index 0000000..37b502b --- /dev/null +++ b/scrapers/BbcScraper.js @@ -0,0 +1,40 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping bbc.co + * @extends BaseScraper + */ +class BbcScraper extends BaseScraper { + constructor(url) { + super(url, "bbc.co.uk/food/recipes/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = $(".content-title__text").text(); + + $(".recipe-ingredients__list-item").each((i, el) => { + ingredients.push($(el).text()); + }); + + $(".recipe-method__list-item-text").each((i, el) => { + instructions.push($(el).text()); + }); + + time.prep = $(".recipe-metadata__prep-time") + .first() + .text(); + time.cook = 
$(".recipe-metadata__cook-time") + .first() + .text(); + + this.recipe.servings = $(".recipe-metadata__serving") + .first() + .text(); + } +} + +module.exports = BbcScraper; diff --git a/scrapers/bbc.js b/scrapers/bbc.js deleted file mode 100644 index 3775718..0000000 --- a/scrapers/bbc.js +++ /dev/null @@ -1,55 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); - -const bbc = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("bbc.co.uk/food/recipes/")) { - reject(new Error("url provided must include 'bbc.co.uk/food/recipes/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".content-title__text").text(); - - $(".recipe-ingredients__list-item").each((i, el) => { - Recipe.ingredients.push($(el).text()); - }); - - $(".recipe-method__list-item-text").each((i, el) => { - Recipe.instructions.push($(el).text()); - }); - - Recipe.time.prep = $(".recipe-metadata__prep-time") - .first() - .text(); - Recipe.time.cook = $(".recipe-metadata__cook-time") - .first() - .text(); - - Recipe.servings = $(".recipe-metadata__serving") - .first() - .text(); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = bbc; diff --git a/scrapers/bbcgoodfood.js b/scrapers/bbcgoodfood.js deleted file mode 100644 index a44c970..0000000 --- a/scrapers/bbcgoodfood.js +++ /dev/null @@ -1,72 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); - -const bbcGoodFood = url 
=> { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("bbcgoodfood.com/recipes/")) { - reject(new Error("url provided must include 'bbcgoodfood.com/recipes/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[name='og:image']").attr("content"); - Recipe.name = $("meta[name='og:title']").attr("content"); - - $(".recipe-template__ingredients") - .find(".list-item") - .each((i, el) => { - Recipe.ingredients.push( - $(el) - .text() - .replace(" ,", ",") - ); - }); - - $(".recipe-template__method-steps") - .find(".list-item") - .children("div") - .each((i, el) => { - Recipe.instructions.push($(el).text()); - }); - - $(".cook-and-prep-time") - .find(".list-item") - .each((i, el) => { - const text = $(el).text(); - if (text.includes("Prep")) { - Recipe.time.prep = $(el) - .find("time") - .text(); - } else if (text.includes("Cook")) { - Recipe.time.cook = $(el) - .find("time") - .text(); - } - }); - - Recipe.servings = $(".masthead__servings") - .text() - .replace("Makes ", ""); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = bbcGoodFood; diff --git a/scrapers/index.js b/scrapers/index.js index 8e926c8..cb858da 100644 --- a/scrapers/index.js +++ b/scrapers/index.js @@ -7,4 +7,8 @@ const recipeScraper = async url => { return await klass.fetchRecipe(); }; +recipeScraper( + "https://www.bbcgoodfood.com/recipes/doughnut-muffins" +).then(recipe => console.log(recipe)); + module.exports = recipeScraper; diff --git a/test/bbc.test.js b/test/bbc.test.js index 36baabe..f23d8b0 100644 --- a/test/bbc.test.js +++ b/test/bbc.test.js @@ -1,6 +1,5 @@ "use strict"; const commonRecipeTest = 
require("./helpers/commonRecipeTest"); -const bbc = require("../scrapers/bbc"); -const Constants = require("./constants/bbcConstants"); +const constants = require("./constants/bbcConstants"); -commonRecipeTest("bbc", bbc, Constants, "bbc.co.uk/food/recipes/"); +commonRecipeTest("bbc", constants, "bbc.co.uk/food/recipes/"); diff --git a/test/bbcgoodfood.test.js b/test/bbcgoodfood.test.js index 350c513..e69718a 100644 --- a/test/bbcgoodfood.test.js +++ b/test/bbcgoodfood.test.js @@ -1,11 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const bbcGoodFood = require("../scrapers/bbcgoodfood"); -const Constants = require("./constants/bbcgoodfoodConstants"); +const constants = require("./constants/bbcgoodfoodConstants"); -commonRecipeTest( - "bbcGoodFood", - bbcGoodFood, - Constants, - "bbcgoodfood.com/recipes/" -); +commonRecipeTest("bbcGoodFood", constants, "bbcgoodfood.com/recipes/"); From 378c6b0cc310d85bb854496f442bf895a22f67b0 Mon Sep 17 00:00:00 2001 From: jadkins89 Date: Wed, 13 Jan 2021 13:58:50 -0700 Subject: [PATCH 05/11] Bon Appetit, BudgetBytes and CentralTexasFood refactored --- helpers/ScraperFactory.js | 6 +- scrapers/BonAppetitScraper.js | 42 +++++++++ scrapers/BudgetBytesScraper.js | 46 ++++++++++ scrapers/CentralTexasFoodBankScraper.js | 94 ++++++++++++++++++++ scrapers/bonappetit.js | 60 ------------- scrapers/budgetbytes.js | 61 ------------- scrapers/centraltexasfoodbank.js | 111 ------------------------ scrapers/index.js | 2 +- test/bonappetit.test.js | 5 +- test/budgetbytes.test.js | 5 +- 10 files changed, 190 insertions(+), 242 deletions(-) create mode 100644 scrapers/BonAppetitScraper.js create mode 100644 scrapers/BudgetBytesScraper.js create mode 100644 scrapers/CentralTexasFoodBankScraper.js delete mode 100644 scrapers/bonappetit.js delete mode 100644 scrapers/budgetbytes.js delete mode 100644 scrapers/centraltexasfoodbank.js diff --git a/helpers/ScraperFactory.js b/helpers/ScraperFactory.js index 
1787e41..068631f 100644 --- a/helpers/ScraperFactory.js +++ b/helpers/ScraperFactory.js @@ -9,9 +9,9 @@ const domains = { averiecooks: require("../scrapers/AverieCooksScraper"), bbc: require("../scrapers/BbcScraper"), bbcgoodfood: require("../scrapers/BbcGoodFoodScraper"), - bonappetit: require("../scrapers/bonappetit"), - budgetbytes: require("../scrapers/budgetbytes"), - centraltexasfoodbank: require("../scrapers/centraltexasfoodbank"), + bonappetit: require("../scrapers/BonAppetitScraper"), + budgetbytes: require("../scrapers/BudgetBytesScraper"), + centraltexasfoodbank: require("../scrapers/CentralTexasFoodBankScraper"), closetcooking: require("../scrapers/closetcooking"), cookieandkate: require("../scrapers/cookieandkate"), copykat: require("../scrapers/copykat"), diff --git a/scrapers/BonAppetitScraper.js b/scrapers/BonAppetitScraper.js new file mode 100644 index 0000000..9cba469 --- /dev/null +++ b/scrapers/BonAppetitScraper.js @@ -0,0 +1,42 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping bonappetit.com + * @extends BaseScraper + */ +class BonAppetitScraper extends BaseScraper { + constructor(url) { + super(url, "bonappetit.com/recipe/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + + this.recipe.name = $("meta[property='og:title']").attr("content"); + + const container = $('div[data-testid="IngredientList"]'); + const ingredientsContainer = container.children("div"); + const units = ingredientsContainer.children("p"); + const ingrDivs = ingredientsContainer.children("div"); + + units.each((i, el) => { + ingredients.push(`${$(el).text()} ${$(ingrDivs[i]).text()}`); + }); + + const instructionContainer = $('div[data-testid="InstructionsWrapper"]'); + + instructionContainer.find("p").each((i, el) => { + instructions.push($(el).text()); + }); + + this.recipe.servings = container + .children("p") + .text() + .split(" ")[0]; + } +} + 
+module.exports = BonAppetitScraper; diff --git a/scrapers/BudgetBytesScraper.js b/scrapers/BudgetBytesScraper.js new file mode 100644 index 0000000..c28a2e2 --- /dev/null +++ b/scrapers/BudgetBytesScraper.js @@ -0,0 +1,46 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping budgetbytes.com + * @extends BaseScraper + */ +class BudgetBytesScraper extends BaseScraper { + constructor(url) { + super(url, "budgetbytes.com/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = $(".wprm-recipe-name").text(); + + $(".wprm-recipe-ingredient-notes").remove(); + $(".wprm-recipe-ingredient").each((i, el) => { + ingredients.push( + $(el) + .text() + .trim() + ); + }); + + $(".wprm-recipe-instruction-text").each((i, el) => { + instructions.push($(el).text()); + }); + + time.prep = $(".wprm-recipe-prep-time-label") + .next() + .text(); + time.cook = $(".wprm-recipe-cook-time-label") + .next() + .text(); + time.total = $(".wprm-recipe-total-time-label") + .next() + .text(); + + this.recipe.servings = $(".wprm-recipe-servings").text(); + } +} + +module.exports = BudgetBytesScraper; diff --git a/scrapers/CentralTexasFoodBankScraper.js b/scrapers/CentralTexasFoodBankScraper.js new file mode 100644 index 0000000..0437db6 --- /dev/null +++ b/scrapers/CentralTexasFoodBankScraper.js @@ -0,0 +1,94 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); +const baseUrl = "https://www.centraltexasfoodbank.org"; + +/** + * Class for scraping centraltexasfoodbank.org + * @extends BaseScraper + */ +class CentralTexasFoodBankScraper extends BaseScraper { + constructor(url) { + super(url, "centraltexasfoodbank.org/recipe"); + } + + scrape($) { + const { ingredients, instructions, time } = this.recipe; + + this.recipe.image = + baseUrl + + $(".middle-section") + .find("img[typeof='foaf:Image']") + .first() + .prop("src"); + + this.recipe.name = 
$("#block-basis-page-title") + .find("span") + .text() + .toLowerCase() + .replace(/\b\w/g, l => l.toUpperCase()); + + $(".ingredients-container") + .find(".field-item") + .each((i, el) => { + ingredients.push( + $(el) + .text() + .trim() + ); + }); + + // Try a different pattern if first one fails + if (!ingredients.length) { + $(".field-name-field-ingredients") + .children("div") + .children("div") + .each((i, el) => { + ingredients.push( + $(el) + .text() + .trim() + ); + }); + } + + $(".bottom-section") + .find("li") + .each((i, el) => { + instructions.push($(el).text()); + }); + + // Try a different pattern if first one fails + if (!instructions.length) { + let done = false; + $(".bottom-section") + .find("p") + .each((i, el) => { + if (!done && !$(el).children("strong").length) { + let instructionEls = $(el) + .text() + .trim() + .replace(/\s\s+/g, " "); + if (!instructionEls.length) done = true; + let instructionList = instructionEls + .replace(/\d+\.\s/g, "") + .split("\n") + .filter(instruction => !!instruction.length); + instructions.push(...instructionList); + } + }); + } + + time.prep = $(".field-name-field-prep-time") + .find("div") + .text(); + time.cook = $(".field-name-field-cooking-time") + .find("div") + .text(); + this.recipe.servings = $(".field-name-field-serves-") + .find("div") + .text(); + } +} + +module.exports = CentralTexasFoodBankScraper; diff --git a/scrapers/bonappetit.js b/scrapers/bonappetit.js deleted file mode 100644 index 1cddfb4..0000000 --- a/scrapers/bonappetit.js +++ /dev/null @@ -1,60 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); - -const bonAppetit = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("bonappetit.com/recipe/")) { - reject(new Error("url provided must include 'bonappetit.com/recipe/'")); - } else { - request(url, (error, response, html) => { - if (!error && 
response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $("meta[property='og:title']").attr("content"); - - const container = $('div[data-testid="IngredientList"]'); - const ingredientsContainer = container.children("div"); - const units = ingredientsContainer.children("p"); - const ingredients = ingredientsContainer.children("div"); - - units.each((i, el) => { - Recipe.ingredients.push( - `${$(el).text()} ${$(ingredients[i]).text()}` - ); - }); - - const instructionContainer = $( - 'div[data-testid="InstructionsWrapper"]' - ); - - instructionContainer.find("p").each((i, el) => { - Recipe.instructions.push($(el).text()); - }); - - Recipe.servings = container - .children("p") - .text() - .split(" ")[0]; - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = bonAppetit; diff --git a/scrapers/budgetbytes.js b/scrapers/budgetbytes.js deleted file mode 100644 index 902c131..0000000 --- a/scrapers/budgetbytes.js +++ /dev/null @@ -1,61 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); - -const budgetBytes = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("budgetbytes.com/")) { - reject(new Error("url provided must include 'budgetbytes.com/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".wprm-recipe-name").text(); - - $(".wprm-recipe-ingredient-notes").remove(); - $(".wprm-recipe-ingredient").each((i, el) => { - Recipe.ingredients.push( 
- $(el) - .text() - .trim() - ); - }); - - $(".wprm-recipe-instruction-text").each((i, el) => { - Recipe.instructions.push($(el).text()); - }); - - Recipe.time.prep = $(".wprm-recipe-prep-time-label") - .next() - .text(); - Recipe.time.cook = $(".wprm-recipe-cook-time-label") - .next() - .text(); - Recipe.time.total = $(".wprm-recipe-total-time-label") - .next() - .text(); - - Recipe.servings = $(".wprm-recipe-servings").text(); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = budgetBytes; diff --git a/scrapers/centraltexasfoodbank.js b/scrapers/centraltexasfoodbank.js deleted file mode 100644 index 3ece27f..0000000 --- a/scrapers/centraltexasfoodbank.js +++ /dev/null @@ -1,111 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); -const baseUrl = "https://www.centraltexasfoodbank.org"; - -const centralTexasFoodBank = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("centraltexasfoodbank.org/recipe")) { - reject( - new Error( - "url provided must include 'centraltexasfoodbank.org/recipe/'" - ) - ); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = - baseUrl + - $(".middle-section") - .find("img[typeof='foaf:Image']") - .first() - .prop("src"); - - Recipe.name = $("#block-basis-page-title") - .find("span") - .text() - .toLowerCase() - .replace(/\b\w/g, l => l.toUpperCase()); - - $(".ingredients-container") - .find(".field-item") - .each((i, el) => { - Recipe.ingredients.push( - $(el) - .text() - .trim() - ); - }); - - // Try a different pattern if first one fails - if 
(!Recipe.ingredients.length) { - $(".field-name-field-ingredients") - .children("div") - .children("div") - .each((i, el) => { - Recipe.ingredients.push( - $(el) - .text() - .trim() - ); - }); - } - - $(".bottom-section") - .find("li") - .each((i, el) => { - Recipe.instructions.push($(el).text()); - }); - - // Try a different pattern if first one fails - if (!Recipe.instructions.length) { - let done = false; - $(".bottom-section") - .find("p") - .each((i, el) => { - if (!done && !$(el).children("strong").length) { - let instructions = $(el) - .text() - .trim() - .replace(/\s\s+/g, " "); - if (!instructions.length) done = true; - let instructionList = instructions - .replace(/\d+\.\s/g, "") - .split("\n") - .filter(instruction => !!instruction.length); - Recipe.instructions.push(...instructionList); - } - }); - } - - Recipe.time.prep = $(".field-name-field-prep-time") - .find("div") - .text(); - Recipe.time.cook = $(".field-name-field-cooking-time") - .find("div") - .text(); - Recipe.servings = $(".field-name-field-serves-") - .find("div") - .text(); - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = centralTexasFoodBank; diff --git a/scrapers/index.js b/scrapers/index.js index cb858da..b6ed056 100644 --- a/scrapers/index.js +++ b/scrapers/index.js @@ -8,7 +8,7 @@ const recipeScraper = async url => { }; recipeScraper( - "https://www.bbcgoodfood.com/recipes/doughnut-muffins" + "https://www.centraltexasfoodbank.org/recipe/crock-pot-chicken-mole" ).then(recipe => console.log(recipe)); module.exports = recipeScraper; diff --git a/test/bonappetit.test.js b/test/bonappetit.test.js index f811af3..d10dde7 100644 --- a/test/bonappetit.test.js +++ b/test/bonappetit.test.js @@ -1,6 +1,5 @@ "use strict"; const commonRecipeTest = 
require("./helpers/commonRecipeTest"); -const bonAppetit = require("../scrapers/bonappetit"); -const Constants = require("./constants/bonappetitConstants"); +const constants = require("./constants/bonappetitConstants"); -commonRecipeTest("bonAppetit", bonAppetit, Constants, "bonappetit.com/recipe/"); +commonRecipeTest("bonAppetit", constants, "bonappetit.com/recipe/"); diff --git a/test/budgetbytes.test.js b/test/budgetbytes.test.js index 511d951..e3befa1 100644 --- a/test/budgetbytes.test.js +++ b/test/budgetbytes.test.js @@ -1,6 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const budgetBytes = require("../scrapers/budgetbytes"); -const Constants = require("./constants/budgetbytesConstants"); +const constants = require("./constants/budgetbytesConstants"); -commonRecipeTest("budgetBytes", budgetBytes, Constants, "budgetbytes.com/"); +commonRecipeTest("budgetBytes", constants, "budgetbytes.com/"); From 253dd1bb4873f02bed234852d4ff456a93ea3282 Mon Sep 17 00:00:00 2001 From: jadkins89 Date: Thu, 14 Jan 2021 14:42:19 -0700 Subject: [PATCH 06/11] implement puppeteer super class and closetcooking --- helpers/PuppeteerScraper.js | 85 ++++++++++++++++++++++++++++++++ helpers/ScraperFactory.js | 2 +- scrapers/ClosetCookingScraper.js | 50 +++++++++++++++++++ scrapers/index.js | 2 +- 4 files changed, 137 insertions(+), 2 deletions(-) create mode 100644 helpers/PuppeteerScraper.js create mode 100644 scrapers/ClosetCookingScraper.js diff --git a/helpers/PuppeteerScraper.js b/helpers/PuppeteerScraper.js new file mode 100644 index 0000000..145798a --- /dev/null +++ b/helpers/PuppeteerScraper.js @@ -0,0 +1,85 @@ +"use strict"; + +const puppeteer = require("puppeteer"); +const cheerio = require("cheerio"); + +const blockedResourceTypes = [ + "image", + "media", + "font", + "texttrack", + "object", + "beacon", + "csp_report", + "imageset", + "stylesheet", + "font" +]; + +const skippedResources = [ + "quantserve", + "adzerk", + "doubleclick", 
+ "adition", + "exelator", + "sharethrough", + "cdn.api.twitter", + "google-analytics", + "googletagmanager", + "google", + "fontawesome", + "facebook", + "analytics", + "optimizely", + "clicktale", + "mixpanel", + "zedo", + "clicksor", + "tiqcdn" +]; + +const BaseScraper = require("./BaseScraper"); + +/** + * Inheritable class which uses puppeteer instead of a simple http request + */ +class PuppeteerScraper extends BaseScraper { + /** + * @override + * Fetches html from url using puppeteer headless browser + * @returns {object} - Cheerio instance + */ + async fetchDOMModel() { + const browser = await puppeteer.launch({ + headless: true + }); + + const page = await browser.newPage(); + await page.setRequestInterception(true); + + await page.on("request", req => { + const requestUrl = req._url.split("?")[0].split("#")[0]; + if ( + blockedResourceTypes.indexOf(req.resourceType()) !== -1 || + skippedResources.some(resource => requestUrl.indexOf(resource) !== -1) + ) { + req.abort(); + } else { + req.continue(); + } + }); + + const response = await page.goto(this.url); + + let html; + if (response._status < 400) { + html = await page.content(); + } else { + throw new Error(response._status); + } + browser.close().catch(err => {}); + return cheerio.load(html); + } +} + +module.exports = PuppeteerScraper; diff --git a/helpers/ScraperFactory.js b/helpers/ScraperFactory.js index 068631f..3e4709d 100644 --- a/helpers/ScraperFactory.js +++ b/helpers/ScraperFactory.js @@ -12,7 +12,7 @@ const domains = { bonappetit: require("../scrapers/BonAppetitScraper"), budgetbytes: require("../scrapers/BudgetBytesScraper"), centraltexasfoodbank: require("../scrapers/CentralTexasFoodBankScraper"), - closetcooking: require("../scrapers/closetcooking"), + closetcooking: require("../scrapers/ClosetCookingScraper"), cookieandkate: require("../scrapers/cookieandkate"), copykat: require("../scrapers/copykat"), damndelicious: require("../scrapers/damndelicious"), diff --git 
a/scrapers/ClosetCookingScraper.js b/scrapers/ClosetCookingScraper.js new file mode 100644 index 0000000..c2db866 --- /dev/null +++ b/scrapers/ClosetCookingScraper.js @@ -0,0 +1,50 @@ +"use strict"; + +const PuppeteerScraper = require("../helpers/PuppeteerScraper"); + +/** + * Class for scraping closetcooking.com + * @extends BaseScraper + */ +class ClosetCookingScraper extends PuppeteerScraper { + constructor(url) { + super(url, "closetcooking.com/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, tags, time } = this.recipe; + this.recipe.name = $(".recipe_title").text(); + + $(".ingredients") + .children("h6, li") + .each((i, el) => { + ingredients.push($(el).text()); + }); + + $(".instructions") + .children("h6, li") + .each((i, el) => { + instructions.push($(el).text()); + }); + + $("a[rel='category tag']").each((i, el) => { + tags.push($(el).text()); + }); + + let metaData = $(".time"); + let prepText = metaData.first().text(); + time.prep = prepText.slice(prepText.indexOf(":") + 1).trim(); + let cookText = $(metaData.get(1)).text(); + time.cook = cookText.slice(cookText.indexOf(":") + 1).trim(); + let totalText = $(metaData.get(2)).text(); + time.total = totalText.slice(totalText.indexOf(":") + 1).trim(); + + let servingsText = $(".yield").text(); + this.recipe.servings = servingsText + .slice(servingsText.indexOf(":") + 1) + .trim(); + } +} + +module.exports = ClosetCookingScraper; diff --git a/scrapers/index.js b/scrapers/index.js index b6ed056..3297f29 100644 --- a/scrapers/index.js +++ b/scrapers/index.js @@ -8,7 +8,7 @@ const recipeScraper = async url => { }; recipeScraper( - "https://www.centraltexasfoodbank.org/recipe/crock-pot-chicken-mole" + "https://www.closetcooking.com/reina-pepiada-arepa-chicken-and-avocado-sandwich/" ).then(recipe => console.log(recipe)); module.exports = recipeScraper; From dfe776fbc93133d88163dfee54134f47b168a901 Mon Sep 17 00:00:00 2001 From: jadkins89 Date: Thu, 14 Jan 2021 15:11:34 -0700 
Subject: [PATCH 07/11] all scrapes that use puppeteer --- helpers/PuppeteerScraper.js | 8 ++- helpers/ScraperFactory.js | 14 ++-- helpers/puppeteerFetch.js | 76 --------------------- scrapers/ClosetCookingScraper.js | 2 +- scrapers/CookieAndKateScraper.js | 48 ++++++++++++++ scrapers/CopyKatScraper.js | 53 +++++++++++++++ scrapers/DamnDeliciousScraper.js | 63 ++++++++++++++++++ scrapers/NomNomPaleoScraper.js | 55 ++++++++++++++++ scrapers/TastesBetterFromScratchScraper.js | 58 ++++++++++++++++ scrapers/TheRealFoodDrsScraper.js | 58 ++++++++++++++++ scrapers/YummlyScraper.js | 71 ++++++++++++++++++++ scrapers/closetcooking.js | 67 ------------------- scrapers/cookieandkate.js | 66 ------------------- scrapers/copykat.js | 67 ------------------- scrapers/damndelicious.js | 77 ---------------------- scrapers/index.js | 2 +- scrapers/nomnompaleo.js | 66 ------------------- scrapers/tastesbetterfromscratch.js | 74 --------------------- scrapers/therealfoodrds.js | 71 -------------------- 19 files changed, 422 insertions(+), 574 deletions(-) delete mode 100644 helpers/puppeteerFetch.js create mode 100644 scrapers/CookieAndKateScraper.js create mode 100644 scrapers/CopyKatScraper.js create mode 100644 scrapers/DamnDeliciousScraper.js create mode 100644 scrapers/NomNomPaleoScraper.js create mode 100644 scrapers/TastesBetterFromScratchScraper.js create mode 100644 scrapers/TheRealFoodDrsScraper.js create mode 100644 scrapers/YummlyScraper.js delete mode 100644 scrapers/closetcooking.js delete mode 100644 scrapers/cookieandkate.js delete mode 100644 scrapers/copykat.js delete mode 100644 scrapers/damndelicious.js delete mode 100644 scrapers/nomnompaleo.js delete mode 100644 scrapers/tastesbetterfromscratch.js delete mode 100644 scrapers/therealfoodrds.js diff --git a/helpers/PuppeteerScraper.js b/helpers/PuppeteerScraper.js index 145798a..d675988 100644 --- a/helpers/PuppeteerScraper.js +++ b/helpers/PuppeteerScraper.js @@ -44,6 +44,12 @@ const BaseScraper = 
require("./BaseScraper"); * Inheritable class which uses puppeteer instead of a simple http request */ class PuppeteerScraper extends BaseScraper { + /** + * + */ + async customPoll(page) { + return true; + } /** * @override * Fetches html from url using puppeteer headless browser @@ -53,7 +59,6 @@ class PuppeteerScraper extends BaseScraper { const browser = await puppeteer.launch({ headless: true }); - const page = await browser.newPage(); await page.setRequestInterception(true); @@ -73,6 +78,7 @@ class PuppeteerScraper extends BaseScraper { let html; if (response._status < 400) { + await this.customPoll(page); html = await page.content(); } else { throw new Error(response._status); diff --git a/helpers/ScraperFactory.js b/helpers/ScraperFactory.js index 3e4709d..4c80c86 100644 --- a/helpers/ScraperFactory.js +++ b/helpers/ScraperFactory.js @@ -13,9 +13,9 @@ const domains = { budgetbytes: require("../scrapers/BudgetBytesScraper"), centraltexasfoodbank: require("../scrapers/CentralTexasFoodBankScraper"), closetcooking: require("../scrapers/ClosetCookingScraper"), - cookieandkate: require("../scrapers/cookieandkate"), - copykat: require("../scrapers/copykat"), - damndelicious: require("../scrapers/damndelicious"), + cookieandkate: require("../scrapers/CookieAndKateScraper"), + copykat: require("../scrapers/CopyKatScraper"), + damndelicious: require("../scrapers/DamnDeliciousScraper"), eatingwell: require("../scrapers/eatingwell"), epicurious: require("../scrapers/epicurious"), food: require("../scrapers/food"), @@ -28,23 +28,23 @@ const domains = { melskitchencafe: require("../scrapers/melskitchencafe"), minimalistbaker: require("../scrapers/minimalistbaker"), myrecipes: require("../scrapers/myrecipes"), - nomnompaleo: require("../scrapers/nomnompaleo"), + nomnompaleo: require("../scrapers/NomNomPaleoScraper"), omnivorescookbook: require("../scrapers/omnivorescookbook"), pinchofyum: require("../scrapers/pinchofyum"), recipetineats: 
require("../scrapers/recipetineats"), seriouseats: require("../scrapers/seriouseats"), simplyrecipes: require("../scrapers/simplyrecipes"), smittenkitchen: require("../scrapers/smittenkitchen"), - tastesbetterfromscratch: require("../scrapers/tastesbetterfromscratch"), + tastesbetterfromscratch: require("../scrapers/TastesBetterFromScratchScraper"), tasteofhome: require("../scrapers/tasteofhome"), theblackpeppercorn: require("../scrapers/theblackpeppercorn"), therecipecritic: require("../scrapers/therecipecritic"), thepioneerwoman: require("../scrapers/thepioneerwoman"), - therealfoodrds: require("../scrapers/therealfoodrds"), + therealfoodrds: require("../scrapers/TheRealFoodDrsScraper"), thespruceeats: require("../scrapers/thespruceeats"), whatsgabycooking: require("../scrapers/whatsgabycooking"), woolworths: require("../scrapers/woolworths"), - yummly: require("../scrapers/yummly") + yummly: require("../scrapers/YummlyScraper") }; const instances = {}; diff --git a/helpers/puppeteerFetch.js b/helpers/puppeteerFetch.js deleted file mode 100644 index 9162f96..0000000 --- a/helpers/puppeteerFetch.js +++ /dev/null @@ -1,76 +0,0 @@ -const puppeteer = require("puppeteer"); - -const blockedResourceTypes = [ - "image", - "media", - "font", - "texttrack", - "object", - "beacon", - "csp_report", - "imageset", - "stylesheet", - "font" -]; - -const skippedResources = [ - "quantserve", - "adzerk", - "doubleclick", - "adition", - "exelator", - "sharethrough", - "cdn.api.twitter", - "google-analytics", - "googletagmanager", - "google", - "fontawesome", - "facebook", - "analytics", - "optimizely", - "clicktale", - "mixpanel", - "zedo", - "clicksor", - "tiqcdn" -]; - -const puppeteerFetch = async url => { - const browser = await puppeteer.launch({ - headless: true - }); - - const page = await browser.newPage(); - await page.setRequestInterception(true); - - await page.on("request", req => { - const requestUrl = req._url.split("?")[0].split("#")[0]; - if ( - 
blockedResourceTypes.indexOf(req.resourceType()) !== -1 || - skippedResources.some(resource => requestUrl.indexOf(resource) !== -1) - ) { - req.abort(); - } else { - req.continue(); - } - }); - - const response = await page.goto(url); - - if (response._status < 400) { - let html = await page.content(); - try { - await browser.close(); - } finally { - return html; - } // avoid websocket error if browser already closed - } else { - try { - await browser.close(); - } finally { - return Promise.reject(response._status); - } - } -}; - -module.exports = puppeteerFetch; diff --git a/scrapers/ClosetCookingScraper.js b/scrapers/ClosetCookingScraper.js index c2db866..ca35336 100644 --- a/scrapers/ClosetCookingScraper.js +++ b/scrapers/ClosetCookingScraper.js @@ -4,7 +4,7 @@ const PuppeteerScraper = require("../helpers/PuppeteerScraper"); /** * Class for scraping closetcooking.com - * @extends BaseScraper + * @extends PuppeteerScraper */ class ClosetCookingScraper extends PuppeteerScraper { constructor(url) { diff --git a/scrapers/CookieAndKateScraper.js b/scrapers/CookieAndKateScraper.js new file mode 100644 index 0000000..c525f53 --- /dev/null +++ b/scrapers/CookieAndKateScraper.js @@ -0,0 +1,48 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping cookieandkate.com + * @extends BaseScraper + */ +class CookieAndKateScraper extends BaseScraper { + constructor(url) { + super(url, "cookieandkate.com/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, tags, time } = this.recipe; + this.recipe.name = $(".tasty-recipes") + .children("h2") + .text(); + + $(".tasty-recipe-ingredients") + .find("h4, li") + .each((i, el) => { + ingredients.push($(el).text()); + }); + + $(".tasty-recipe-instructions") + .find("li") + .each((i, el) => { + instructions.push($(el).text()); + }); + + $("a[rel='category tag']").each((i, el) => { + tags.push($(el).text()); + }); + + time.prep = 
$(".tasty-recipes-prep-time").text(); + time.cook = $(".tasty-recipes-cook-time").text(); + time.total = $(".tasty-recipes-total-time").text(); + + $(".tasty-recipes-yield-scale").remove(); + this.recipe.servings = $(".tasty-recipes-yield") + .text() + .trim(); + } +} + +module.exports = CookieAndKateScraper; diff --git a/scrapers/CopyKatScraper.js b/scrapers/CopyKatScraper.js new file mode 100644 index 0000000..2b6acf6 --- /dev/null +++ b/scrapers/CopyKatScraper.js @@ -0,0 +1,53 @@ +"use strict"; + +const PuppeteerScraper = require("../helpers/PuppeteerScraper"); + +/** + * Class for scraping copykat.com + * @extends PuppeteerScraper + */ +class CopyKatScraper extends PuppeteerScraper { + constructor(url) { + super(url, "copykat.com/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = $( + $(".wprm-recipe-container").find(".wprm-recipe-name") + ).text(); + + $(".wprm-recipe-ingredient").each((i, el) => { + ingredients.push( + $(el) + .text() + .replace(/\s\s+/g, " ") + .trim() + ); + }); + + $(".wprm-recipe-instructions").each((i, el) => { + instructions.push( + $(el) + .text() + .replace(/\s\s+/g, " ") + .trim() + ); + }); + + time.prep = $( + $(".wprm-recipe-prep-time-container").children(".wprm-recipe-time") + ).text(); + time.cook = $( + $(".wprm-recipe-cook-time-container").children(".wprm-recipe-time") + ).text(); + time.total = $( + $(".wprm-recipe-total-time-container").children(".wprm-recipe-time") + ).text(); + + this.recipe.servings = $(".wprm-recipe-servings").text(); + } +} + +module.exports = CopyKatScraper; diff --git a/scrapers/DamnDeliciousScraper.js b/scrapers/DamnDeliciousScraper.js new file mode 100644 index 0000000..4536227 --- /dev/null +++ b/scrapers/DamnDeliciousScraper.js @@ -0,0 +1,63 @@ +"use strict"; + +const PuppeteerScraper = require("../helpers/PuppeteerScraper"); + +/** + * Class for scraping damndelicious.net + * @extends PuppeteerScraper + */ +class 
DamnDeliciousScraper extends PuppeteerScraper { + constructor(url) { + super(url, "damndelicious.net"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + const titleDiv = $(".recipe-title"); + + this.recipe.name = $(titleDiv) + .children("h2") + .text(); + + $(titleDiv) + .find("p") + .each((i, el) => { + let title = $(el) + .children("strong") + .text(); + let data = $(el) + .children("span") + .text(); + + switch (title) { + case "Yield:": + this.recipe.servings = data; + break; + case "prep time:": + time.prep = data; + break; + case "cook time:": + time.cook = data; + break; + case "total time:": + time.total = data; + break; + default: + break; + } + }); + + $("li[itemprop=ingredients]").each((i, el) => { + ingredients.push($(el).text()); + }); + + $(".instructions") + .find("li") + .each((i, el) => { + instructions.push($(el).text()); + }); + } +} + +module.exports = DamnDeliciousScraper; diff --git a/scrapers/NomNomPaleoScraper.js b/scrapers/NomNomPaleoScraper.js new file mode 100644 index 0000000..b09612c --- /dev/null +++ b/scrapers/NomNomPaleoScraper.js @@ -0,0 +1,55 @@ +"use strict"; + +const PuppeteerScraper = require("../helpers/PuppeteerScraper"); + +/** + * Class for scraping nomnompaleo.com + * @extends PuppeteerScraper + */ +class NomNomPaleoScraper extends PuppeteerScraper { + constructor(url) { + super(url, "nomnompaleo.com/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = $(".wprm-recipe-name").text(); + + $(".wprm-recipe-ingredient").each((i, el) => { + ingredients.push( + $(el) + .text() + .replace(/\s\s+/g, " ") + .trim() + ); + }); + + this.recipe.tags = $(".wprm-recipe-keyword") + .text() + .split(",") + .map(x => x.trim()); + + $(".wprm-recipe-instruction-group").each((i, el) => { + let groupName = $(el) + .children(".wprm-recipe-group-name") + .text(); + if (groupName.length) { + 
instructions.push(groupName); + } + $(el) + .find(".wprm-recipe-instruction-text") + .each((i, elChild) => { + instructions.push($(elChild).text()); + }); + }); + const times = $(".wprm-recipe-time"); + time.prep = $(times.first()).text(); + time.cook = $(times.get(1)).text(); + time.total = $(times.last()).text(); + + this.recipe.servings = $(".wprm-recipe-servings-with-unit").text(); + } +} + +module.exports = NomNomPaleoScraper; diff --git a/scrapers/TastesBetterFromScratchScraper.js b/scrapers/TastesBetterFromScratchScraper.js new file mode 100644 index 0000000..da7436b --- /dev/null +++ b/scrapers/TastesBetterFromScratchScraper.js @@ -0,0 +1,58 @@ +"use strict"; + +const PuppeteerScraper = require("../helpers/PuppeteerScraper"); + +/** + * Class for scraping tastesbetterfromscratch.com + * @extends PuppeteerScraper + */ +class TastesBetterFromScratchScraper extends PuppeteerScraper { + constructor(url) { + super(url, "tastesbetterfromscratch.com"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = $(".wprm-recipe-name").text(); + + $(".wprm-recipe-ingredient").each((i, el) => { + let amount = $(el) + .find(".wprm-recipe-ingredient-amount") + .text(); + let unit = $(el) + .find(".wprm-recipe-ingredient-unit") + .text(); + let name = $(el) + .find(".wprm-recipe-ingredient-name") + .text(); + let ingredient = `${amount} ${unit} ${name}` + .replace(/\s\s+/g, " ") + .trim(); + ingredients.push(ingredient); + }); + + $(".wprm-recipe-instruction").each((i, el) => { + instructions.push( + $(el) + .text() + .replace(/\s\s+/g, "") + ); + }); + + $(".wprm-recipe-time-container").each((i, el) => { + let text = $(el).text(); + if (text.includes("Total Time:")) { + time.total = text.replace("Total Time:", "").trim(); + } else if (text.includes("Prep Time:")) { + time.prep = text.replace("Prep Time:", "").trim(); + } else if (text.includes("Cook Time:")) { + time.cook = text.replace("Cook Time:", 
"").trim(); + } + }); + + this.recipe.servings = $(".wprm-recipe-servings").text() || ""; + } +} + +module.exports = TastesBetterFromScratchScraper; diff --git a/scrapers/TheRealFoodDrsScraper.js b/scrapers/TheRealFoodDrsScraper.js new file mode 100644 index 0000000..13c2d0b --- /dev/null +++ b/scrapers/TheRealFoodDrsScraper.js @@ -0,0 +1,58 @@ +"use strict"; + +const PuppeteerScraper = require("../helpers/PuppeteerScraper"); + +/** + * Class for scraping therealfoodrds.com + * @extends PuppeteerScraper + */ +class TheRealFoodDrsScraper extends PuppeteerScraper { + constructor(url) { + super(url, "therealfoodrds.com/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = $(".tasty-recipes-entry-header") + .children("h2") + .first() + .text(); + + $(".tasty-recipes-ingredients") + .find("li") + .each((i, el) => { + ingredients.push( + $(el) + .text() + .replace(/\s\s+/g, "") + ); + }); + + $(".tasty-recipes-instructions") + .find("h4, li") + .each((i, el) => { + instructions.push( + $(el) + .text() + .replace(/\s\s+/g, "") + ); + }); + + this.recipe.tags = $(".tasty-recipes-category") + .text() + .split("|") + .map(x => x.trim()); + + time.prep = $(".tasty-recipes-prep-time").text(); + time.cook = $(".tasty-recipes-cook-time").text(); + time.total = $(".tasty-recipes-total-time").text(); + + this.recipe.servings = $(".tasty-recipes-yield") + .children("span") + .first() + .text(); + } +} + +module.exports = TheRealFoodDrsScraper; diff --git a/scrapers/YummlyScraper.js b/scrapers/YummlyScraper.js new file mode 100644 index 0000000..69edb28 --- /dev/null +++ b/scrapers/YummlyScraper.js @@ -0,0 +1,71 @@ +"use strict"; + +const PuppeteerScraper = require("../helpers/PuppeteerScraper"); + +/** + * Class for scraping yummly.com + * @extends PuppeteerScraper + */ +class YummlyScraper extends PuppeteerScraper { + constructor(url) { + super(url, "yummly.com/recipe"); + } + + /** + * @override + * 
Navigates through steps to recipe + */ + async customPoll(page) { + let steps = (await page.$$(".step")).length; + let newSteps = -1; + + while (steps >= newSteps) { + await page.waitFor(100); + await page.$eval( + "a.view-more-steps", + /* istanbul ignore next */ elem => elem.click() + ); + newSteps = (await page.$$(".step")).length; + } + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, tags, time } = this.recipe; + this.recipe.name = $(".recipe-title").text(); + + $(".recipe-tag").each((i, el) => { + tags.push( + $(el) + .find("a") + .text() + ); + }); + + $(".IngredientLine").each((i, el) => { + ingredients.push($(el).text()); + }); + + $(".step").each((i, el) => { + instructions.push($(el).text()); + }); + + time.total = + $("div.unit") + .children() + .first() + .text() + + " " + + $("div.unit") + .children() + .last() + .text(); + + this.recipe.servings = $(".unit-serving-wrapper") + .find(".greyscale-1") + .text() + .split(" ")[0]; + } +} + +module.exports = YummlyScraper; diff --git a/scrapers/closetcooking.js b/scrapers/closetcooking.js deleted file mode 100644 index b6b358b..0000000 --- a/scrapers/closetcooking.js +++ /dev/null @@ -1,67 +0,0 @@ -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); -const puppeteerFetch = require("../helpers/puppeteerFetch"); - -const closetCooking = url => { - return new Promise(async (resolve, reject) => { - if (!url.includes("closetcooking.com/")) { - reject(new Error("url provided must include 'closetcooking.com/'")); - } else { - try { - const html = await puppeteerFetch(url); - const Recipe = new RecipeSchema(); - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".recipe_title").text(); - - $(".ingredients") - .children("h6, li") - .each((i, el) => { - Recipe.ingredients.push($(el).text()); - }); - - $(".instructions") - .children("h6, li") - .each((i, el) => { - 
Recipe.instructions.push($(el).text()); - }); - - $("a[rel='category tag']").each((i, el) => { - Recipe.tags.push( - $(el) - .text() - ); - }); - - let metaData = $(".time"); - let prepText = metaData.first().text(); - Recipe.time.prep = prepText.slice(prepText.indexOf(":") + 1).trim(); - let cookText = $(metaData.get(1)).text(); - Recipe.time.cook = cookText.slice(cookText.indexOf(":") + 1).trim(); - let totalText = $(metaData.get(2)).text(); - Recipe.time.total = totalText.slice(totalText.indexOf(":") + 1).trim(); - - let servingsText = $(".yield").text(); - Recipe.servings = servingsText - .slice(servingsText.indexOf(":") + 1) - .trim(); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } catch (error) { - reject(new Error("No recipe found on page")); - } - } - }); -}; - -module.exports = closetCooking; diff --git a/scrapers/cookieandkate.js b/scrapers/cookieandkate.js deleted file mode 100644 index 9e30034..0000000 --- a/scrapers/cookieandkate.js +++ /dev/null @@ -1,66 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); - -const cookieAndKate = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("cookieandkate.com/")) { - reject(new Error("url provided must include 'cookieandkate.com/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".tasty-recipes") - .children("h2") - .text(); - - $(".tasty-recipe-ingredients") - .find("h4, li") - .each((i, el) => { - Recipe.ingredients.push($(el).text()); - }); - - $(".tasty-recipe-instructions") - .find("li") - .each((i, el) => { - Recipe.instructions.push($(el).text()); - }); - 
- $("a[rel='category tag']").each((i, el) => { - Recipe.tags.push( - $(el) - .text() - ); - }); - - Recipe.time.prep = $(".tasty-recipes-prep-time").text(); - Recipe.time.cook = $(".tasty-recipes-cook-time").text(); - Recipe.time.total = $(".tasty-recipes-total-time").text(); - - $(".tasty-recipes-yield-scale").remove(); - Recipe.servings = $(".tasty-recipes-yield") - .text() - .trim(); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = cookieAndKate; diff --git a/scrapers/copykat.js b/scrapers/copykat.js deleted file mode 100644 index 1d82b82..0000000 --- a/scrapers/copykat.js +++ /dev/null @@ -1,67 +0,0 @@ -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); -const puppeteerFetch = require("../helpers/puppeteerFetch"); - -const copykat = url => { - return new Promise(async (resolve, reject) => { - if (!url.includes("copykat.com/")) { - reject(new Error("url provided must include 'copykat.com/'")); - } else { - try { - let html = await puppeteerFetch(url); - var Recipe = new RecipeSchema(); - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $( - $(".wprm-recipe-container").find(".wprm-recipe-name") - ).text(); - - $(".wprm-recipe-ingredient").each((i, el) => { - Recipe.ingredients.push( - $(el) - .text() - .replace(/\s\s+/g, " ") - .trim() - ); - }); - - $(".wprm-recipe-instructions").each((i, el) => { - Recipe.instructions.push( - $(el) - .text() - .replace(/\s\s+/g, " ") - .trim() - ); - }); - - Recipe.time.prep = $( - $(".wprm-recipe-prep-time-container").children(".wprm-recipe-time") - ).text(); - Recipe.time.cook = $( - $(".wprm-recipe-cook-time-container").children(".wprm-recipe-time") - ).text(); - Recipe.time.total = $( - 
$(".wprm-recipe-total-time-container").children(".wprm-recipe-time") - ).text(); - - Recipe.servings = $(".wprm-recipe-servings").text(); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } catch (error) { - reject(new Error("No recipe found on page")); - } - } - }); -}; - -module.exports = copykat; diff --git a/scrapers/damndelicious.js b/scrapers/damndelicious.js deleted file mode 100644 index c8aedf3..0000000 --- a/scrapers/damndelicious.js +++ /dev/null @@ -1,77 +0,0 @@ -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); -const puppeteerFetch = require("../helpers/puppeteerFetch"); - -const damnDelicious = url => { - return new Promise(async (resolve, reject) => { - if (!url.includes("damndelicious.net")) { - reject(new Error("url provided must include 'damndelicious.net'")); - } else { - try { - const html = await puppeteerFetch(url); - const Recipe = new RecipeSchema(); - const $ = cheerio.load(html); - - let titleDiv = $(".recipe-title"); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(titleDiv) - .children("h2") - .text(); - - $(titleDiv) - .find("p") - .each((i, el) => { - let title = $(el) - .children("strong") - .text(); - let data = $(el) - .children("span") - .text(); - - switch (title) { - case "Yield:": - Recipe.servings = data; - break; - case "prep time:": - Recipe.time.prep = data; - break; - case "cook time:": - Recipe.time.cook = data; - break; - case "total time:": - Recipe.time.total = data; - break; - default: - break; - } - }); - - $("li[itemprop=ingredients]").each((i, el) => { - Recipe.ingredients.push($(el).text()); - }); - - $(".instructions") - .find("li") - .each((i, el) => { - Recipe.instructions.push($(el).text()); - }); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - 
reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } catch (error) { - reject(new Error("No recipe found on page")); - } - } - }); -}; - -module.exports = damnDelicious; diff --git a/scrapers/index.js b/scrapers/index.js index 3297f29..d167b13 100644 --- a/scrapers/index.js +++ b/scrapers/index.js @@ -8,7 +8,7 @@ const recipeScraper = async url => { }; recipeScraper( - "https://www.closetcooking.com/reina-pepiada-arepa-chicken-and-avocado-sandwich/" + "https://www.yummly.com/recipe/No-Bake-Lemon-Mango-Cheesecakes-with-Speculoos-crust-781945" ).then(recipe => console.log(recipe)); module.exports = recipeScraper; diff --git a/scrapers/nomnompaleo.js b/scrapers/nomnompaleo.js deleted file mode 100644 index e9d4636..0000000 --- a/scrapers/nomnompaleo.js +++ /dev/null @@ -1,66 +0,0 @@ -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); -const puppeteerFetch = require("../helpers/puppeteerFetch"); - -const nomNomPaleo = url => { - return new Promise(async (resolve, reject) => { - if (!url.includes("nomnompaleo.com/")) { - reject(new Error("url provided must include 'nomnompaleo.com/'")); - } else { - try { - const html = await puppeteerFetch(url); - const Recipe = new RecipeSchema(); - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".wprm-recipe-name").text(); - - $(".wprm-recipe-ingredient").each((i, el) => { - Recipe.ingredients.push( - $(el) - .text() - .replace(/\s\s+/g, " ") - .trim() - ); - }); - - Recipe.tags = $(".wprm-recipe-keyword").text().split(',').map(x => x.trim()); - - $(".wprm-recipe-instruction-group").each((i, el) => { - let groupName = $(el) - .children(".wprm-recipe-group-name") - .text(); - if (groupName.length) { - Recipe.instructions.push(groupName); - } - $(el) - .find(".wprm-recipe-instruction-text") - .each((i, elChild) => { - Recipe.instructions.push($(elChild).text()); - }); - }); - const times = 
$(".wprm-recipe-time"); - Recipe.time.prep = $(times.first()).text(); - Recipe.time.cook = $(times.get(1)).text(); - Recipe.time.total = $(times.last()).text(); - - Recipe.servings = $(".wprm-recipe-servings-with-unit").text(); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } catch (error) { - reject(new Error("No recipe found on page")); - } - } - }); -}; - -module.exports = nomNomPaleo; diff --git a/scrapers/tastesbetterfromscratch.js b/scrapers/tastesbetterfromscratch.js deleted file mode 100644 index c961931..0000000 --- a/scrapers/tastesbetterfromscratch.js +++ /dev/null @@ -1,74 +0,0 @@ -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); -const puppeteerFetch = require("../helpers/puppeteerFetch"); - -const tastesBetterFromScratch = url => { - return new Promise(async (resolve, reject) => { - if (!url.includes("tastesbetterfromscratch.com")) { - reject( - new Error("url provided must include 'tastesbetterfromscratch.com'") - ); - } else { - try { - const html = await puppeteerFetch(url); - const Recipe = new RecipeSchema(); - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".wprm-recipe-name").text(); - - $(".wprm-recipe-ingredient").each((i, el) => { - let amount = $(el) - .find(".wprm-recipe-ingredient-amount") - .text(); - let unit = $(el) - .find(".wprm-recipe-ingredient-unit") - .text(); - let name = $(el) - .find(".wprm-recipe-ingredient-name") - .text(); - let ingredient = `${amount} ${unit} ${name}` - .replace(/\s\s+/g, " ") - .trim(); - Recipe.ingredients.push(ingredient); - }); - - $(".wprm-recipe-instruction").each((i, el) => { - Recipe.instructions.push( - $(el) - .text() - .replace(/\s\s+/g, "") - ); - }); - - $(".wprm-recipe-time-container").each((i, el) => { - let text = $(el).text(); - if 
(text.includes("Total Time:")) { - Recipe.time.total = text.replace("Total Time:", "").trim(); - } else if (text.includes("Prep Time:")) { - Recipe.time.prep = text.replace("Prep Time:", "").trim(); - } else if (text.includes("Cook Time:")) { - Recipe.time.cook = text.replace("Cook Time:", "").trim(); - } - }); - - Recipe.servings = $(".wprm-recipe-servings").text() || ""; - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } catch (error) { - reject(new Error("No recipe found on page")); - } - } - }); -}; - -module.exports = tastesBetterFromScratch; diff --git a/scrapers/therealfoodrds.js b/scrapers/therealfoodrds.js deleted file mode 100644 index 9e48d4d..0000000 --- a/scrapers/therealfoodrds.js +++ /dev/null @@ -1,71 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); -const puppeteerFetch = require("../helpers/puppeteerFetch"); - -const theRealFoodRds = url => { - const Recipe = new RecipeSchema(); - return new Promise(async (resolve, reject) => { - if (!url.includes("therealfoodrds.com/")) { - reject(new Error("url provided must include 'therealfoodrds.com/'")); - } else { - try { - const html = await puppeteerFetch(url); - const Recipe = new RecipeSchema(); - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".tasty-recipes-entry-header") - .children("h2") - .first() - .text(); - - $(".tasty-recipes-ingredients") - .find("li") - .each((i, el) => { - Recipe.ingredients.push( - $(el) - .text() - .replace(/\s\s+/g, "") - ); - }); - - $(".tasty-recipes-instructions") - .find("h4, li") - .each((i, el) => { - Recipe.instructions.push( - $(el) - .text() - .replace(/\s\s+/g, "") - ); - }); - - Recipe.tags = $(".tasty-recipes-category").text().split('|').map(x => x.trim()); - - 
Recipe.time.prep = $(".tasty-recipes-prep-time").text(); - Recipe.time.cook = $(".tasty-recipes-cook-time").text(); - Recipe.time.total = $(".tasty-recipes-total-time").text(); - - Recipe.servings = $(".tasty-recipes-yield") - .children("span") - .first() - .text(); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } catch (error) { - reject(new Error("No recipe found on page")); - } - } - }); -}; - -module.exports = theRealFoodRds; From b9a4027957c3d3598c3ee9d064c3605214eabb97 Mon Sep 17 00:00:00 2001 From: jadkins89 Date: Thu, 14 Jan 2021 16:40:56 -0700 Subject: [PATCH 08/11] lots more refactored scrapes --- helpers/BaseScraper.js | 12 +- helpers/PuppeteerScraper.js | 2 +- helpers/{recipe-schema.js => RecipeSchema.js} | 0 helpers/ScraperFactory.js | 31 ++-- scrapers/AllRecipesScraper.js | 2 +- scrapers/EatingWellScraper.js | 87 +++++++++++ scrapers/EpicuriousScraper.js | 44 ++++++ scrapers/FoodAndWineScraper.js | 52 +++++++ scrapers/FoodNetworkScraper.js | 72 +++++++++ scrapers/FoodScraper.js | 41 +++++ scrapers/GimmeDeliciousScraper.js | 58 +++++++ scrapers/GimmeSomeOvenScraper.js | 49 ++++++ scrapers/JulieBlannerScraper.js | 55 +++++++ scrapers/KitchenStoriesScraper.js | 85 +++++++++++ scrapers/YummlyScraper.js | 22 +-- scrapers/eatingwell.js | 104 ------------- scrapers/epicurious.js | 62 -------- scrapers/food.js | 56 ------- scrapers/foodandwine.js | 67 -------- scrapers/foodnetwork.js | 90 ----------- scrapers/gimmedelicious.js | 74 --------- scrapers/gimmesomeoven.js | 67 -------- scrapers/index.js | 2 +- scrapers/julieblanner.js | 72 --------- scrapers/kitchenstories.js | 86 ----------- scrapers/melskitchencafe.js | 2 +- scrapers/minimalistbaker.js | 2 +- scrapers/myrecipes.js | 2 +- scrapers/omnivorescookbook.js | 2 +- scrapers/pinchofyum.js | 2 +- scrapers/recipetineats.js | 2 +- scrapers/seriouseats.js | 2 +- 
scrapers/simplyrecipes.js | 2 +- scrapers/smittenkitchen.js | 2 +- scrapers/tasteofhome.js | 2 +- scrapers/theblackpeppercorn.js | 2 +- scrapers/thepioneerwoman.js | 2 +- scrapers/therecipecritic.js | 2 +- scrapers/thespruceeats.js | 2 +- scrapers/whatsgabycooking.js | 2 +- scrapers/woolworths.js | 2 +- scrapers/yummly.js | 144 ------------------ test/allRecipes.test.js | 2 +- test/centraltexasfoodbank.test.js | 8 +- test/closetcooking.test.js | 10 +- test/constants/eatingwellConstants.js | 4 +- test/cookieandkate.test.js | 10 +- test/copykat.test.js | 5 +- test/damndelicious.test.js | 10 +- test/eatingwell.test.js | 32 ++-- test/epicurious.test.js | 10 +- test/food.test.js | 5 +- test/foodandwine.test.js | 10 +- test/foodnetwork.test.js | 32 ++-- test/gimmedelicious.test.js | 10 +- test/gimmesomeoven.test.js | 10 +- test/julieblanner.test.js | 5 +- test/kitchenStories.test.js | 6 +- test/nomnompaleo.test.js | 5 +- test/tastesbetterfromscratch.test.js | 6 +- test/thereaddealfoodrds.test.js | 10 +- test/yummly.test.js | 5 +- 62 files changed, 673 insertions(+), 990 deletions(-) rename helpers/{recipe-schema.js => RecipeSchema.js} (100%) create mode 100644 scrapers/EatingWellScraper.js create mode 100644 scrapers/EpicuriousScraper.js create mode 100644 scrapers/FoodAndWineScraper.js create mode 100644 scrapers/FoodNetworkScraper.js create mode 100644 scrapers/FoodScraper.js create mode 100644 scrapers/GimmeDeliciousScraper.js create mode 100644 scrapers/GimmeSomeOvenScraper.js create mode 100644 scrapers/JulieBlannerScraper.js create mode 100644 scrapers/KitchenStoriesScraper.js delete mode 100644 scrapers/eatingwell.js delete mode 100644 scrapers/epicurious.js delete mode 100644 scrapers/food.js delete mode 100644 scrapers/foodandwine.js delete mode 100644 scrapers/foodnetwork.js delete mode 100644 scrapers/gimmedelicious.js delete mode 100644 scrapers/gimmesomeoven.js delete mode 100644 scrapers/julieblanner.js delete mode 100644 scrapers/kitchenstories.js delete 
mode 100644 scrapers/yummly.js diff --git a/helpers/BaseScraper.js b/helpers/BaseScraper.js index b158ec7..78d6254 100644 --- a/helpers/BaseScraper.js +++ b/helpers/BaseScraper.js @@ -3,10 +3,10 @@ const fetch = require("node-fetch"); const cheerio = require("cheerio"); -const RecipeSchema = require("./recipe-schema"); +const RecipeSchema = require("./RecipeSchema"); /** - * + * Abstract Class which all scrapers inherit from */ class BaseScraper { constructor(url, subUrl = "") { @@ -27,6 +27,10 @@ class BaseScraper { this.recipe = new RecipeSchema(); } + defaultError() { + throw new Error("No recipe found on page"); + } + /** * */ @@ -46,7 +50,7 @@ class BaseScraper { const html = await res.text(); return cheerio.load(html); } catch (err) { - throw new Error("No recide found on page"); + this.defaultError(); } } @@ -80,7 +84,7 @@ class BaseScraper { !this.recipe.ingredients.length || !this.recipe.instructions.length ) { - throw new Error("No recipe found on page"); + this.defaultError(); } return this.recipe; } diff --git a/helpers/PuppeteerScraper.js b/helpers/PuppeteerScraper.js index d675988..3c4cd30 100644 --- a/helpers/PuppeteerScraper.js +++ b/helpers/PuppeteerScraper.js @@ -81,7 +81,7 @@ class PuppeteerScraper extends BaseScraper { await this.customPoll(page); html = await page.content(); } else { - throw new Error(response._status); + this.defaultError(); } browser.close().catch(err => {}); return cheerio.load(html); diff --git a/helpers/recipe-schema.js b/helpers/RecipeSchema.js similarity index 100% rename from helpers/recipe-schema.js rename to helpers/RecipeSchema.js diff --git a/helpers/ScraperFactory.js b/helpers/ScraperFactory.js index 4c80c86..c750453 100644 --- a/helpers/ScraperFactory.js +++ b/helpers/ScraperFactory.js @@ -16,15 +16,15 @@ const domains = { cookieandkate: require("../scrapers/CookieAndKateScraper"), copykat: require("../scrapers/CopyKatScraper"), damndelicious: require("../scrapers/DamnDeliciousScraper"), - eatingwell: 
require("../scrapers/eatingwell"), - epicurious: require("../scrapers/epicurious"), - food: require("../scrapers/food"), - foodandwine: require("../scrapers/foodandwine"), - foodnetwork: require("../scrapers/foodnetwork"), - gimmedelicious: require("../scrapers/gimmedelicious"), - gimmesomeoven: require("../scrapers/gimmesomeoven"), - julieblanner: require("../scrapers/julieblanner"), - kitchenstories: require("../scrapers/kitchenstories"), + eatingwell: require("../scrapers/EatingWellScraper"), + epicurious: require("../scrapers/EpicuriousScraper"), + food: require("../scrapers/FoodScraper"), + foodandwine: require("../scrapers/FoodAndWineScraper"), + foodnetwork: require("../scrapers/FoodNetworkScraper"), + gimmedelicious: require("../scrapers/GimmeDeliciousScraper"), + gimmesomeoven: require("../scrapers/GimmeSomeOvenScraper"), + julieblanner: require("../scrapers/JulieBlannerScraper"), + kitchenstories: require("../scrapers/KitchenStoriesScraper"), melskitchencafe: require("../scrapers/melskitchencafe"), minimalistbaker: require("../scrapers/minimalistbaker"), myrecipes: require("../scrapers/myrecipes"), @@ -47,8 +47,6 @@ const domains = { yummly: require("../scrapers/YummlyScraper") }; -const instances = {}; - /** * A Singleton Factory to whom supplies an instance of a scraper based on a give URL */ @@ -57,14 +55,11 @@ class ScraperFactory { let parse = parseDomain(url); if (parse) { let domain = parse.domain; - if (!instances[domain]) { - if (domains[domain] !== undefined) { - instances[domain] = new domains[domain](url); - } else { - throw new Error("Site not yet supported"); - } + if (domains[domain] !== undefined) { + return new domains[domain](url); + } else { + throw new Error("Site not yet supported"); } - return instances[domain]; } else { throw new Error("Failed to parse domain"); } diff --git a/scrapers/AllRecipesScraper.js b/scrapers/AllRecipesScraper.js index f5be337..37878a5 100644 --- a/scrapers/AllRecipesScraper.js +++ 
b/scrapers/AllRecipesScraper.js @@ -4,7 +4,7 @@ const BaseScraper = require("../helpers/BaseScraper"); class AmbitiousKitchenScraper extends BaseScraper { constructor(url) { - super(url, "allrecipes.com/recipe/"); + super(url, "allrecipes.com/recipe"); } newScrape($) { diff --git a/scrapers/EatingWellScraper.js b/scrapers/EatingWellScraper.js new file mode 100644 index 0000000..7d762f3 --- /dev/null +++ b/scrapers/EatingWellScraper.js @@ -0,0 +1,87 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping eatingwell.com + * @extends BaseScraper + */ +class EatingWellScraper extends BaseScraper { + constructor(url) { + super(url, "eatingwell.com/recipe"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, tags, time } = this.recipe; + this.recipe.name = $(".main-header") + .find(".headline") + .text() + .trim(); + + $(".ingredients-section__legend, .ingredients-item-name").each((i, el) => { + if ( + !$(el) + .attr("class") + .includes("visually-hidden") + ) { + ingredients.push( + $(el) + .text() + .trim() + .replace(/\s\s+/g, " ") + ); + } + }); + + $(".instructions-section-item").each((i, el) => { + instructions.push( + $(el) + .find("p") + .text() + ); + }); + + $(".nutrition-profile-item").each((i, el) => { + tags.push( + $(el) + .find("a") + .text() + ); + }); + + $(".recipe-meta-item").each((i, el) => { + const title = $(el) + .children(".recipe-meta-item-header") + .text() + .replace(/\s*:|\s+(?=\s*)/g, ""); + const value = $(el) + .children(".recipe-meta-item-body") + .text() + .replace(/\s\s+/g, ""); + switch (title) { + case "prep": + time.prep = value; + break; + case "cook": + time.cook = value; + break; + case "active": + time.active = value; + case "total": + time.total = value; + break; + case "additional": + time.inactive = value; + break; + case "Servings": + this.recipe.servings = value; + break; + default: + break; + } + }); + } +} + +module.exports = 
EatingWellScraper; diff --git a/scrapers/EpicuriousScraper.js b/scrapers/EpicuriousScraper.js new file mode 100644 index 0000000..6d65fcc --- /dev/null +++ b/scrapers/EpicuriousScraper.js @@ -0,0 +1,44 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping epicurious.com + * @extends BaseScraper + */ +class EpicuriousScraper extends BaseScraper { + constructor(url) { + super(url, "epicurious.com/recipes/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, tags, time } = this.recipe; + this.recipe.name = $("h1[itemprop=name]") + .text() + .trim(); + + $(".ingredient").each((i, el) => { + ingredients.push($(el).text()); + }); + + $(".preparation-step").each((i, el) => { + instructions.push( + $(el) + .text() + .replace(/\s\s+/g, "") + ); + }); + + $("dt[itemprop=recipeCategory]").each((i, el) => { + tags.push($(el).text()); + }); + + time.active = $("dd.active-time").text(); + time.total = $("dd.total-time").text(); + + this.recipe.servings = $("dd.yield").text(); + } +} + +module.exports = EpicuriousScraper; diff --git a/scrapers/FoodAndWineScraper.js b/scrapers/FoodAndWineScraper.js new file mode 100644 index 0000000..ab22e7a --- /dev/null +++ b/scrapers/FoodAndWineScraper.js @@ -0,0 +1,52 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping foodandwine.com + * @extends BaseScraper + */ +class FoodAndWineScraper extends BaseScraper { + constructor(url) { + super(url, "foodandwine.com/recipes/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = $("h1.headline").text(); + + $(".ingredients-section") + .find(".ingredients-item-name") + .each((i, el) => { + ingredients.push( + $(el) + .text() + .trim() + ); + }); + + $(".recipe-instructions") + .find("p") + .each((i, el) => { + instructions.push($(el).text()); + }); + + let metaBody = 
$(".recipe-meta-item-body"); + + time.active = metaBody + .first() + .text() + .trim(); + time.total = $(metaBody.get(1)) + .text() + .trim(); + + this.recipe.servings = metaBody + .last() + .text() + .trim(); + } +} + +module.exports = FoodAndWineScraper; diff --git a/scrapers/FoodNetworkScraper.js b/scrapers/FoodNetworkScraper.js new file mode 100644 index 0000000..98cde24 --- /dev/null +++ b/scrapers/FoodNetworkScraper.js @@ -0,0 +1,72 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping foodnetwork.com + * @extends BaseScraper + */ +class FoodNetworkScraper extends BaseScraper { + constructor(url) { + super(url, "foodnetwork.com/recipes/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, tags, time } = this.recipe; + this.recipe.name = $(".o-AssetTitle__a-HeadlineText") + .first() + .text(); + + $(".o-Ingredients__a-Ingredient, .o-Ingredients__a-SubHeadline").each( + (i, el) => { + if (!$(el).hasClass("o-Ingredients__a-Ingredient--SelectAll")) { + const item = $(el) + .text() + .replace(/\s\s+/g, ""); + ingredients.push(item); + } + } + ); + + $(".o-Method__m-Step").each((i, el) => { + const step = $(el) + .text() + .replace(/\s\s+/g, ""); + if (step != "") { + instructions.push(step); + } + }); + + $(".o-RecipeInfo li").each((i, el) => { + let timeItem = $(el) + .text() + .replace(/\s\s+/g, "") + .split(":"); + switch (timeItem[0]) { + case "Prep": + time.prep = timeItem[1]; + break; + case "Active": + time.active = timeItem[1]; + break; + case "Inactive": + time.inactive = timeItem[1]; + break; + case "Cook": + time.cook = timeItem[1]; + break; + case "Total": + time.total = timeItem[1]; + break; + default: + } + }); + + $(".o-Capsule__a-Tag").each((i, el) => { + tags.push($(el).text()); + }); + } +} + +module.exports = FoodNetworkScraper; diff --git a/scrapers/FoodScraper.js b/scrapers/FoodScraper.js new file mode 100644 index 0000000..3d2b153 --- /dev/null +++ 
b/scrapers/FoodScraper.js @@ -0,0 +1,41 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping food.com + * @extends BaseScraper + */ +class FoodScraper extends BaseScraper { + constructor(url) { + super(url, "food.com/recipe/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = $(".recipe-title").text(); + + $(".recipe-ingredients__item").each((i, el) => { + const item = $(el) + .text() + .replace(/\s\s+/g, " ") + .trim(); + ingredients.push(item); + }); + + $(".recipe-directions__step").each((i, el) => { + const step = $(el) + .text() + .replace(/\s\s+/g, ""); + instructions.push(step); + }); + + time.total = $(".recipe-facts__time") + .children() + .last() + .text(); + } +} + +module.exports = FoodScraper; diff --git a/scrapers/GimmeDeliciousScraper.js b/scrapers/GimmeDeliciousScraper.js new file mode 100644 index 0000000..168451e --- /dev/null +++ b/scrapers/GimmeDeliciousScraper.js @@ -0,0 +1,58 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping gimmedelicious.com + * @extends BaseScraper + */ +class GimmeDeliciousScraper extends BaseScraper { + constructor(url) { + super(url, "gimmedelicious.com/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = $(".wprm-recipe-name") + .text() + .trim(); + + this.recipe.tags = ($("meta[name='keywords']").attr("content") || "").split( + "," + ); + + $(".wprm-recipe-ingredients > .wprm-recipe-ingredient").each((i, el) => { + ingredients.push( + $(el) + .text() + .replace(/▢/g, "") + ); + }); + + $(".wprm-recipe-instruction-text").each((i, el) => { + instructions.push( + $(el) + .remove("img") + .text() + .trim() + ); + }); + + time.prep = + $(".wprm-recipe-prep_time-minutes").text() + + " " + + $(".wprm-recipe-prep_timeunit-minutes").text(); + time.cook = + 
$(".wprm-recipe-cook_time-minutes").text() + + " " + + $(".wprm-recipe-cook_timeunit-minutes").text(); + time.total = + $(".wprm-recipe-total_time-minutes").text() + + " " + + $(".wprm-recipe-total_timeunit-minutes").text(); + this.recipe.servings = $(".wprm-recipe-servings").text(); + } +} + +module.exports = GimmeDeliciousScraper; diff --git a/scrapers/GimmeSomeOvenScraper.js b/scrapers/GimmeSomeOvenScraper.js new file mode 100644 index 0000000..19292c1 --- /dev/null +++ b/scrapers/GimmeSomeOvenScraper.js @@ -0,0 +1,49 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping gimmesomeoven.com + * @extends BaseScraper + */ +class GimmeSomeOvenScraper extends BaseScraper { + constructor(url) { + super(url, "gimmesomeoven.com/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, tags, time } = this.recipe; + this.recipe.name = $(".tasty-recipes-header-content") + .children("h2") + .first() + .text(); + + $(".tasty-recipes-ingredients") + .find("li") + .each((i, el) => { + ingredients.push($(el).text()); + }); + + $(".tasty-recipes-instructions") + .find("li") + .each((i, el) => { + instructions.push($(el).text()); + }); + + time.prep = $(".tasty-recipes-prep-time").text(); + time.cook = $(".tasty-recipes-cook-time").text(); + time.total = $(".tasty-recipes-total-time").text(); + + $(".tasty-recipes-yield-scale").remove(); + this.recipe.servings = $(".tasty-recipes-yield") + .text() + .trim(); + + $("a[rel='category tag']").each((i, el) => { + tags.push($(el).text()); + }); + } +} + +module.exports = GimmeSomeOvenScraper; diff --git a/scrapers/JulieBlannerScraper.js b/scrapers/JulieBlannerScraper.js new file mode 100644 index 0000000..a6e4229 --- /dev/null +++ b/scrapers/JulieBlannerScraper.js @@ -0,0 +1,55 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping julieblanner.com + * @extends BaseScraper + */ +class JulieBlannerScraper 
extends BaseScraper { + constructor(url) { + super(url, "julieblanner.com/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = $(".wprm-recipe-name") + .text() + .trim(); + + $(".wprm-recipe-ingredients > .wprm-recipe-ingredient").each((i, el) => { + ingredients.push( + $(el) + .text() + .replace(/(\s\s+|▢)/g, " ") + .trim() + ); + }); + + $(".wprm-recipe-instruction-text").each((i, el) => { + instructions.push( + $(el) + .remove("img") + .text() + .trim() + ); + }); + + time.prep = $(".wprm-recipe-prep-time-label") + .next() + .text(); + time.cook = $(".wprm-recipe-cook-time-label") + .next() + .text(); + time.inactive = $(".wprm-recipe-custom-time-label") + .next() + .text(); + time.total = $(".wprm-recipe-total-time-label") + .next() + .text(); + this.recipe.servings = $(".wprm-recipe-servings").text(); + } +} + +module.exports = JulieBlannerScraper; diff --git a/scrapers/KitchenStoriesScraper.js b/scrapers/KitchenStoriesScraper.js new file mode 100644 index 0000000..92fb6ae --- /dev/null +++ b/scrapers/KitchenStoriesScraper.js @@ -0,0 +1,85 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping kitchenstories.com + * @extends BaseScraper + */ +class KitchenStoriesScraper extends BaseScraper { + constructor(url) { + super(url); + this.subUrl = [ + "kitchenstories.com/en/recipes", + "kitchenstories.com/de/rezepte" + ]; + } + + /** + * @override + */ + checkUrl() { + const found = this.subUrl.reduce((found, url) => { + if (this.url.includes(url)) { + found = true; + } + return found; + }, false); + if (!found) { + throw new Error( + `url provided must include '${this.subUrl.join("' or '")}'` + ); + } + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = $(".recipe-title").text(); + + $(".ingredients") + .find("tr") + .each((i, el) => { + 
ingredients.push($(el).text()); + }); + + $(".step") + .children(".text") + .each((i, el) => { + instructions.push($(el).text()); + }); + + $(".time-cell").each((i, el) => { + let title = $(el) + .children(".title") + .text(); + let time = $(el) + .find(".time") + .text(); + let unit = $(el) + .find(".unit") + .text(); + if (parseInt(time)) { + switch (title) { + case "Preparation": + case "Zubereitung": + time.prep = `${time} ${unit}`; + break; + case "Baking": + case "Backzeit": + time.cook = `${time} ${unit}`; + break; + case "Resting": + case "Ruhezeit": + time.inactive = `${time} ${unit}`; + break; + default: + } + } + }); + + this.recipe.servings = $(".stepper-value").text(); + } +} + +module.exports = KitchenStoriesScraper; diff --git a/scrapers/YummlyScraper.js b/scrapers/YummlyScraper.js index 69edb28..65399e0 100644 --- a/scrapers/YummlyScraper.js +++ b/scrapers/YummlyScraper.js @@ -16,17 +16,19 @@ class YummlyScraper extends PuppeteerScraper { * Navigates through steps to recipe */ async customPoll(page) { - let steps = (await page.$$(".step")).length; - let newSteps = -1; + try { + let steps = (await page.$$(".step")).length; + let newSteps = -1; - while (steps >= newSteps) { - await page.waitFor(100); - await page.$eval( - "a.view-more-steps", - /* istanbul ignore next */ elem => elem.click() - ); - newSteps = (await page.$$(".step")).length; - } + while (steps >= newSteps) { + await page.waitFor(100); + await page.$eval( + "a.view-more-steps", + /* istanbul ignore next */ elem => elem.click() + ); + newSteps = (await page.$$(".step")).length; + } + } catch (err) {} } scrape($) { diff --git a/scrapers/eatingwell.js b/scrapers/eatingwell.js deleted file mode 100644 index 4a301ea..0000000 --- a/scrapers/eatingwell.js +++ /dev/null @@ -1,104 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); - -const eatingWell = url => { - const Recipe = new RecipeSchema(); - 
return new Promise((resolve, reject) => { - if (!url.includes("eatingwell.com/recipe")) { - reject(new Error("url provided must include 'eatingwell.com/recipe'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".main-header") - .find(".headline") - .text() - .trim(); - - $(".ingredients-section__legend, .ingredients-item-name").each( - (i, el) => { - if ( - !$(el) - .attr("class") - .includes("visually-hidden") - ) { - Recipe.ingredients.push( - $(el) - .text() - .trim() - .replace(/\s\s+/g, " ") - ); - } - } - ); - - $(".instructions-section-item").each((i, el) => { - Recipe.instructions.push( - $(el) - .find("p") - .text() - ); - }); - - $(".nutrition-profile-item").each((i, el) => { - Recipe.tags.push( - $(el) - .find("a") - .text() - ); - }); - - $(".recipe-meta-item").each((i, el) => { - const title = $(el) - .children(".recipe-meta-item-header") - .text() - .replace(/\s*:|\s+(?=\s*)/g, ""); - const value = $(el) - .children(".recipe-meta-item-body") - .text() - .replace(/\s\s+/g, ""); - switch (title) { - case "prep": - Recipe.time.prep = value; - break; - case "cook": - Recipe.time.cook = value; - break; - case "active": - Recipe.time.active = value; - case "total": - Recipe.time.total = value; - break; - case "additional": - Recipe.time.inactive = value; - break; - case "Servings": - Recipe.servings = value; - break; - default: - break; - } - }); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = eatingWell; diff --git a/scrapers/epicurious.js b/scrapers/epicurious.js deleted file mode 100644 index 4fed830..0000000 --- a/scrapers/epicurious.js +++ /dev/null @@ 
-1,62 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); - -const epicurious = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("epicurious.com/recipes/")) { - reject(new Error("url provided must include 'epicurious.com/recipes/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $("h1[itemprop=name]") - .text() - .trim(); - - $(".ingredient").each((i, el) => { - Recipe.ingredients.push($(el).text()); - }); - - $(".preparation-step").each((i, el) => { - Recipe.instructions.push( - $(el) - .text() - .replace(/\s\s+/g, "") - ); - }); - - $("dt[itemprop=recipeCategory]").each((i, el) => { - Recipe.tags.push( - $(el) - .text() - ); - }); - - Recipe.time.active = $("dd.active-time").text(); - Recipe.time.total = $("dd.total-time").text(); - - Recipe.servings = $("dd.yield").text(); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = epicurious; diff --git a/scrapers/food.js b/scrapers/food.js deleted file mode 100644 index b101d93..0000000 --- a/scrapers/food.js +++ /dev/null @@ -1,56 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); - -const food = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("food.com/recipe/")) { - reject(new Error("url provided must include 'food.com/recipe/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode 
=== 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[name='og:image']").attr("content"); - Recipe.name = $(".recipe-title").text(); - - $(".recipe-ingredients__item").each((i, el) => { - const item = $(el) - .text() - .replace(/\s\s+/g, " ") - .trim(); - Recipe.ingredients.push(item); - }); - - $(".recipe-directions__step").each((i, el) => { - const step = $(el) - .text() - .replace(/\s\s+/g, ""); - Recipe.instructions.push(step); - }); - - Recipe.time.total = $(".recipe-facts__time") - .children() - .last() - .text(); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = food; diff --git a/scrapers/foodandwine.js b/scrapers/foodandwine.js deleted file mode 100644 index a80ffe8..0000000 --- a/scrapers/foodandwine.js +++ /dev/null @@ -1,67 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); - -const foodAndWine = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("foodandwine.com/recipes/")) { - reject(new Error("url provided must include 'foodandwine.com/recipes/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $("h1.headline").text(); - - $(".ingredients-section") - .find(".ingredients-item-name") - .each((i, el) => { - Recipe.ingredients.push( - $(el) - .text() - .trim() - ); - }); - - $(".recipe-instructions") - .find("p") - .each((i, el) => { - Recipe.instructions.push($(el).text()); - }); - - let metaBody = $(".recipe-meta-item-body"); - - Recipe.time.active = metaBody - .first() - .text() - .trim(); - 
Recipe.time.total = $(metaBody.get(1)) - .text() - .trim(); - - Recipe.servings = metaBody - .last() - .text() - .trim(); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = foodAndWine; diff --git a/scrapers/foodnetwork.js b/scrapers/foodnetwork.js deleted file mode 100644 index 0446fc0..0000000 --- a/scrapers/foodnetwork.js +++ /dev/null @@ -1,90 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); - -const foodNetwork = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("foodnetwork.com/recipes/")) { - reject(new Error("url provided must include 'foodnetwork.com/recipes/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".o-AssetTitle__a-HeadlineText") - .first() - .text(); - - $(".o-Ingredients__a-Ingredient, .o-Ingredients__a-SubHeadline").each( - (i, el) => { - if (!$(el).hasClass("o-Ingredients__a-Ingredient--SelectAll")) { - const item = $(el) - .text() - .replace(/\s\s+/g, ""); - Recipe.ingredients.push(item); - } - } - ); - - $(".o-Method__m-Step").each((i, el) => { - const step = $(el) - .text() - .replace(/\s\s+/g, ""); - if (step != "") { - Recipe.instructions.push(step); - } - }); - - $(".o-RecipeInfo li").each((i, el) => { - let timeItem = $(el) - .text() - .replace(/\s\s+/g, "") - .split(":"); - switch (timeItem[0]) { - case "Prep": - Recipe.time.prep = timeItem[1]; - break; - case "Active": - Recipe.time.active = timeItem[1]; - break; - case "Inactive": - Recipe.time.inactive = timeItem[1]; - break; - case 
"Cook": - Recipe.time.cook = timeItem[1]; - break; - case "Total": - Recipe.time.total = timeItem[1]; - break; - default: - } - }); - - $(".o-Capsule__a-Tag").each((i, el) => { - Recipe.tags.push( - $(el) - .text() - ); - }); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = foodNetwork; diff --git a/scrapers/gimmedelicious.js b/scrapers/gimmedelicious.js deleted file mode 100644 index 7cb777c..0000000 --- a/scrapers/gimmedelicious.js +++ /dev/null @@ -1,74 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); - -const gimmedelicious = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("gimmedelicious.com/")) { - reject(new Error("url provided must include 'gimmedelicious.com/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.tags = ( - $("meta[name='keywords']").attr("content") || "" - ).split(","); - Recipe.name = $(".wprm-recipe-name") - .text() - .trim(); - - $(".wprm-recipe-ingredients > .wprm-recipe-ingredient").each( - (i, el) => { - Recipe.ingredients.push( - $(el) - .text() - .replace(/▢/g, "") - ); - } - ); - - $(".wprm-recipe-instruction-text").each((i, el) => { - Recipe.instructions.push( - $(el) - .remove("img") - .text() - .trim() - ); - }); - - Recipe.time.prep = - $(".wprm-recipe-prep_time-minutes").text() + - " " + - $(".wprm-recipe-prep_timeunit-minutes").text(); - Recipe.time.cook = - $(".wprm-recipe-cook_time-minutes").text() + - " " + - $(".wprm-recipe-cook_timeunit-minutes").text(); - Recipe.time.total = - 
$(".wprm-recipe-total_time-minutes").text() + - " " + - $(".wprm-recipe-total_timeunit-minutes").text(); - Recipe.servings = $(".wprm-recipe-servings").text(); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = gimmedelicious; diff --git a/scrapers/gimmesomeoven.js b/scrapers/gimmesomeoven.js deleted file mode 100644 index 7b06c77..0000000 --- a/scrapers/gimmesomeoven.js +++ /dev/null @@ -1,67 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); - -const gimmeSomeOven = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("gimmesomeoven.com/")) { - reject(new Error("url provided must include 'gimmesomeoven.com/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".tasty-recipes-header-content") - .children("h2") - .first() - .text(); - - $(".tasty-recipes-ingredients") - .find("li") - .each((i, el) => { - Recipe.ingredients.push($(el).text()); - }); - - $(".tasty-recipes-instructions") - .find("li") - .each((i, el) => { - Recipe.instructions.push($(el).text()); - }); - - Recipe.time.prep = $(".tasty-recipes-prep-time").text(); - Recipe.time.cook = $(".tasty-recipes-cook-time").text(); - Recipe.time.total = $(".tasty-recipes-total-time").text(); - - $(".tasty-recipes-yield-scale").remove(); - Recipe.servings = $(".tasty-recipes-yield") - .text() - .trim(); - - $("a[rel='category tag']").each((i, el) => { - Recipe.tags.push( - $(el) - .text() - ); - }); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - 
!Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = gimmeSomeOven; diff --git a/scrapers/index.js b/scrapers/index.js index d167b13..99a9ac8 100644 --- a/scrapers/index.js +++ b/scrapers/index.js @@ -8,7 +8,7 @@ const recipeScraper = async url => { }; recipeScraper( - "https://www.yummly.com/recipe/No-Bake-Lemon-Mango-Cheesecakes-with-Speculoos-crust-781945" + "https://www.epicurious.com/recipes/food/views/trout-toast-with-soft-scrambled-eggs" ).then(recipe => console.log(recipe)); module.exports = recipeScraper; diff --git a/scrapers/julieblanner.js b/scrapers/julieblanner.js deleted file mode 100644 index 70a9fbd..0000000 --- a/scrapers/julieblanner.js +++ /dev/null @@ -1,72 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); - -const julieblanner = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("julieblanner.com/")) { - reject(new Error("url provided must include 'julieblanner.com/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".wprm-recipe-name") - .text() - .trim(); - - $(".wprm-recipe-ingredients > .wprm-recipe-ingredient").each( - (i, el) => { - Recipe.ingredients.push( - $(el) - .text() - .replace(/(\s\s+|▢)/g, " ") - .trim() - ); - } - ); - - $(".wprm-recipe-instruction-text").each((i, el) => { - Recipe.instructions.push( - $(el) - .remove("img") - .text() - .trim() - ); - }); - - Recipe.time.prep = $(".wprm-recipe-prep-time-label") - .next() - .text(); - Recipe.time.cook = $(".wprm-recipe-cook-time-label") - .next() - .text(); - Recipe.time.inactive = 
$(".wprm-recipe-custom-time-label") - .next() - .text(); - Recipe.time.total = $(".wprm-recipe-total-time-label") - .next() - .text(); - Recipe.servings = $(".wprm-recipe-servings").text(); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = julieblanner; diff --git a/scrapers/kitchenstories.js b/scrapers/kitchenstories.js deleted file mode 100644 index 900892d..0000000 --- a/scrapers/kitchenstories.js +++ /dev/null @@ -1,86 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/recipe-schema"); - -const kitchenStories = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if ( - !url.includes("kitchenstories.com/en/recipes") && - !url.includes("kitchenstories.com/de/rezepte") - ) { - reject( - new Error( - "url provided must include 'kitchenstories.com/en/recipes' or 'kitchenstories.com/de/rezepte'" - ) - ); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".recipe-title").text(); - - $(".ingredients") - .find("tr") - .each((i, el) => { - Recipe.ingredients.push($(el).text()); - }); - - $(".step") - .children(".text") - .each((i, el) => { - Recipe.instructions.push($(el).text()); - }); - - $(".time-cell").each((i, el) => { - let title = $(el) - .children(".title") - .text(); - let time = $(el) - .find(".time") - .text(); - let unit = $(el) - .find(".unit") - .text(); - if (parseInt(time)) { - switch (title) { - case "Preparation": - case "Zubereitung": - Recipe.time.prep = `${time} ${unit}`; - break; - case "Baking": - case "Backzeit": - Recipe.time.cook = `${time} 
${unit}`; - break; - case "Resting": - case "Ruhezeit": - Recipe.time.inactive = `${time} ${unit}`; - break; - default: - } - } - }); - - Recipe.servings = $(".stepper-value").text(); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = kitchenStories; diff --git a/scrapers/melskitchencafe.js b/scrapers/melskitchencafe.js index 1a2e439..c12c445 100644 --- a/scrapers/melskitchencafe.js +++ b/scrapers/melskitchencafe.js @@ -1,7 +1,7 @@ const request = require("request"); const cheerio = require("cheerio"); -const RecipeSchema = require("../helpers/recipe-schema"); +const RecipeSchema = require("../helpers/RecipeSchema"); const melskitchencafe = url => { const Recipe = new RecipeSchema(); diff --git a/scrapers/minimalistbaker.js b/scrapers/minimalistbaker.js index 163ae88..81e5a7e 100644 --- a/scrapers/minimalistbaker.js +++ b/scrapers/minimalistbaker.js @@ -1,7 +1,7 @@ const request = require("request"); const cheerio = require("cheerio"); -const RecipeSchema = require("../helpers/recipe-schema"); +const RecipeSchema = require("../helpers/RecipeSchema"); const minimalistBaker = url => { const Recipe = new RecipeSchema(); diff --git a/scrapers/myrecipes.js b/scrapers/myrecipes.js index 8fdec59..1d992d9 100644 --- a/scrapers/myrecipes.js +++ b/scrapers/myrecipes.js @@ -1,7 +1,7 @@ const request = require("request"); const cheerio = require("cheerio"); -const RecipeSchema = require("../helpers/recipe-schema"); +const RecipeSchema = require("../helpers/RecipeSchema"); const myRecipes = url => { const Recipe = new RecipeSchema(); diff --git a/scrapers/omnivorescookbook.js b/scrapers/omnivorescookbook.js index 8cffd89..8ef1363 100644 --- a/scrapers/omnivorescookbook.js +++ b/scrapers/omnivorescookbook.js @@ -1,7 +1,7 @@ const request = 
require("request"); const cheerio = require("cheerio"); -const RecipeSchema = require("../helpers/recipe-schema"); +const RecipeSchema = require("../helpers/RecipeSchema"); const omnivorescookbook = url => { const Recipe = new RecipeSchema(); diff --git a/scrapers/pinchofyum.js b/scrapers/pinchofyum.js index 143f6bb..a3aea42 100644 --- a/scrapers/pinchofyum.js +++ b/scrapers/pinchofyum.js @@ -1,7 +1,7 @@ const request = require("request"); const cheerio = require("cheerio"); -const RecipeSchema = require("../helpers/recipe-schema"); +const RecipeSchema = require("../helpers/RecipeSchema"); const tasteOfYum = url => { const Recipe = new RecipeSchema(); diff --git a/scrapers/recipetineats.js b/scrapers/recipetineats.js index 8c0cbda..27c82f3 100644 --- a/scrapers/recipetineats.js +++ b/scrapers/recipetineats.js @@ -1,7 +1,7 @@ const request = require("request"); const cheerio = require("cheerio"); -const RecipeSchema = require("../helpers/recipe-schema"); +const RecipeSchema = require("../helpers/RecipeSchema"); const recipeTinEats = url => { const Recipe = new RecipeSchema(); diff --git a/scrapers/seriouseats.js b/scrapers/seriouseats.js index f53cbea..364cbd7 100644 --- a/scrapers/seriouseats.js +++ b/scrapers/seriouseats.js @@ -1,7 +1,7 @@ const request = require("request"); const cheerio = require("cheerio"); -const RecipeSchema = require("../helpers/recipe-schema"); +const RecipeSchema = require("../helpers/RecipeSchema"); const seriousEats = url => { const Recipe = new RecipeSchema(); diff --git a/scrapers/simplyrecipes.js b/scrapers/simplyrecipes.js index 142dd67..9fd2a22 100644 --- a/scrapers/simplyrecipes.js +++ b/scrapers/simplyrecipes.js @@ -1,7 +1,7 @@ const request = require("request"); const cheerio = require("cheerio"); -const RecipeSchema = require("../helpers/recipe-schema"); +const RecipeSchema = require("../helpers/RecipeSchema"); const simplyRecipes = url => { const Recipe = new RecipeSchema(); diff --git a/scrapers/smittenkitchen.js 
b/scrapers/smittenkitchen.js index 9e28873..60b3a39 100644 --- a/scrapers/smittenkitchen.js +++ b/scrapers/smittenkitchen.js @@ -1,7 +1,7 @@ const request = require("request"); const cheerio = require("cheerio"); -const RecipeSchema = require("../helpers/recipe-schema"); +const RecipeSchema = require("../helpers/RecipeSchema"); const smittenKitchen = url => { const Recipe = new RecipeSchema(); diff --git a/scrapers/tasteofhome.js b/scrapers/tasteofhome.js index 36fa1d5..378d589 100644 --- a/scrapers/tasteofhome.js +++ b/scrapers/tasteofhome.js @@ -1,7 +1,7 @@ const request = require("request"); const cheerio = require("cheerio"); -const RecipeSchema = require("../helpers/recipe-schema"); +const RecipeSchema = require("../helpers/RecipeSchema"); const tasteofhome = url => { const Recipe = new RecipeSchema(); diff --git a/scrapers/theblackpeppercorn.js b/scrapers/theblackpeppercorn.js index ac1c291..2bd3ede 100644 --- a/scrapers/theblackpeppercorn.js +++ b/scrapers/theblackpeppercorn.js @@ -1,7 +1,7 @@ const request = require("request"); const cheerio = require("cheerio"); -const RecipeSchema = require("../helpers/recipe-schema"); +const RecipeSchema = require("../helpers/RecipeSchema"); const theblackpeppercorn = url => { const Recipe = new RecipeSchema(); diff --git a/scrapers/thepioneerwoman.js b/scrapers/thepioneerwoman.js index 0df4e04..7d77a54 100644 --- a/scrapers/thepioneerwoman.js +++ b/scrapers/thepioneerwoman.js @@ -1,7 +1,7 @@ const request = require("request"); const cheerio = require("cheerio"); -const RecipeSchema = require("../helpers/recipe-schema"); +const RecipeSchema = require("../helpers/RecipeSchema"); const thePioneerWoman = url => { const Recipe = new RecipeSchema(); diff --git a/scrapers/therecipecritic.js b/scrapers/therecipecritic.js index 8156de7..dc81a92 100644 --- a/scrapers/therecipecritic.js +++ b/scrapers/therecipecritic.js @@ -1,7 +1,7 @@ const request = require("request"); const cheerio = require("cheerio"); -const RecipeSchema = 
require("../helpers/recipe-schema"); +const RecipeSchema = require("../helpers/RecipeSchema"); const therecipecritic = url => { const Recipe = new RecipeSchema(); diff --git a/scrapers/thespruceeats.js b/scrapers/thespruceeats.js index 425a2c8..710357b 100644 --- a/scrapers/thespruceeats.js +++ b/scrapers/thespruceeats.js @@ -1,7 +1,7 @@ const request = require("request"); const cheerio = require("cheerio"); -const RecipeSchema = require("../helpers/recipe-schema"); +const RecipeSchema = require("../helpers/RecipeSchema"); const theSpruceEats = url => { const Recipe = new RecipeSchema(); diff --git a/scrapers/whatsgabycooking.js b/scrapers/whatsgabycooking.js index 412800f..2a76d24 100644 --- a/scrapers/whatsgabycooking.js +++ b/scrapers/whatsgabycooking.js @@ -1,7 +1,7 @@ const request = require("request"); const cheerio = require("cheerio"); -const RecipeSchema = require("../helpers/recipe-schema"); +const RecipeSchema = require("../helpers/RecipeSchema"); const whatsGabyCooking = url => { const Recipe = new RecipeSchema(); diff --git a/scrapers/woolworths.js b/scrapers/woolworths.js index f4b3741..7d730f8 100644 --- a/scrapers/woolworths.js +++ b/scrapers/woolworths.js @@ -1,5 +1,5 @@ const request = require("request"); -const RecipeSchema = require("../helpers/recipe-schema"); +const RecipeSchema = require("../helpers/RecipeSchema"); const urlRe = /\/(\d\d\d\d)\//; const instructionsIndexRe = /(?:\d.)(.*)/; diff --git a/scrapers/yummly.js b/scrapers/yummly.js deleted file mode 100644 index 7b5fa44..0000000 --- a/scrapers/yummly.js +++ /dev/null @@ -1,144 +0,0 @@ -const cheerio = require("cheerio"); -const puppeteer = require("puppeteer"); - -const RecipeSchema = require("../helpers/recipe-schema"); - -const blockedResourceTypes = [ - "image", - "media", - "font", - "texttrack", - "object", - "beacon", - "csp_report", - "imageset", - "stylesheet", - "font" -]; - -const skippedResources = [ - "quantserve", - "adzerk", - "doubleclick", - "adition", - "exelator", - 
"sharethrough", - "cdn.api.twitter", - "google-analytics", - "googletagmanager", - "google", - "fontawesome", - "facebook", - "analytics", - "optimizely", - "clicktale", - "mixpanel", - "zedo", - "clicksor", - "tiqcdn" -]; - -const customPuppeteerFetch = async url => { - const browser = await puppeteer.launch(); - const page = await browser.newPage(); - await page.setRequestInterception(true); - - page.on("request", req => { - const requestUrl = req._url.split("?")[0].split("#")[0]; - if ( - blockedResourceTypes.indexOf(req.resourceType()) !== -1 || - skippedResources.some(resource => requestUrl.indexOf(resource) !== -1) - ) { - req.abort(); - } else { - req.continue(); - } - }); - try { - const response = await page.goto(url); - if (response._status < 400) { - try { - let steps = (await page.$$(".step")).length; - let newSteps = -1; - - while (steps >= newSteps) { - await page.waitFor(100); - await page.$eval( - "a.view-more-steps", - /* istanbul ignore next */ elem => elem.click() - ); - newSteps = (await page.$$(".step")).length; - } - } finally { - let html = await page.content(); - await browser.close(); - return html; - } - } else { - await brower.close(); - return Promise.reject(response._status); - } - } catch (e) { - await browser.close(); - return Promise.reject("invalid url"); - } -}; - -const yummy = url => { - return new Promise(async (resolve, reject) => { - if (!url.includes("yummly.com/recipe")) { - reject(new Error("url provided must include 'yummly.com/recipe'")); - } else { - try { - const html = await customPuppeteerFetch(url); - const Recipe = new RecipeSchema(); - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".recipe-title").text(); - - $(".recipe-tag").each((i, el) => { - Recipe.tags.push( - $(el) - .find("a") - .text() - ); - }); - - $(".IngredientLine").each((i, el) => { - Recipe.ingredients.push($(el).text()); - }); - - $(".step").each((i, el) => { - 
Recipe.instructions.push($(el).text()); - }); - - Recipe.time.total = - $("div.unit") - .children() - .first() - .text() + - " " + - $("div.unit") - .children() - .last() - .text(); - - Recipe.servings = $(".unit-serving-wrapper") - .find(".greyscale-1") - .text() - .split(" ")[0]; - - if (!Recipe.name || !Recipe.ingredients.length) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } catch (error) { - reject(new Error("No recipe found on page")); - } - } - }); -}; - -module.exports = yummy; diff --git a/test/allRecipes.test.js b/test/allRecipes.test.js index f92f79b..8239b24 100644 --- a/test/allRecipes.test.js +++ b/test/allRecipes.test.js @@ -34,7 +34,7 @@ describe("allRecipes", () => { assert.fail("was not supposed to succeed"); } catch (error) { expect(error.message).to.equal( - "url provided must include 'allrecipes.com/recipe/'" + "url provided must include 'allrecipes.com/recipe'" ); } }); diff --git a/test/centraltexasfoodbank.test.js b/test/centraltexasfoodbank.test.js index 055176f..d27e9f4 100644 --- a/test/centraltexasfoodbank.test.js +++ b/test/centraltexasfoodbank.test.js @@ -1,11 +1,9 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const centralTexasFoodBank = require("../scrapers/centraltexasfoodbank"); -const Constants = require("./constants/centraltexasfoodbankConstants"); +const constants = require("./constants/centraltexasfoodbankConstants"); commonRecipeTest( "centralTexasFoodBank", - centralTexasFoodBank, - Constants, - "centraltexasfoodbank.org/recipe/" + constants, + "centraltexasfoodbank.org/recipe" ); diff --git a/test/closetcooking.test.js b/test/closetcooking.test.js index feac37b..86642cb 100644 --- a/test/closetcooking.test.js +++ b/test/closetcooking.test.js @@ -1,11 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const closetCooking = require("../scrapers/closetcooking"); -const Constants = 
require("./constants/closetcookingConstants"); +const constants = require("./constants/closetcookingConstants"); -commonRecipeTest( - "closetCooking", - closetCooking, - Constants, - "closetcooking.com/" -); +commonRecipeTest("closetCooking", constants, "closetcooking.com/"); diff --git a/test/constants/eatingwellConstants.js b/test/constants/eatingwellConstants.js index 742a794..70f22d8 100644 --- a/test/constants/eatingwellConstants.js +++ b/test/constants/eatingwellConstants.js @@ -20,10 +20,10 @@ module.exports = { "4 cups low-sodium chicken broth", "1 (15 ounce) can low-sodium black beans, rinsed", "1 (14 ounce) can no-salt-added fire-roasted diced tomatoes", - "1 Juice of 1 lime", + "Juice of 1 lime", "½ cup chopped fresh cilantro, plus more for garnish", "¾ cup shredded Mexican-style cheese blend", - "1 cup Tortilla chips for garnish" + "Tortilla chips for garnish" ], instructions: [ "Heat oil on high heat using the sauté function of your multicooker. (No sauté mode? See Tip.) Add onion, poblano, chicken, garlic, chili powder and salt. Cook, stirring occasionally, until the vegetables have softened and the chicken is no longer pink on the outside, about 5 minutes. Turn off the heat. Stir in broth, beans and tomatoes. Close and lock the lid. 
Cook at high pressure for 10 minutes.", diff --git a/test/cookieandkate.test.js b/test/cookieandkate.test.js index 1e0e004..4d11e74 100644 --- a/test/cookieandkate.test.js +++ b/test/cookieandkate.test.js @@ -1,11 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const cookieAndKate = require("../scrapers/cookieandkate"); -const Constants = require("./constants/cookieandkateConstants"); +const constants = require("./constants/cookieandkateConstants"); -commonRecipeTest( - "cookieAndKate", - cookieAndKate, - Constants, - "cookieandkate.com/" -); +commonRecipeTest("cookieAndKate", constants, "cookieandkate.com/"); diff --git a/test/copykat.test.js b/test/copykat.test.js index 404d7d3..b50a9c0 100644 --- a/test/copykat.test.js +++ b/test/copykat.test.js @@ -1,6 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const copyKat = require("../scrapers/copykat"); -const Constants = require("./constants/copykatConstants"); +const constants = require("./constants/copykatConstants"); -commonRecipeTest("copyKat", copyKat, Constants, "copykat.com/"); +commonRecipeTest("copyKat", constants, "copykat.com/"); diff --git a/test/damndelicious.test.js b/test/damndelicious.test.js index 12ab534..e292b0b 100644 --- a/test/damndelicious.test.js +++ b/test/damndelicious.test.js @@ -1,11 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const damnDelicious = require("../scrapers/damndelicious"); -const Constants = require("./constants/damndeliciousConstants"); +const constants = require("./constants/damndeliciousConstants"); -commonRecipeTest( - "damnDelicious", - damnDelicious, - Constants, - "damndelicious.net" -); +commonRecipeTest("damnDelicious", constants, "damndelicious.net"); diff --git a/test/eatingwell.test.js b/test/eatingwell.test.js index 5debf55..839b74d 100644 --- a/test/eatingwell.test.js +++ b/test/eatingwell.test.js @@ -1,28 +1,36 @@ "use strict"; -const expect = 
require("chai").expect; -const assert = require("chai").assert; +const { assert, expect } = require("chai"); -const eatingWell = require("../scrapers/eatingwell"); -const Constants = require("./constants/eatingwellConstants"); +const EatingWellScraper = require("../scrapers/EatingWellScraper"); +const constants = require("./constants/eatingwellConstants"); describe("eatingWell", () => { + let eatingWell; + + before(() => { + eatingWell = new EatingWellScraper(); + }); + it("should fetch the expected recipe", async () => { - let actualRecipe = await eatingWell(Constants.testUrl); - expect(JSON.stringify(Constants.expectedRecipe)).to.equal( + eatingWell.url = constants.testUrl; + let actualRecipe = await eatingWell.fetchRecipe(); + expect(JSON.stringify(constants.expectedRecipe)).to.equal( JSON.stringify(actualRecipe) ); }); it("should fetch another expected recipe", async () => { - let actualRecipe = await eatingWell(Constants.testUrl2); - expect(JSON.stringify(Constants.expectedRecipe2)).to.equal( + eatingWell.url = constants.testUrl2; + let actualRecipe = await eatingWell.fetchRecipe(); + expect(JSON.stringify(constants.expectedRecipe2)).to.equal( JSON.stringify(actualRecipe) ); }); it("should throw an error if a problem occurred during page retrieval", async () => { try { - await eatingWell(Constants.invalidUrl); + eatingWell.url = constants.invalidUrl; + await eatingWell.fetchRecipe(); assert.fail("was not supposed to succeed"); } catch (error) { expect(error.message).to.equal("No recipe found on page"); @@ -31,7 +39,8 @@ describe("eatingWell", () => { it("should throw an error if the url doesn't contain required sub-url", async () => { try { - await eatingWell(Constants.invalidDomainUrl); + eatingWell.url = constants.invalidDomainUrl; + await eatingWell.fetchRecipe(); assert.fail("was not supposed to succeed"); } catch (error) { expect(error.message).to.equal( @@ -42,7 +51,8 @@ describe("eatingWell", () => { it("should throw an error if non-recipe page is 
used", async () => { try { - await eatingWell(Constants.nonRecipeUrl); + eatingWell.url = constants.nonRecipeUrl; + await eatingWell.fetchRecipe(); assert.fail("was not supposed to succeed"); } catch (error) { expect(error.message).to.equal("No recipe found on page"); diff --git a/test/epicurious.test.js b/test/epicurious.test.js index 61534b0..e880c7c 100644 --- a/test/epicurious.test.js +++ b/test/epicurious.test.js @@ -1,11 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const epicurious = require("../scrapers/epicurious"); -const Constants = require("./constants/epicuriousConstants"); +const constants = require("./constants/epicuriousConstants"); -commonRecipeTest( - "epicurious", - epicurious, - Constants, - "epicurious.com/recipes/" -); +commonRecipeTest("epicurious", constants, "epicurious.com/recipes/"); diff --git a/test/food.test.js b/test/food.test.js index 7e55fef..301ab74 100644 --- a/test/food.test.js +++ b/test/food.test.js @@ -1,6 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const food = require("../scrapers/food"); -const Constants = require("./constants/foodConstants"); +const constants = require("./constants/foodConstants"); -commonRecipeTest("food", food, Constants, "food.com/recipe/"); +commonRecipeTest("food", constants, "food.com/recipe/"); diff --git a/test/foodandwine.test.js b/test/foodandwine.test.js index 913fecd..de57e12 100644 --- a/test/foodandwine.test.js +++ b/test/foodandwine.test.js @@ -1,11 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const foodAndWine = require("../scrapers/foodandwine"); -const Constants = require("./constants/foodandwineConstants"); +const constants = require("./constants/foodandwineConstants"); -commonRecipeTest( - "foodAndWine", - foodAndWine, - Constants, - "foodandwine.com/recipes/" -); +commonRecipeTest("foodAndWine", constants, "foodandwine.com/recipes/"); diff --git 
a/test/foodnetwork.test.js b/test/foodnetwork.test.js index 19dc278..2fa109a 100644 --- a/test/foodnetwork.test.js +++ b/test/foodnetwork.test.js @@ -1,28 +1,36 @@ "use strict"; -const expect = require("chai").expect; -const assert = require("chai").assert; +const { assert, expect } = require("chai"); -const foodNetwork = require("../scrapers/foodnetwork"); -const Constants = require("./constants/foodnetworkConstants"); +const FoodNetworkScraper = require("../scrapers/FoodNetworkScraper"); +const constants = require("./constants/foodnetworkConstants"); describe("foodNetwork", () => { + let foodNetwork; + + before(() => { + foodNetwork = new FoodNetworkScraper(); + }); + it("should fetch the expected recipe(1)", async () => { - let actualRecipe = await foodNetwork(Constants.testUrl); - expect(JSON.stringify(Constants.expectedRecipe)).to.equal( + foodNetwork.url = constants.testUrl; + let actualRecipe = await foodNetwork.fetchRecipe(); + expect(JSON.stringify(constants.expectedRecipe)).to.equal( JSON.stringify(actualRecipe) ); }); it("should fetch the expected recipe(2)", async () => { - let actualRecipe = await foodNetwork(Constants.anotherTestUrl); - expect(JSON.stringify(Constants.anotherExpectedRecipe)).to.equal( + foodNetwork.url = constants.anotherTestUrl; + let actualRecipe = await foodNetwork.fetchRecipe(); + expect(JSON.stringify(constants.anotherExpectedRecipe)).to.equal( JSON.stringify(actualRecipe) ); }); it("should throw an error if invalid url is used", async () => { try { - await foodNetwork(Constants.invalidDomainUrl); + foodNetwork.url = constants.invalidDomainUrl; + await foodNetwork.fetchRecipe(); assert.fail("was not supposed to succeed"); } catch (error) { expect(error.message).to.equal( @@ -33,7 +41,8 @@ describe("foodNetwork", () => { it("should throw an error if a problem occurred during page retrieval", async () => { try { - await foodNetwork(Constants.invalidUrl); + foodNetwork.url = constants.invalidUrl; + await foodNetwork.fetchRecipe(); 
assert.fail("was not supposed to succeed"); } catch (error) { expect(error.message).to.equal("No recipe found on page"); @@ -42,7 +51,8 @@ describe("foodNetwork", () => { it("should throw an error if non-recipe page is used", async () => { try { - await foodNetwork(Constants.nonRecipeUrl); + foodNetwork.url = constants.nonRecipeUrl; + await foodNetwork.fetchRecipe(); assert.fail("was not supposed to succeed"); } catch (error) { expect(error.message).to.equal("No recipe found on page"); diff --git a/test/gimmedelicious.test.js b/test/gimmedelicious.test.js index 0433b9d..57192f1 100644 --- a/test/gimmedelicious.test.js +++ b/test/gimmedelicious.test.js @@ -1,11 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const gimmeDelicious = require("../scrapers/gimmedelicious"); -const Constants = require("./constants/gimmedeliciousConstants"); +const constants = require("./constants/gimmedeliciousConstants"); -commonRecipeTest( - "gimmeDelicious", - gimmeDelicious, - Constants, - "gimmedelicious.com/" -); +commonRecipeTest("gimmeDelicious", constants, "gimmedelicious.com/"); diff --git a/test/gimmesomeoven.test.js b/test/gimmesomeoven.test.js index 39b99f2..3b9d2d2 100644 --- a/test/gimmesomeoven.test.js +++ b/test/gimmesomeoven.test.js @@ -1,11 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const gimmeSomeOven = require("../scrapers/gimmesomeoven"); -const Constants = require("./constants/gimmesomeovenConstants"); +const constants = require("./constants/gimmesomeovenConstants"); -commonRecipeTest( - "gimmeSomeOven", - gimmeSomeOven, - Constants, - "gimmesomeoven.com/" -); +commonRecipeTest("gimmeSomeOven", constants, "gimmesomeoven.com/"); diff --git a/test/julieblanner.test.js b/test/julieblanner.test.js index 7a8c754..bb29ffb 100644 --- a/test/julieblanner.test.js +++ b/test/julieblanner.test.js @@ -1,6 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const 
julieBlanner = require("../scrapers/julieblanner"); -const Constants = require("./constants/julieblannerConstants"); +const constants = require("./constants/julieblannerConstants"); -commonRecipeTest("julieBlanner", julieBlanner, Constants, "julieblanner.com/"); +commonRecipeTest("julieBlanner", constants, "julieblanner.com/"); diff --git a/test/kitchenStories.test.js b/test/kitchenStories.test.js index fddb337..ad8cedd 100644 --- a/test/kitchenStories.test.js +++ b/test/kitchenStories.test.js @@ -1,11 +1,9 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const kitchenStories = require("../scrapers/kitchenstories"); -const Constants = require("./constants/kitchenstoriesConstants"); +const constants = require("./constants/kitchenstoriesConstants"); commonRecipeTest( "kitchenStories", - kitchenStories, - Constants, + constants, "kitchenstories.com/en/recipes' or 'kitchenstories.com/de/rezepte" ); diff --git a/test/nomnompaleo.test.js b/test/nomnompaleo.test.js index 4b15b65..9d003c6 100644 --- a/test/nomnompaleo.test.js +++ b/test/nomnompaleo.test.js @@ -1,6 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const nomNomPaleo = require("../scrapers/nomnompaleo"); -const Constants = require("./constants/nomnompaleoConstants"); +const constants = require("./constants/nomnompaleoConstants"); -commonRecipeTest("nomnompaleo", nomNomPaleo, Constants, "nomnompaleo.com/"); +commonRecipeTest("nomnompaleo", constants, "nomnompaleo.com/"); diff --git a/test/tastesbetterfromscratch.test.js b/test/tastesbetterfromscratch.test.js index 4be8525..4a91f01 100644 --- a/test/tastesbetterfromscratch.test.js +++ b/test/tastesbetterfromscratch.test.js @@ -1,11 +1,9 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const tastesBetterFromScratch = require("../scrapers/tastesbetterfromscratch"); -const Constants = require("./constants/tastebetterfromscratchConstants"); +const constants = 
require("./constants/tastebetterfromscratchConstants"); commonRecipeTest( "tastesBetterFromScratch", - tastesBetterFromScratch, - Constants, + constants, "tastesbetterfromscratch.com" ); diff --git a/test/thereaddealfoodrds.test.js b/test/thereaddealfoodrds.test.js index a0a7930..1ce5088 100644 --- a/test/thereaddealfoodrds.test.js +++ b/test/thereaddealfoodrds.test.js @@ -1,11 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const theRealDealFoodRds = require("../scrapers/therealfoodrds"); -const Constants = require("./constants/therealdealfoodrdsConstants"); +const constants = require("./constants/therealdealfoodrdsConstants"); -commonRecipeTest( - "theRealDealFoodRds", - theRealDealFoodRds, - Constants, - "therealfoodrds.com/" -); +commonRecipeTest("theRealDealFoodRds", constants, "therealfoodrds.com/"); diff --git a/test/yummly.test.js b/test/yummly.test.js index 7365466..cdf195e 100644 --- a/test/yummly.test.js +++ b/test/yummly.test.js @@ -1,6 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const yummly = require("../scrapers/yummly"); -const Constants = require("./constants/yummlyConstants"); +const constants = require("./constants/yummlyConstants"); -commonRecipeTest("yummly", yummly, Constants, "yummly.com/recipe"); +commonRecipeTest("yummly", constants, "yummly.com/recipe"); From e6d6cc046d5ed8f687841e0d228f33da4ef67a48 Mon Sep 17 00:00:00 2001 From: jadkins89 Date: Thu, 21 Jan 2021 13:36:27 -0700 Subject: [PATCH 09/11] a handful of more refactored scrapes --- helpers/BaseScraper.js | 4 ++ helpers/ScraperFactory.js | 21 +++--- scrapers/BbcGoodFoodScraper.js | 9 ++- scrapers/KitchenStoriesScraper.js | 10 +-- scrapers/MelsKitchenCafeScraper.js | 46 ++++++++++++ scrapers/MinimalistBakerScraper.js | 61 ++++++++++++++++ scrapers/MyRecipesScraper.js | 36 ++++++++++ scrapers/OmnivoresCookbookScraper.js | 73 ++++++++++++++++++++ scrapers/PinchOfYumScraper.js | 49 +++++++++++++ 
scrapers/RecipeTinEatsScraper.js | 57 +++++++++++++++ scrapers/SeriousEatsScraper.js | 69 ++++++++++++++++++ scrapers/index.js | 6 +- scrapers/melskitchencafe.js | 64 ----------------- scrapers/minimalistbaker.js | 80 --------------------- scrapers/myrecipes.js | 64 ----------------- scrapers/omnivorescookbook.js | 90 ------------------------ scrapers/pinchofyum.js | 66 ------------------ scrapers/recipetineats.js | 76 -------------------- scrapers/seriouseats.js | 96 -------------------------- test/constants/allRecipesConstants.js | 4 +- test/constants/eatingwellConstants.js | 4 +- test/constants/foodandwineConstants.js | 2 +- test/constants/foodnetworkConstants.js | 2 +- test/constants/myrecipesConstants.js | 2 +- test/constants/pinchofyumConstants.js | 7 +- test/helpers/commonRecipeTest.js | 2 +- test/melskitchencafe.test.js | 10 +-- test/minimalistbaker.test.js | 10 +-- test/myrecipes.test.js | 5 +- test/omnivorescookbook.test.js | 10 +-- test/pinchofyum.test.js | 5 +- test/seriouseats.test.js | 24 ++++--- 32 files changed, 450 insertions(+), 614 deletions(-) create mode 100644 scrapers/MelsKitchenCafeScraper.js create mode 100644 scrapers/MinimalistBakerScraper.js create mode 100644 scrapers/MyRecipesScraper.js create mode 100644 scrapers/OmnivoresCookbookScraper.js create mode 100644 scrapers/PinchOfYumScraper.js create mode 100644 scrapers/RecipeTinEatsScraper.js create mode 100644 scrapers/SeriousEatsScraper.js delete mode 100644 scrapers/melskitchencafe.js delete mode 100644 scrapers/minimalistbaker.js delete mode 100644 scrapers/myrecipes.js delete mode 100644 scrapers/omnivorescookbook.js delete mode 100644 scrapers/pinchofyum.js delete mode 100644 scrapers/recipetineats.js delete mode 100644 scrapers/seriouseats.js diff --git a/helpers/BaseScraper.js b/helpers/BaseScraper.js index 78d6254..39d0e21 100644 --- a/helpers/BaseScraper.js +++ b/helpers/BaseScraper.js @@ -74,6 +74,10 @@ class BaseScraper { throw new Error("scrape is not defined in 
BaseScraper"); } + textTrim(el) { + return el.text().trim(); + } + /** * */ diff --git a/helpers/ScraperFactory.js b/helpers/ScraperFactory.js index c750453..994ae5a 100644 --- a/helpers/ScraperFactory.js +++ b/helpers/ScraperFactory.js @@ -25,14 +25,14 @@ const domains = { gimmesomeoven: require("../scrapers/GimmeSomeOvenScraper"), julieblanner: require("../scrapers/JulieBlannerScraper"), kitchenstories: require("../scrapers/KitchenStoriesScraper"), - melskitchencafe: require("../scrapers/melskitchencafe"), - minimalistbaker: require("../scrapers/minimalistbaker"), - myrecipes: require("../scrapers/myrecipes"), + melskitchencafe: require("../scrapers/MelsKitchenCafeScraper"), + minimalistbaker: require("../scrapers/MinimalistBakerScraper"), + myrecipes: require("../scrapers/MyRecipesScraper"), nomnompaleo: require("../scrapers/NomNomPaleoScraper"), - omnivorescookbook: require("../scrapers/omnivorescookbook"), - pinchofyum: require("../scrapers/pinchofyum"), - recipetineats: require("../scrapers/recipetineats"), - seriouseats: require("../scrapers/seriouseats"), + omnivorescookbook: require("../scrapers/OmnivoresCookbookScraper"), + pinchofyum: require("../scrapers/PinchOfYumScraper"), + recipetineats: require("../scrapers/RecipeTinEatsScraper"), + seriouseats: require("../scrapers/SeriousEatsScraper"), simplyrecipes: require("../scrapers/simplyrecipes"), smittenkitchen: require("../scrapers/smittenkitchen"), tastesbetterfromscratch: require("../scrapers/TastesBetterFromScratchScraper"), @@ -48,7 +48,7 @@ const domains = { }; /** - * A Singleton Factory to whom supplies an instance of a scraper based on a give URL + * A Factory that supplies an instance of a scraper based on a given URL */ class ScraperFactory { getScraper(url) { @@ -66,7 +66,4 @@ class ScraperFactory { } } -const singletonFactory = new ScraperFactory(); -Object.freeze(singletonFactory); - -module.exports = singletonFactory; +module.exports = ScraperFactory; diff --git 
a/scrapers/BbcGoodFoodScraper.js b/scrapers/BbcGoodFoodScraper.js index 49016b4..b895be1 100644 --- a/scrapers/BbcGoodFoodScraper.js +++ b/scrapers/BbcGoodFoodScraper.js @@ -16,8 +16,8 @@ class BbcGoodFoodScraper extends BaseScraper { const { ingredients, instructions, time } = this.recipe; this.recipe.name = $("meta[name='og:title']").attr("content"); - $(".recipe-template__ingredients") - .find(".list-item") + $(".recipe__ingredients") + .find("li") .each((i, el) => { ingredients.push( $(el) @@ -26,9 +26,8 @@ class BbcGoodFoodScraper extends BaseScraper { ); }); - $(".recipe-template__method-steps") - .find(".list-item") - .children("div") + $(".recipe__method-steps") + .find("p") .each((i, el) => { instructions.push($(el).text()); }); diff --git a/scrapers/KitchenStoriesScraper.js b/scrapers/KitchenStoriesScraper.js index 92fb6ae..3700f4b 100644 --- a/scrapers/KitchenStoriesScraper.js +++ b/scrapers/KitchenStoriesScraper.js @@ -53,25 +53,25 @@ class KitchenStoriesScraper extends BaseScraper { let title = $(el) .children(".title") .text(); - let time = $(el) + let timeText = $(el) .find(".time") .text(); let unit = $(el) .find(".unit") .text(); - if (parseInt(time)) { + if (parseInt(timeText)) { switch (title) { case "Preparation": case "Zubereitung": - time.prep = `${time} ${unit}`; + time.prep = `${timeText} ${unit}`; break; case "Baking": case "Backzeit": - time.cook = `${time} ${unit}`; + time.cook = `${timeText} ${unit}`; break; case "Resting": case "Ruhezeit": - time.inactive = `${time} ${unit}`; + time.inactive = `${timeText} ${unit}`; break; default: } diff --git a/scrapers/MelsKitchenCafeScraper.js b/scrapers/MelsKitchenCafeScraper.js new file mode 100644 index 0000000..6b89d7e --- /dev/null +++ b/scrapers/MelsKitchenCafeScraper.js @@ -0,0 +1,46 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping melskitchencafe.com + * @extends BaseScraper + */ +class MelsKitchenCafeScraper extends BaseScraper { + 
constructor(url) { + super(url, "melskitchencafe.com/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + + this.recipe.name = this.textTrim( + $(".wp-block-mv-recipe .mv-create-title-primary") + ); + + $("div.mv-create-ingredients ul li").each((i, el) => { + ingredients.push(this.textTrim($(el))); + }); + + $("div.mv-create-instructions ol li").each((i, el) => { + instructions.push(this.textTrim($(el))); + }); + + time.prep = this.textTrim($(".mv-create-time-prep .mv-create-time-format")); + time.cook = this.textTrim( + $(".mv-create-time-active .mv-create-time-format") + ); + time.inactive = this.textTrim( + $(".mv-create-time-additional .mv-create-time-format") + ); + time.total = this.textTrim( + $(".mv-create-time-total .mv-create-time-format") + ); + this.recipe.servings = this.textTrim( + $(".mv-create-time-yield .mv-create-time-format") + ); + } +} + +module.exports = MelsKitchenCafeScraper; diff --git a/scrapers/MinimalistBakerScraper.js b/scrapers/MinimalistBakerScraper.js new file mode 100644 index 0000000..b7cf6c0 --- /dev/null +++ b/scrapers/MinimalistBakerScraper.js @@ -0,0 +1,61 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping minimalistbaker.com + * @extends BaseScraper + */ +class MinimalistBakerScraper extends BaseScraper { + constructor(url) { + super(url, "minimalistbaker.com/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = $(".wprm-recipe-name").text(); + + $(".wprm-recipe-ingredient").each((i, el) => { + ingredients.push( + $(el) + .text() + .replace(/\s\s+/g, " ") + .trim() + ); + }); + + $(".wprm-recipe-instruction-group").each((i, el) => { + let group = $(el) + .children(".wprm-recipe-group-name") + .text(); + if (group.length) { + instructions.push(group); + } + $(el) + .find(".wprm-recipe-instruction-text") + .each((i, elChild) => { + 
instructions.push($(elChild).text()); + }); + }); + + this.recipe.tags = $(".wprm-recipe-cuisine") + .text() + .split(",") + .map(x => x.trim()); + + time.prep = $(".wprm-recipe-time") + .first() + .text(); + time.cook = $($(".wprm-recipe-time").get(1)).text(); + time.total = $(".wprm-recipe-time") + .last() + .text(); + + this.recipe.servings = $(".wprm-recipe-servings") + .first() + .text(); + } +} + +module.exports = MinimalistBakerScraper; diff --git a/scrapers/MyRecipesScraper.js b/scrapers/MyRecipesScraper.js new file mode 100644 index 0000000..5f79ba4 --- /dev/null +++ b/scrapers/MyRecipesScraper.js @@ -0,0 +1,36 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping myrecipes.com + * @extends BaseScraper + */ +class MyRecipesScraper extends BaseScraper { + constructor(url) { + super(url, "myrecipes.com/recipe"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = this.textTrim($("h1.headline")); + + $(".ingredients-item").each((i, el) => { + const ingredient = this.textTrim($(el)).replace(/\s\s+/g, " "); + ingredients.push(ingredient); + }); + $($(".instructions-section-item").find("p")).each((i, el) => { + instructions.push($(el).text()); + }); + + let metaBody = $(".recipe-meta-item-body"); + + time.active = this.textTrim(metaBody.first()); + time.total = this.textTrim($(metaBody.get(1))); + + this.recipe.servings = this.textTrim(metaBody.last()); + } +} + +module.exports = MyRecipesScraper; diff --git a/scrapers/OmnivoresCookbookScraper.js b/scrapers/OmnivoresCookbookScraper.js new file mode 100644 index 0000000..54c4dc6 --- /dev/null +++ b/scrapers/OmnivoresCookbookScraper.js @@ -0,0 +1,73 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping omnivorescookbook.com + * @extends BaseScraper + */ +class OmnivoresCookbookScraper extends BaseScraper { + constructor(url) { + 
super(url, "omnivorescookbook.com/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = $(".wprm-recipe-name").text(); + + $(".wprm-recipe-ingredient-group").each((i, el) => { + let group = $(el) + .find(".wprm-recipe-group-name") + .text(); + if (group) { + ingredients.push(group); + } + $(el) + .find(".wprm-recipe-ingredient") + .each((i, el) => { + ingredients.push(this.textTrim($(el)).replace(/\s\s+/g, " ")); + }); + }); + + $(".wprm-recipe-instruction-group").each((i, el) => { + instructions.push( + $(el) + .children(".wprm-recipe-group-name") + .text() + ); + $(el) + .find(".wprm-recipe-instruction-text") + .each((i, elChild) => { + instructions.push($(elChild).text()); + }); + }); + + this.recipe.tags = $(".wprm-recipe-keyword") + .text() + .split(",") + .map(x => x.trim()); + + $(".wprm-recipe-time-container").each((i, el) => { + let label = $(el) + .children(".wprm-recipe-time-label") + .text(); + let timeText = $(el) + .children(".wprm-recipe-time") + .text(); + if (label.includes("Prep")) { + time.prep = timeText; + } else if (label.includes("Cook")) { + time.cook = timeText; + } else if (label.includes("Resting")) { + time.inactive = timeText; + } else if (label.includes("Total")) { + time.total = timeText; + } + }); + + this.recipe.servings = $(".wprm-recipe-servings-with-unit").text(); + } +} + +module.exports = OmnivoresCookbookScraper; diff --git a/scrapers/PinchOfYumScraper.js b/scrapers/PinchOfYumScraper.js new file mode 100644 index 0000000..5893a74 --- /dev/null +++ b/scrapers/PinchOfYumScraper.js @@ -0,0 +1,49 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping pinchofyum.com + * @extends BaseScraper + */ +class PinchOfYumScraper extends BaseScraper { + constructor(url) { + super(url, "pinchofyum.com/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + 
this.recipe.name = $("meta[property='og:title']").attr("content"); + + $(".tasty-recipes-ingredients") + .find("li") + .each((i, el) => { + ingredients.push($(el).text()); + }); + + $(".tasty-recipes-instructions") + .find("li") + .each((i, el) => { + instructions.push($(el).text()); + }); + + const tags = new Set(); + $("meta[property='slick:category']").each((i, el) => { + const tag = $(el) + .attr("content") + .split(";") + .forEach(str => tags.add(str.split(":")[1])); + }); + this.recipe.tags = [...tags]; + + time.prep = $(".tasty-recipes-prep-time").text(); + time.cook = $(".tasty-recipes-cook-time").text(); + time.total = $(".tasty-recipes-total-time").text(); + + $(".tasty-recipes-yield-scale").remove(); + this.recipe.servings = this.textTrim($(".tasty-recipes-yield")); + } +} + +module.exports = PinchOfYumScraper; diff --git a/scrapers/RecipeTinEatsScraper.js b/scrapers/RecipeTinEatsScraper.js new file mode 100644 index 0000000..6704aa1 --- /dev/null +++ b/scrapers/RecipeTinEatsScraper.js @@ -0,0 +1,57 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping recipetineats.com + * @extends BaseScraper + */ +class RecipeTinEatsScraper extends BaseScraper { + constructor(url) { + super(url, "recipetineats.com/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = $("meta[property='og:title']").attr("content"); + + $(".wprm-recipe-ingredient").each((i, el) => { + ingredients.push( + this.textTrim( + $(el) + .replace(/\s\s+/g, " ") + .replace("▢", "") + ) + ); + }); + + $(".wprm-recipe-instruction-group").each((i, el) => { + instructions.push( + $(el) + .children(".wprm-recipe-group-name") + .text() + ); + $(el) + .find(".wprm-recipe-instruction-text") + .each((i, elChild) => { + instructions.push($(elChild).text()); + }); + }); + + $(".wprm-recipe-time-container").each((i, el) => { + let text = $(el).text(); + if (text.includes("Prep:")) 
{ + time.total = text.replace("Prep:", "").trim(); + } else if (text.includes("Cook:")) { + time.prep = text.replace("Cook:", "").trim(); + } else if (text.includes("Total:")) { + time.cook = text.replace("Total:", "").trim(); + } + }); + + this.recipe.servings = this.textTrim($(".wprm-recipe-time").first()); + } +} + +module.exports = RecipeTinEatsScraper; diff --git a/scrapers/SeriousEatsScraper.js b/scrapers/SeriousEatsScraper.js new file mode 100644 index 0000000..2437d3e --- /dev/null +++ b/scrapers/SeriousEatsScraper.js @@ -0,0 +1,69 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping seriouseats.com + * @extends BaseScraper + */ +class SeriousEatsScraper extends BaseScraper { + constructor(url) { + super(url, "seriouseats.com/"); + if (this.url.includes("seriouseats.com/sponsored/")) { + throw new Error("seriouseats.com sponsored recipes not supported"); + } + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = $(".recipe-title") + .text() + .replace(/\s\s+/g, ""); + + $(".ingredient").each((i, el) => { + ingredients.push($(el).text()); + }); + + $(".recipe-about") + .children("li") + .each((i, el) => { + const label = $(el) + .children(".label") + .text(); + const info = $(el) + .children(".info") + .text(); + + if (label.includes("Active")) { + time.active = info; + } else if (label.includes("Total")) { + time.total = info; + } else if (label.includes("Yield")) { + this.recipe.servings = info; + } + }); + + let tagsSet = new Set(); + $("li[class='label label-category top-level']").each((i, el) => { + let text = $(el) + .find("a") + .text(); + if (text) { + tagsSet.add(text); + } + }); + + this.recipe.tags = Array.from(tagsSet); + + $(".recipe-procedure-text").each((i, el) => { + instructions.push( + $(el) + .text() + .replace(/\s\s+/g, "") + ); + }); + } +} + +module.exports = SeriousEatsScraper; diff --git a/scrapers/index.js
b/scrapers/index.js index 99a9ac8..92627c5 100644 --- a/scrapers/index.js +++ b/scrapers/index.js @@ -3,12 +3,8 @@ const ScraperFactory = require("../helpers/ScraperFactory"); const recipeScraper = async url => { - let klass = ScraperFactory.getScraper(url); + let klass = new ScraperFactory().getScraper(url); return await klass.fetchRecipe(); }; -recipeScraper( - "https://www.epicurious.com/recipes/food/views/trout-toast-with-soft-scrambled-eggs" -).then(recipe => console.log(recipe)); - module.exports = recipeScraper; diff --git a/scrapers/melskitchencafe.js b/scrapers/melskitchencafe.js deleted file mode 100644 index c12c445..0000000 --- a/scrapers/melskitchencafe.js +++ /dev/null @@ -1,64 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/RecipeSchema"); - -const melskitchencafe = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("melskitchencafe.com/")) { - reject(new Error("url provided must include 'melskitchencafe.com/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - const textTrim = el => el.text().trim(); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = textTrim( - $(".wp-block-mv-recipe .mv-create-title-primary") - ); - - $("div.mv-create-ingredients ul li").each((i, el) => { - Recipe.ingredients.push(textTrim($(el))); - }); - - $("div.mv-create-instructions ol li").each((i, el) => { - Recipe.instructions.push(textTrim($(el))); - }); - - Recipe.time.prep = textTrim( - $(".mv-create-time-prep .mv-create-time-format") - ); - Recipe.time.cook = textTrim( - $(".mv-create-time-active .mv-create-time-format") - ); - Recipe.time.inactive = textTrim( - $(".mv-create-time-additional .mv-create-time-format") - ); - Recipe.time.total = textTrim( - $(".mv-create-time-total .mv-create-time-format") - ); - 
Recipe.servings = textTrim( - $(".mv-create-time-yield .mv-create-time-format") - ); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = melskitchencafe; diff --git a/scrapers/minimalistbaker.js b/scrapers/minimalistbaker.js deleted file mode 100644 index 81e5a7e..0000000 --- a/scrapers/minimalistbaker.js +++ /dev/null @@ -1,80 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/RecipeSchema"); - -const minimalistBaker = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("minimalistbaker.com/")) { - reject(new Error("url provided must include 'minimalistbaker.com/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - const body = $(".wprm-recipe-container"); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".wprm-recipe-name").text(); - - $(".wprm-recipe-ingredient").each((i, el) => { - Recipe.ingredients.push( - $(el) - .text() - .replace(/\s\s+/g, " ") - .trim() - ); - }); - - $(".wprm-recipe-instruction-group").each((i, el) => { - let group = $(el) - .children(".wprm-recipe-group-name") - .text(); - if (group.length) Recipe.instructions.push(group); - $(el) - .find(".wprm-recipe-instruction-text") - .each((i, elChild) => { - Recipe.instructions.push($(elChild).text()); - }); - }); - - $(".wprm-recipe-cuisine").each((i, el) => { - Recipe.tags.push( - $(el) - .find("a") - .text() - ); - }); - - Recipe.tags = $(".wprm-recipe-cuisine").text().split(',').map(x => x.trim()); - - Recipe.time.prep = $(".wprm-recipe-time") - .first() - .text(); - Recipe.time.cook = 
$($(".wprm-recipe-time").get(1)).text(); - Recipe.time.total = $(".wprm-recipe-time") - .last() - .text(); - - Recipe.servings = $(".wprm-recipe-servings") - .first() - .text(); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = minimalistBaker; diff --git a/scrapers/myrecipes.js b/scrapers/myrecipes.js deleted file mode 100644 index 1d992d9..0000000 --- a/scrapers/myrecipes.js +++ /dev/null @@ -1,64 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/RecipeSchema"); - -const myRecipes = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("myrecipes.com/recipe")) { - reject(new Error("url provided must include 'myrecipes.com/recipe'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $("h1.headline") - .text() - .trim(); - - $(".ingredients-item").each((i, el) => { - const ingredient = $(el) - .text() - .replace(/\s\s+/g, " ") - .trim(); - Recipe.ingredients.push(ingredient); - }); - $($(".instructions-section-item").find("p")).each((i, el) => { - Recipe.instructions.push($(el).text()); - }); - - let metaBody = $(".recipe-meta-item-body"); - - Recipe.time.active = metaBody - .first() - .text() - .trim(); - Recipe.time.total = $(metaBody.get(1)) - .text() - .trim(); - - Recipe.servings = metaBody - .last() - .text() - .trim(); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found 
on page")); - } - }); - } - }); -}; - -module.exports = myRecipes; diff --git a/scrapers/omnivorescookbook.js b/scrapers/omnivorescookbook.js deleted file mode 100644 index 8ef1363..0000000 --- a/scrapers/omnivorescookbook.js +++ /dev/null @@ -1,90 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/RecipeSchema"); - -const omnivorescookbook = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("omnivorescookbook.com/")) { - reject(new Error("url provided must include 'omnivorescookbook.com/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".wprm-recipe-name").text(); - - $(".wprm-recipe-ingredient-group").each((i, el) => { - let group = $(el) - .find(".wprm-recipe-group-name") - .text(); - if (group) { - Recipe.ingredients.push(group); - } - $(el) - .find(".wprm-recipe-ingredient") - .each((i, el) => { - Recipe.ingredients.push( - $(el) - .text() - .replace(/\s\s+/g, " ") - .trim() - ); - }); - }); - - $(".wprm-recipe-instruction-group").each((i, el) => { - Recipe.instructions.push( - $(el) - .children(".wprm-recipe-group-name") - .text() - ); - $(el) - .find(".wprm-recipe-instruction-text") - .each((i, elChild) => { - Recipe.instructions.push($(elChild).text()); - }); - }); - - Recipe.tags = $(".wprm-recipe-keyword").text().split(',').map(x => x.trim()); - - $(".wprm-recipe-time-container").each((i, el) => { - let label = $(el) - .children(".wprm-recipe-time-label") - .text(); - let time = $(el) - .children(".wprm-recipe-time") - .text(); - if (label.includes("Prep")) { - Recipe.time.prep = time; - } else if (label.includes("Cook")) { - Recipe.time.cook = time; - } else if (label.includes("Resting")) { - Recipe.time.inactive = time; - } else if 
(label.includes("Total")) { - Recipe.time.total = time; - } - }); - - Recipe.servings = $(".wprm-recipe-servings-with-unit").text(); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = omnivorescookbook; diff --git a/scrapers/pinchofyum.js b/scrapers/pinchofyum.js deleted file mode 100644 index a3aea42..0000000 --- a/scrapers/pinchofyum.js +++ /dev/null @@ -1,66 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/RecipeSchema"); - -const tasteOfYum = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("pinchofyum.com/")) { - reject(new Error("url provided must include 'pinchofyum.com/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $("meta[property='og:title']").attr("content"); - - $(".tasty-recipes-ingredients") - .find("li") - .each((i, el) => { - Recipe.ingredients.push($(el).text()); - }); - - $(".tasty-recipes-instructions") - .find("li") - .each((i, el) => { - Recipe.instructions.push($(el).text()); - }); - - const tags = new Set(); - $("meta[property='slick:category']").each((i, el) => { - const tag = $(el) - .attr("content") - .split(";") - .forEach(str => tags.add(str.split(":")[1])); - }); - Recipe.tags = [...tags]; - - Recipe.time.prep = $(".tasty-recipes-prep-time").text(); - Recipe.time.cook = $(".tasty-recipes-cook-time").text(); - Recipe.time.total = $(".tasty-recipes-total-time").text(); - - $(".tasty-recipes-yield-scale").remove(); - Recipe.servings = $(".tasty-recipes-yield") - .text() - .trim(); - - if ( - !Recipe.name || 
- !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = tasteOfYum; diff --git a/scrapers/recipetineats.js b/scrapers/recipetineats.js deleted file mode 100644 index 27c82f3..0000000 --- a/scrapers/recipetineats.js +++ /dev/null @@ -1,76 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/RecipeSchema"); - -const recipeTinEats = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("recipetineats.com/")) { - reject(new Error("url provided must include 'recipetineats.com/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - const body = $(".wprm-recipe-container"); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $("meta[property='og:title']").attr("content"); - - $(".wprm-recipe-ingredient").each((i, el) => { - Recipe.ingredients.push( - $(el) - .text() - .replace(/\s\s+/g, " ") - .replace("▢", "") - .trim() - ); - }); - - $(".wprm-recipe-instruction-group").each((i, el) => { - Recipe.instructions.push( - $(el) - .children(".wprm-recipe-group-name") - .text() - ); - $(el) - .find(".wprm-recipe-instruction-text") - .each((i, elChild) => { - Recipe.instructions.push($(elChild).text()); - }); - }); - - $(".wprm-recipe-time-container").each((i, el) => { - let text = $(el).text(); - if (text.includes("Prep:")) { - Recipe.time.total = text.replace("Prep:", "").trim(); - } else if (text.includes("Cook:")) { - Recipe.time.prep = text.replace("Cook:", "").trim(); - } else if (text.includes("Total:")) { - Recipe.time.cook = text.replace("Total:", "").trim(); - } - }); - - Recipe.servings = $(".wprm-recipe-time") - .first() - .text() - 
.trim(); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = recipeTinEats; diff --git a/scrapers/seriouseats.js b/scrapers/seriouseats.js deleted file mode 100644 index 364cbd7..0000000 --- a/scrapers/seriouseats.js +++ /dev/null @@ -1,96 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/RecipeSchema"); - -const seriousEats = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("seriouseats.com/")) { - reject(new Error("url provided must include 'seriouseats.com/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - if (url.includes("seriouseats.com/sponsored/")) { - reject( - new Error("seriouseats.com sponsored recipes not supported") - ); - } else { - regularRecipe($, Recipe); - } - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -const regularRecipe = ($, Recipe) => { - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".recipe-title") - .text() - .replace(/\s\s+/g, ""); - - $(".ingredient").each((i, el) => { - const item = $(el).text(); - Recipe.ingredients.push(item); - }); - - $(".recipe-about") - .children("li") - .each((i, el) => { - const label = $(el) - .children(".label") - .text(); - const info = $(el) - .children(".info") - .text(); - - if (label.includes("Active")) { - Recipe.time.active = info; - } else if (label.includes("Total")) { - Recipe.time.total = info; - } 
else if (label.includes("Yield")) { - Recipe.servings = info; - } - }); - - $("li[class='label label-category top-level']").each((i, el) => { - Recipe.tags.push( - $(el) - .find("a") - .text() - ); - }); - - Recipe.tags = Recipe.tags.filter(item => item); - - function onlyUnique(value, index, self) { - return self.indexOf(value) === index; - } - - Recipe.tags = Recipe.tags.filter(onlyUnique); - - $(".recipe-procedure-text").each((i, el) => { - Recipe.instructions.push( - $(el) - .text() - .replace(/\s\s+/g, "") - ); - }); -}; - -module.exports = seriousEats; diff --git a/test/constants/allRecipesConstants.js b/test/constants/allRecipesConstants.js index fbe06a1..d4abccd 100644 --- a/test/constants/allRecipesConstants.js +++ b/test/constants/allRecipesConstants.js @@ -32,7 +32,7 @@ module.exports = { }, servings: "6", image: - "https://imagesvc.meredithcorp.io/v3/mm/image?q=85&c=sc&poi=face&w=2444&h=1280&url=https%3A%2F%2Fimages.media-allrecipes.com%2Fuserphotos%2F2253389.jpg" + "https://imagesvc.meredithcorp.io/v3/mm/image?q=85&c=sc&poi=face&w=2444&h=1222&url=https%3A%2F%2Fimages.media-allrecipes.com%2Fuserphotos%2F2253389.jpg" }, expectedRecipeNew: { name: "Crispy and Tender Baked Chicken Thighs", @@ -63,6 +63,6 @@ module.exports = { }, servings: "8", image: - "https://imagesvc.meredithcorp.io/v3/mm/image?q=85&c=sc&poi=face&w=1300&h=681&url=https%3A%2F%2Fimages.media-allrecipes.com%2Fuserphotos%2F1061355.jpg" + "https://imagesvc.meredithcorp.io/v3/mm/image?q=85&c=sc&poi=face&w=1300&h=650&url=https%3A%2F%2Fimages.media-allrecipes.com%2Fuserphotos%2F1061355.jpg" } }; diff --git a/test/constants/eatingwellConstants.js b/test/constants/eatingwellConstants.js index 70f22d8..da8c03a 100644 --- a/test/constants/eatingwellConstants.js +++ b/test/constants/eatingwellConstants.js @@ -48,7 +48,7 @@ module.exports = { }, servings: "6", image: - 
"https://imagesvc.meredithcorp.io/v3/mm/image?q=85&c=sc&poi=face&w=960&h=503&url=https%3A%2F%2Fstatic.onecms.io%2Fwp-content%2Fuploads%2Fsites%2F44%2F2019%2F08%2F26232433%2F5397860.jpg" + "https://imagesvc.meredithcorp.io/v3/mm/image?q=85&c=sc&poi=face&w=960&h=480&url=https%3A%2F%2Fstatic.onecms.io%2Fwp-content%2Fuploads%2Fsites%2F44%2F2019%2F08%2F26232433%2F5397860.jpg" }, expectedRecipe2: { name: "Mexican Pasta Salad with Creamy Avocado Dressing", @@ -92,6 +92,6 @@ module.exports = { }, servings: "6", image: - "https://imagesvc.meredithcorp.io/v3/mm/image?q=85&c=sc&poi=face&w=960&h=503&url=https%3A%2F%2Fstatic.onecms.io%2Fwp-content%2Fuploads%2Fsites%2F44%2F2019%2F08%2F26231112%2F3750024.jpg" + "https://imagesvc.meredithcorp.io/v3/mm/image?q=85&c=sc&poi=face&w=960&h=480&url=https%3A%2F%2Fstatic.onecms.io%2Fwp-content%2Fuploads%2Fsites%2F44%2F2019%2F08%2F26231112%2F3750024.jpg" } }; diff --git a/test/constants/foodandwineConstants.js b/test/constants/foodandwineConstants.js index 1dbd8d8..4369a87 100644 --- a/test/constants/foodandwineConstants.js +++ b/test/constants/foodandwineConstants.js @@ -36,6 +36,6 @@ module.exports = { }, servings: "4", image: - "https://imagesvc.meredithcorp.io/v3/mm/image?q=85&c=sc&poi=face&w=480&h=251&url=https%3A%2F%2Fstatic.onecms.io%2Fwp-content%2Fuploads%2Fsites%2F9%2F2019%2F03%2F1660653193_6016070154001_6016065802001-vs.jpg" + "https://imagesvc.meredithcorp.io/v3/mm/image?q=85&c=sc&poi=face&w=480&h=240&url=https%3A%2F%2Fstatic.onecms.io%2Fwp-content%2Fuploads%2Fsites%2F9%2F2019%2F03%2F1660653193_6016070154001_6016065802001-vs.jpg" } }; diff --git a/test/constants/foodnetworkConstants.js b/test/constants/foodnetworkConstants.js index d599c15..c49eefb 100644 --- a/test/constants/foodnetworkConstants.js +++ b/test/constants/foodnetworkConstants.js @@ -28,7 +28,7 @@ module.exports = { "Increase the heat to medium, add the tomatoes and wine and cook until reduced by half, 2 to 3 minutes, then stir in the chicken broth. 
Put the pork chops in the sauce and carefully nestle the potatoes around them. Cook 3 to 5 minutes more until the pork chops register 145 degrees F in the center on an instant-read thermometer. Remove the pork chops from the sauce and transfer to shallow bowls or a serving platter. Taste the sauce and season with additional salt and pepper if needed. If most of the liquid in the pan evaporates while you are cooking the pork, stir in tablespoons of water at a time to get it back to a saucy consistency. If the sauce is a little thin and weak, after you take the chops out, turn the heat up and cook 1 to 2 minutes more to thicken and concentrate the flavors. Stir the parsley into the sauce, remove the thyme sprigs, spoon the sauce over the chops and serve." ], tags: [ - "Comfort Food Restaurants", + "Comfort Food", "Cast Iron Skillet", "Skillet Recipes", "French Recipes", diff --git a/test/constants/myrecipesConstants.js b/test/constants/myrecipesConstants.js index 143f8fa..c4f8b6c 100644 --- a/test/constants/myrecipesConstants.js +++ b/test/constants/myrecipesConstants.js @@ -46,6 +46,6 @@ module.exports = { servings: "Serves 4 (serving size: about 4 oz. steak, 3/4 cup potatoes, 1/2 cup broccoli, and 2 Tbsp. aioli)", image: - "https://imagesvc.meredithcorp.io/v3/mm/image?q=85&c=sc&poi=%5B1000%2C800%5D&w=2000&h=1047&url=https%3A%2F%2Fstatic.onecms.io%2Fwp-content%2Fuploads%2Fsites%2F19%2F2019%2F01%2F28%2Flondonbroil-2000.jpg" + "https://imagesvc.meredithcorp.io/v3/mm/image?q=85&c=sc&poi=%5B1000%2C800%5D&w=2000&h=1000&url=https%3A%2F%2Fstatic.onecms.io%2Fwp-content%2Fuploads%2Fsites%2F19%2F2019%2F01%2F28%2Flondonbroil-2000.jpg" } }; diff --git a/test/constants/pinchofyumConstants.js b/test/constants/pinchofyumConstants.js index eb4a57e..dc08428 100644 --- a/test/constants/pinchofyumConstants.js +++ b/test/constants/pinchofyumConstants.js @@ -28,14 +28,15 @@ module.exports = { "Toss everything together and season to taste!" 
], tags: [ + "Avocado", + "Recipes", "Bowls", - "Dinner", "Healthy", - "Recipes", + "Legume", "Lunch", "Quick and Easy", "Salads", - "Sugar Free", + "Sugar-Free", "Superfoods", "Vegan", "Vegetarian" diff --git a/test/helpers/commonRecipeTest.js b/test/helpers/commonRecipeTest.js index 48bf6cb..de2c76b 100644 --- a/test/helpers/commonRecipeTest.js +++ b/test/helpers/commonRecipeTest.js @@ -6,7 +6,7 @@ const commonRecipeTest = (name, constants, url) => { let scraper; before(() => { - scraper = ScraperFactory.getScraper(url); + scraper = new ScraperFactory().getScraper(url); }); it("should fetch the expected recipe", async () => { diff --git a/test/melskitchencafe.test.js b/test/melskitchencafe.test.js index 3844431..6048b05 100644 --- a/test/melskitchencafe.test.js +++ b/test/melskitchencafe.test.js @@ -1,11 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const melsKitchenCafe = require("../scrapers/melskitchencafe"); -const Constants = require("./constants/melskitchencafeConstants"); +const constants = require("./constants/melskitchencafeConstants"); -commonRecipeTest( - "melsKitchenCafe", - melsKitchenCafe, - Constants, - "melskitchencafe.com/" -); +commonRecipeTest("melsKitchenCafe", constants, "melskitchencafe.com/"); diff --git a/test/minimalistbaker.test.js b/test/minimalistbaker.test.js index 3e7b305..59d5989 100644 --- a/test/minimalistbaker.test.js +++ b/test/minimalistbaker.test.js @@ -1,11 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const minimalistBaker = require("../scrapers/minimalistbaker"); -const Constants = require("./constants/minimalistbakerConstants"); +const constants = require("./constants/minimalistbakerConstants"); -commonRecipeTest( - "minimalistbaker", - minimalistBaker, - Constants, - "minimalistbaker.com/" -); +commonRecipeTest("minimalistbaker", constants, "minimalistbaker.com/"); diff --git a/test/myrecipes.test.js b/test/myrecipes.test.js index 
3e73bb2..2557694 100644 --- a/test/myrecipes.test.js +++ b/test/myrecipes.test.js @@ -1,6 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const myRecipes = require("../scrapers/myrecipes"); -const Constants = require("./constants/myrecipesConstants"); +const constants = require("./constants/myrecipesConstants"); -commonRecipeTest("myRecipes", myRecipes, Constants, "myrecipes.com/recipe"); +commonRecipeTest("myRecipes", constants, "myrecipes.com/recipe"); diff --git a/test/omnivorescookbook.test.js b/test/omnivorescookbook.test.js index b4c8840..463948d 100644 --- a/test/omnivorescookbook.test.js +++ b/test/omnivorescookbook.test.js @@ -1,11 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const omnivoresCookbook = require("../scrapers/omnivorescookbook"); -const Constants = require("./constants/omnivorescookbookConstants"); +const constants = require("./constants/omnivorescookbookConstants"); -commonRecipeTest( - "omnivorescookbook", - omnivoresCookbook, - Constants, - "omnivorescookbook.com/" -); +commonRecipeTest("omnivorescookbook", constants, "omnivorescookbook.com/"); diff --git a/test/pinchofyum.test.js b/test/pinchofyum.test.js index e85c696..01279fa 100644 --- a/test/pinchofyum.test.js +++ b/test/pinchofyum.test.js @@ -1,6 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const pinchOfYum = require("../scrapers/pinchofyum"); -const Constants = require("./constants/pinchofyumConstants"); +const constants = require("./constants/pinchofyumConstants"); -commonRecipeTest("pinchOfYum", pinchOfYum, Constants, "pinchofyum.com/"); +commonRecipeTest("pinchOfYum", constants, "pinchofyum.com/"); diff --git a/test/seriouseats.test.js b/test/seriouseats.test.js index edec6d3..81a541f 100644 --- a/test/seriouseats.test.js +++ b/test/seriouseats.test.js @@ -1,21 +1,22 @@ "use strict"; -const expect = require("chai").expect; -const assert = 
require("chai").assert; +const { assert, expect } = require("chai"); -const seriousEats = require("../scrapers/seriouseats"); -const Constants = require("./constants/seriouseatsConstants"); +const seriousEats = require("../scrapers/SeriousEatsScraper"); +const constants = require("./constants/seriouseatsConstants"); describe("seriousEats", () => { it("should fetch the expected recipe", async () => { - let actualRecipe = await seriousEats(Constants.testUrl); - expect(JSON.stringify(Constants.expectedRecipe)).to.equal( + seriousEats.url = constants.testUrl; + let actualRecipe = await seriousEats.fetchRecipe(); + expect(JSON.stringify(constants.expectedRecipe)).to.equal( JSON.stringify(actualRecipe) ); }); it("should throw an error if invalid url is used", async () => { try { - await seriousEats(Constants.invalidDomainUrl); + seriousEats.url = constants.invalidDomainUrl; + await seriousEats.fetchRecipe(); assert.fail("was not supposed to succeed"); } catch (error) { expect(error.message).to.equal( @@ -26,7 +27,8 @@ describe("seriousEats", () => { it("should throw an error if a problem occurred during page retrieval", async () => { try { - await seriousEats(Constants.invalidUrl); + seriousEats.url = constants.invalidUrl; + await seriousEats.fetchRecipe(); assert.fail("was not supposed to succeed"); } catch (error) { expect(error.message).to.equal("No recipe found on page"); @@ -35,7 +37,8 @@ describe("seriousEats", () => { it("should throw an error if non-recipe page is used", async () => { try { - await seriousEats(Constants.nonRecipeUrl); + seriousEats.url = constants.nonRecipeUrl; + await seriousEats.fetchRecipe(); assert.fail("was not supposed to succeed"); } catch (error) { expect(error.message).to.equal("No recipe found on page"); @@ -44,7 +47,8 @@ describe("seriousEats", () => { it("should throw an error if sponsored recipe is used", async () => { try { - await seriousEats(Constants.sponsorUrl); + seriousEats.url = constants.sponsorUrl; + await 
seriousEats.fetchRecipe(); assert.fail("was not supposed to succeed"); } catch (error) { expect(error.message).to.equal( From 966db0f27de96ade42c15078674340c56da42acf Mon Sep 17 00:00:00 2001 From: jadkins89 Date: Mon, 25 Jan 2021 12:33:50 -0700 Subject: [PATCH 10/11] finishing up 2.0 refactor --- README.md | 8 +- helpers/BaseScraper.js | 31 +++-- helpers/PuppeteerScraper.js | 5 +- helpers/Recipe.js | 20 +++ helpers/RecipeSchema.js | 18 --- helpers/RecipeSchema.json | 43 ++++++ helpers/ScraperFactory.js | 18 +-- package.json | 6 +- scrapers/AmbitiousKitchenScraper.js | 6 +- scrapers/RecipeTinEatsScraper.js | 11 +- scrapers/SeriousEatsScraper.js | 2 +- scrapers/SimplyRecipesScraper.js | 53 ++++++++ scrapers/SmittenKitchenScraper.js | 118 ++++++++++++++++ scrapers/TasteOfHomeScraper.js | 42 ++++++ scrapers/TheBlackPeppercornScraper.js | 53 ++++++++ scrapers/ThePioneerWomanScraper.js | 51 +++++++ scrapers/TheRecipeCriticScraper.js | 39 ++++++ scrapers/TheSpruceEatsScraper.js | 53 ++++++++ scrapers/WhatsGabyCookingScraper.js | 52 +++++++ scrapers/WoolworthsScraper.js | 49 +++++++ scrapers/simplyrecipes.js | 67 --------- scrapers/smittenkitchen.js | 151 --------------------- scrapers/tasteofhome.js | 63 --------- scrapers/theblackpeppercorn.js | 72 ---------- scrapers/thepioneerwoman.js | 92 ------------- scrapers/therecipecritic.js | 57 -------- scrapers/thespruceeats.js | 76 ----------- scrapers/whatsgabycooking.js | 65 --------- scrapers/woolworths.js | 67 --------- test/constants/foodnetworkConstants.js | 2 +- test/constants/pinchofyumConstants.js | 1 + test/constants/recipetineatsConstants.js | 26 ++-- test/constants/simplyrecipesConstants.js | 5 - test/constants/tasteofhomeConstants.js | 2 +- test/constants/thepioneerwomanConstants.js | 40 +++--- test/constants/woolworthsConstants.js | 47 ++++--- test/recipetineats.test.js | 5 + test/seriouseats.test.js | 11 +- test/simplyrecipes.test.js | 10 +- test/smittenkitchen.test.js | 42 +++--- test/tasteofhome.test.js | 10 
+- test/theblackpeppercorn.test.js | 10 +- test/thepioneerwoman.test.js | 6 +- test/therecipecritic.test.js | 10 +- test/thespruceeats.test.js | 10 +- test/whatsgabycooking.test.js | 10 +- test/woolworths.test.js | 6 +- 47 files changed, 731 insertions(+), 910 deletions(-) create mode 100644 helpers/Recipe.js delete mode 100644 helpers/RecipeSchema.js create mode 100644 helpers/RecipeSchema.json create mode 100644 scrapers/SimplyRecipesScraper.js create mode 100644 scrapers/SmittenKitchenScraper.js create mode 100644 scrapers/TasteOfHomeScraper.js create mode 100644 scrapers/TheBlackPeppercornScraper.js create mode 100644 scrapers/ThePioneerWomanScraper.js create mode 100644 scrapers/TheRecipeCriticScraper.js create mode 100644 scrapers/TheSpruceEatsScraper.js create mode 100644 scrapers/WhatsGabyCookingScraper.js create mode 100644 scrapers/WoolworthsScraper.js delete mode 100644 scrapers/simplyrecipes.js delete mode 100644 scrapers/smittenkitchen.js delete mode 100644 scrapers/tasteofhome.js delete mode 100644 scrapers/theblackpeppercorn.js delete mode 100644 scrapers/thepioneerwoman.js delete mode 100644 scrapers/therecipecritic.js delete mode 100644 scrapers/thespruceeats.js delete mode 100644 scrapers/whatsgabycooking.js delete mode 100644 scrapers/woolworths.js diff --git a/README.md b/README.md index ed70de1..ac4b6c7 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # recipe-scraper -**A JS package for scraping recipes from the web.** +**A NodeJS package for scraping recipes from the web.** [![Build Status](https://travis-ci.org/jadkins89/Recipe-Scraper.svg?branch=master)](https://travis-ci.org/jadkins89/Recipe-Scraper) [![Coverage Status](https://coveralls.io/repos/github/jadkins89/Recipe-Scraper/badge.svg?branch=master)](https://coveralls.io/github/jadkins89/Recipe-Scraper?branch=master) @@ -47,13 +47,13 @@ recipeScraper("some.recipe.url").then(recipe => { - https://cookieandkate.com/ - https://copykat.com/ - https://damndelicious.net/ -- 
http://www.eatingwell.com/ +- https://www.eatingwell.com/ - https://www.epicurious.com/ - https://www.food.com/ - https://www.foodandwine.com/ - https://www.foodnetwork.com/ - https://gimmedelicious.com/ -- http://www.gimmesomeoven.com/ +- https://www.gimmesomeoven.com/ - https://julieblanner.com/ - https://www.kitchenstories.com/ - https://www.melskitchencafe.com/ @@ -80,7 +80,7 @@ Don't see a website you'd like to scrape? Open an [issue](https://github.com/jad ## Recipe Object -Depending on the recipe, certain fields may be left blank. All fields are represented as strings or arrays of strings. +Depending on the recipe, certain fields may be left blank. All fields are represented as strings or arrays of strings. The name, ingredients, and instructions properties are required for schema validation. ```javascript { diff --git a/helpers/BaseScraper.js b/helpers/BaseScraper.js index 39d0e21..e26eb0e 100644 --- a/helpers/BaseScraper.js +++ b/helpers/BaseScraper.js @@ -2,8 +2,10 @@ const fetch = require("node-fetch"); const cheerio = require("cheerio"); +const { validate } = require("jsonschema"); -const RecipeSchema = require("./RecipeSchema"); +const Recipe = require("./Recipe"); +const recipeSchema = require("./RecipeSchema.json"); /** * Abstract Class which all scrapers inherit from @@ -15,7 +17,7 @@ class BaseScraper { } /** - * + * Checks if the url has the required sub url */ checkUrl() { if (!this.url.includes(this.subUrl)) { @@ -23,8 +25,11 @@ class BaseScraper { } } + /** + * Builds a new instance of Recipe + */ createRecipeObject() { - this.recipe = new RecipeSchema(); + this.recipe = new Recipe(); } defaultError() { @@ -32,12 +37,14 @@ class BaseScraper { } /** - * + * @param {object} $ - a cheerio object representing a DOM + * @returns {string|null} - if found, an image url */ defaultSetImage($) { this.recipe.image = $("meta[property='og:image']").attr("content") || - $("meta[name='og:image']").attr("content"); + $("meta[name='og:image']").attr("content") || 
+ $("meta[itemprop='image']").attr("content"); } /** @@ -55,7 +62,8 @@ class BaseScraper { } /** - * + * Handles the workflow for fetching a recipe + * @returns {object} - an object representing the recipe */ async fetchRecipe() { this.checkUrl(); @@ -79,15 +87,12 @@ class BaseScraper { } /** - * + * Validates scraped recipes against defined recipe schema + * @returns {object} - an object representing the recipe */ - // TODO build recipe schema validateRecipe() { - if ( - !this.recipe.name || - !this.recipe.ingredients.length || - !this.recipe.instructions.length - ) { + let res = validate(this.recipe, recipeSchema); + if (!res.valid) { this.defaultError(); } return this.recipe; diff --git a/helpers/PuppeteerScraper.js b/helpers/PuppeteerScraper.js index 3c4cd30..93a9eef 100644 --- a/helpers/PuppeteerScraper.js +++ b/helpers/PuppeteerScraper.js @@ -80,10 +80,11 @@ class PuppeteerScraper extends BaseScraper { if (response._status < 400) { await this.customPoll(page); html = await page.content(); - } else { - this.defaultError(); } browser.close().catch(err => {}); + if (response._status >= 400) { + this.defaultError(); + } return cheerio.load(html); } } diff --git a/helpers/Recipe.js b/helpers/Recipe.js new file mode 100644 index 0000000..13c2d50 --- /dev/null +++ b/helpers/Recipe.js @@ -0,0 +1,20 @@ +class Recipe { + constructor() { + this.name = ""; + this.ingredients = []; + this.instructions = []; + this.tags = []; + this.time = { + prep: "", + cook: "", + active: "", + inactive: "", + ready: "", + total: "" + }; + this.servings = ""; + this.image = ""; + } +} + +module.exports = Recipe; diff --git a/helpers/RecipeSchema.js b/helpers/RecipeSchema.js deleted file mode 100644 index 0f0ac6b..0000000 --- a/helpers/RecipeSchema.js +++ /dev/null @@ -1,18 +0,0 @@ -function Recipe() { - this.name = ""; - this.ingredients = []; - this.instructions = []; - this.tags = []; - this.time = { - prep: "", - cook: "", - active: "", - inactive: "", - ready: "", - total: "" - }; - 
this.servings = ""; - this.image = ""; -} - -module.exports = Recipe; diff --git a/helpers/RecipeSchema.json b/helpers/RecipeSchema.json new file mode 100644 index 0000000..f21ba0f --- /dev/null +++ b/helpers/RecipeSchema.json @@ -0,0 +1,43 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "description": "A recipe scraped from the web", + "type": "object", + "required": ["name", "ingredients", "instructions"], + "properties": { + "name": { + "type": "string", + "minLength": 1 + }, + "ingredients": { + "type": "array", + "minItems": 1, + "items": { "type": "string" } + }, + "instructions": { + "type": "array", + "minItems": 1, + "uniqueItems": true, + "items": { "type": "string" } + }, + "tags": { + "type": "array", + "uniqueItems": true, + "items": { "type": "string" } + }, + "time": { + "type": "object", + "properties": { + "prep": { "type": "string" }, + "cook": { "type": "string" }, + "active": { "type": "string" }, + "inactive": { "type": "string" }, + "ready": { "type": "string" }, + "total": { "type": "string" } + } + }, + "servings": { + "type": { "type": "string" } + }, + "image": { "type": "string" } + } +} diff --git a/helpers/ScraperFactory.js b/helpers/ScraperFactory.js index 994ae5a..ebe8b5c 100644 --- a/helpers/ScraperFactory.js +++ b/helpers/ScraperFactory.js @@ -33,17 +33,17 @@ const domains = { pinchofyum: require("../scrapers/PinchOfYumScraper"), recipetineats: require("../scrapers/RecipeTinEatsScraper"), seriouseats: require("../scrapers/SeriousEatsScraper"), - simplyrecipes: require("../scrapers/simplyrecipes"), - smittenkitchen: require("../scrapers/smittenkitchen"), + simplyrecipes: require("../scrapers/SimplyRecipesScraper"), + smittenkitchen: require("../scrapers/SmittenKitchenScraper"), tastesbetterfromscratch: require("../scrapers/TastesBetterFromScratchScraper"), - tasteofhome: require("../scrapers/tasteofhome"), - theblackpeppercorn: require("../scrapers/theblackpeppercorn"), - therecipecritic: 
require("../scrapers/therecipecritic"), - thepioneerwoman: require("../scrapers/thepioneerwoman"), + tasteofhome: require("../scrapers/TasteOfHomeScraper"), + theblackpeppercorn: require("../scrapers/TheBlackPeppercornScraper"), + thepioneerwoman: require("../scrapers/ThePioneerWomanScraper"), + therecipecritic: require("../scrapers/TheRecipeCriticScraper"), therealfoodrds: require("../scrapers/TheRealFoodDrsScraper"), - thespruceeats: require("../scrapers/thespruceeats"), - whatsgabycooking: require("../scrapers/whatsgabycooking"), - woolworths: require("../scrapers/woolworths"), + thespruceeats: require("../scrapers/TheSpruceEatsScraper"), + whatsgabycooking: require("../scrapers/WhatsGabyCookingScraper"), + woolworths: require("../scrapers/WoolworthsScraper"), yummly: require("../scrapers/YummlyScraper") }; diff --git a/package.json b/package.json index 2617b8b..b59b28e 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "recipe-scraper", - "version": "1.26.0", + "version": "2.0.0", "description": "A JS package for scraping recipes from the web.", "author": "Justin Adkins ", "license": "MIT", @@ -33,10 +33,10 @@ "homepage": "https://github.com/jadkins89/Recipe-Scraper#readme", "dependencies": { "cheerio": "^1.0.0-rc.3", + "jsonschema": "^1.4.0", "node-fetch": "^2.6.1", "parse-domain": "^2.3.2", - "puppeteer": "^1.20.0", - "request": "^2.88.0" + "puppeteer": "^1.20.0" }, "devDependencies": { "chai": "^4.2.0", diff --git a/scrapers/AmbitiousKitchenScraper.js b/scrapers/AmbitiousKitchenScraper.js index fb93e65..ff5bb14 100644 --- a/scrapers/AmbitiousKitchenScraper.js +++ b/scrapers/AmbitiousKitchenScraper.js @@ -29,11 +29,7 @@ class AmbitiousKitchenScraper extends BaseScraper { }); $(".wprm-recipe-instruction").each((i, el) => { - instructions.push( - $(el) - .text() - .trim() - ); + instructions.push(this.textTrim($(el))); }); time.prep = diff --git a/scrapers/RecipeTinEatsScraper.js b/scrapers/RecipeTinEatsScraper.js index 6704aa1..e1d1818 
100644 --- a/scrapers/RecipeTinEatsScraper.js +++ b/scrapers/RecipeTinEatsScraper.js @@ -18,11 +18,12 @@ class RecipeTinEatsScraper extends BaseScraper { $(".wprm-recipe-ingredient").each((i, el) => { ingredients.push( - this.textTrim( - $(el) - .replace(/\s\s+/g, " ") - .replace("▢", "") - ) + $(el) + .text() + .replace(/\s\s+/g, " ") + .replace(" ,", ",") + .replace("▢", "") + .trim() ); }); diff --git a/scrapers/SeriousEatsScraper.js b/scrapers/SeriousEatsScraper.js index 2437d3e..d3e1ac4 100644 --- a/scrapers/SeriousEatsScraper.js +++ b/scrapers/SeriousEatsScraper.js @@ -9,7 +9,7 @@ const BaseScraper = require("../helpers/BaseScraper"); class SeriousEatsScraper extends BaseScraper { constructor(url) { super(url, "seriouseats.com/"); - if (this.url.includes("seriouseats.com/sponsored/")) { + if (this.url && this.url.includes("seriouseats.com/sponsored/")) { throw new Error("seriouseats.com sponsored recipes not supported"); } } diff --git a/scrapers/SimplyRecipesScraper.js b/scrapers/SimplyRecipesScraper.js new file mode 100644 index 0000000..10dc81d --- /dev/null +++ b/scrapers/SimplyRecipesScraper.js @@ -0,0 +1,53 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping simplyrecipes.com + * @extends BaseScraper + */ +class SimplyRecipesScraper extends BaseScraper { + constructor(url) { + super(url, "simplyrecipes.com/recipes/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = $(".recipe-callout") + .children("h2") + .text(); + + $(".recipe-ingredients") + .find("li.ingredient, p") + .each((i, el) => { + ingredients.push($(el).text()); + }); + + $(".instructions") + .find("p") + .each((i, el) => { + let curEl = $(el).text(); + if (curEl) { + instructions.push(curEl.replace(/^\d+\s/, "")); + } + }); + + let tagsSet = new Set(); + $(".taxonomy-term").each((i, el) => { + tagsSet.add( + $(el) + .find("span") + .text() + ); + }); + 
this.recipe.tags = Array.from(tagsSet); + + time.prep = $(".preptime").text(); + time.cook = $(".cooktime").text(); + + this.recipe.servings = $(".yield").text(); + } +} + +module.exports = SimplyRecipesScraper; diff --git a/scrapers/SmittenKitchenScraper.js b/scrapers/SmittenKitchenScraper.js new file mode 100644 index 0000000..613cd4f --- /dev/null +++ b/scrapers/SmittenKitchenScraper.js @@ -0,0 +1,118 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping smittenkitchen.com + * @extends BaseScraper + */ +class SmittenKitchenScraper extends BaseScraper { + constructor(url) { + super(url, "smittenkitchen.com/"); + } + + newScrape($) { + const { ingredients, time } = this.recipe; + this.recipe.name = $(".jetpack-recipe-title").text(); + + $(".jetpack-recipe-ingredients") + .children("ul") + .first() + .children() + .each((i, el) => { + ingredients.push($(el).text()); + }); + + this.recipe.instructions = $(".jetpack-recipe-directions") + .text() + .split("\n") + .filter( + instruction => + instruction && + !instruction.includes("Do More:TwitterFacebookPinterestPrintEmail") && + !instruction.includes("\t") + ); + + if (!this.recipe.instructions.length) { + let lastIngredient = ingredients[ingredients.length - 1]; + let recipeContents = $(".entry-content").text(); + this.recipe.instructions = recipeContents + .slice( + recipeContents.indexOf(lastIngredient) + lastIngredient.length, + recipeContents.indexOf("Rate this:") + ) + .split("\n") + .filter( + instruction => + instruction && + !instruction.includes( + "Do More:TwitterFacebookPinterestPrintEmail" + ) && + !instruction.includes("\t") + ); + } + + time.total = $("time[itemprop=totalTime]") + .text() + .replace("Time: ", ""); + + this.recipe.servings = $(".jetpack-recipe-servings") + .text() + .replace("Servings: ", ""); + } + + oldScrape($) { + const body = $(".entry-content").children("p"); + let ingredientSwitch = false; + let orderedListRegex = new 
RegExp(/\d\.\s/); + let servingWords = ["Yield", "Serve", "Servings"]; + let servingsRegex = new RegExp(servingWords.join("|"), "i"); + + body.each((i, el) => { + if (i === 0) { + this.recipe.name = this.textTrim($(el).children("b")); + } else if ( + $(el).children("br").length && + !$(el).children("b").length && + !orderedListRegex.test($(el).text()) && + !servingsRegex.test($(el).text()) + ) { + ingredientSwitch = true; + let updatedIngredients = this.recipe.ingredients.concat( + this.textTrim($(el)).split("\n") + ); + this.recipe.ingredients = updatedIngredients; + } else if (ingredientSwitch) { + let updatedInstructions = this.recipe.instructions.concat( + this.textTrim($(el)).split("\n") + ); + this.recipe.instructions = updatedInstructions; + } else { + let possibleServing = $(el).text(); + if (servingsRegex.test(possibleServing)) { + possibleServing.split("\n").forEach(line => { + if (servingsRegex.test(line)) { + this.recipe.servings = line.substring(line.indexOf(":") + 2); + } + }); + } + } + }); + } + + scrape($) { + this.defaultSetImage($); + const { tags } = this.recipe; + if ($(".jetpack-recipe").length) { + this.newScrape($); + } else { + this.oldScrape($); + } + + $("a[rel='category tag']").each((i, el) => { + tags.push($(el).text()); + }); + } +} + +module.exports = SmittenKitchenScraper; diff --git a/scrapers/TasteOfHomeScraper.js b/scrapers/TasteOfHomeScraper.js new file mode 100644 index 0000000..1fc49ac --- /dev/null +++ b/scrapers/TasteOfHomeScraper.js @@ -0,0 +1,42 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping tasteofhome.com + * @extends BaseScraper + */ +class TasteOfHomeScraper extends BaseScraper { + constructor(url) { + super(url, "tasteofhome.com/recipes/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, tags, time } = this.recipe; + this.recipe.name = $("h1.recipe-title") + .text() + .trim(); + + $("meta[property='article:tag']").each((i, 
el) => { + tags.push($(el).attr("content")); + }); + + $(".recipe-ingredients__list li").each((i, el) => { + ingredients.push($(el).text()); + }); + + $(".recipe-directions__item").each((i, el) => { + instructions.push(this.textTrim($(el))); + }); + + let timeStr = $(".total-time > p") + .text() + .split(/Bake:/g); + time.prep = timeStr[0].replace("Prep:", "").trim(); + time.cook = (timeStr[1] || "").trim(); + this.recipe.servings = $(".makes > p").text(); + } +} + +module.exports = TasteOfHomeScraper; diff --git a/scrapers/TheBlackPeppercornScraper.js b/scrapers/TheBlackPeppercornScraper.js new file mode 100644 index 0000000..9df4784 --- /dev/null +++ b/scrapers/TheBlackPeppercornScraper.js @@ -0,0 +1,53 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping theblackpeppercorn.com + * @extends BaseScraper + */ +class TheBlackPeppercornScraper extends BaseScraper { + constructor(url) { + super(url, "theblackpeppercorn.com/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = this.textTrim($(".wprm-recipe-name")); + + $(".wprm-recipe-ingredients > .wprm-recipe-ingredient").each((i, el) => { + ingredients.push( + $(el) + .text() + .replace(/(▢)/g, "") + .replace(" ,", ",") + ); + }); + + $(".wprm-recipe-instruction-text").each((i, el) => { + instructions.push( + $(el) + .remove("img") + .text() + .replace(/\s\s+/g, "") + ); + }); + + time.prep = $(".wprm-recipe-prep-time-label") + .next() + .text(); + time.cook = $(".wprm-recipe-cook-time-label") + .next() + .text(); + time.inactive = $(".wprm-recipe-custom-time-label") + .next() + .text(); + time.total = $(".wprm-recipe-total-time-label") + .next() + .text(); + this.recipe.servings = $(".wprm-recipe-servings").text(); + } +} + +module.exports = TheBlackPeppercornScraper; diff --git a/scrapers/ThePioneerWomanScraper.js b/scrapers/ThePioneerWomanScraper.js new file mode 100644 index 
0000000..36765dc --- /dev/null +++ b/scrapers/ThePioneerWomanScraper.js @@ -0,0 +1,51 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping thepioneerwoman.com + * @extends BaseScraper + */ +class ThePioneerWomanScraper extends BaseScraper { + constructor(url) { + super(url, "thepioneerwoman.com/food-cooking/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = $(".recipe-hed") + .first() + .text(); + + $(".ingredient-item").each((i, el) => { + ingredients.push(this.textTrim($(el)).replace(/\s\s+/g, " ")); + }); + + $(".direction-lists") + .find("li") + .each((i, el) => { + instructions.push(this.textTrim($(el))); + }); + + if (!instructions.length) { + let directions = $(".direction-lists") + .contents() + .each((i, el) => { + if (el.type === "text") { + instructions.push(this.textTrim($(el))); + } + }); + } + + time.prep = this.textTrim($(".prep-time-amount")).replace(/\s\s+/g, " "); + time.cook = this.textTrim($(".cook-time-amount")).replace(/\s\s+/g, " "); + time.total = this.textTrim($(".total-time-amount")).replace(/\s\s+/g, " "); + this.recipe.servings = this.textTrim($(".yields-amount")).replace( + /\s\s+/g, + " " + ); + } +} + +module.exports = ThePioneerWomanScraper; diff --git a/scrapers/TheRecipeCriticScraper.js b/scrapers/TheRecipeCriticScraper.js new file mode 100644 index 0000000..5e084c7 --- /dev/null +++ b/scrapers/TheRecipeCriticScraper.js @@ -0,0 +1,39 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping therecipecritic.com + * @extends BaseScraper + */ +class TheRecipeCriticScraper extends BaseScraper { + constructor(url) { + super(url, "therecipecritic.com/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = this.textTrim($(".wprm-recipe-name")); + + $(".wprm-recipe-ingredient").each((i, 
el) => { + ingredients.push(this.textTrim($(el)).replace(/\s\s+/g, " ")); + }); + + $(".wprm-recipe-instruction-text").each((i, el) => { + instructions.push(this.textTrim($(el)).replace(/\s\s+/g, " ")); + }); + + $(".wprm-recipe-details-name").remove(); + + time.prep = this.textTrim($(".wprm-recipe-prep-time-container")); + time.cook = this.textTrim($(".wprm-recipe-cook-time-container")); + time.inactive = this.textTrim($(".wprm-recipe-custom-time-container")); + time.total = this.textTrim($(".wprm-recipe-total-time-container")); + this.recipe.servings = $( + ".wprm-recipe-servings-container .wprm-recipe-servings" + ).text(); + } +} + +module.exports = TheRecipeCriticScraper; diff --git a/scrapers/TheSpruceEatsScraper.js b/scrapers/TheSpruceEatsScraper.js new file mode 100644 index 0000000..dd1532d --- /dev/null +++ b/scrapers/TheSpruceEatsScraper.js @@ -0,0 +1,53 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping thespruceeats.com + * @extends BaseScraper + */ +class TheSpruceEatsScraper extends BaseScraper { + constructor(url) { + super(url, "thespruceeats.com/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, tags, time } = this.recipe; + this.recipe.name = $(".heading__title").text(); + + $(".simple-list__item").each((i, el) => { + ingredients.push(this.textTrim($(el))); + }); + + $(".section--instructions") + .find("li") + .find("p.comp") + .each((i, el) => { + instructions.push(this.textTrim($(el))); + }); + + $(".recipe-search-suggestions__chip").each((i, el) => { + tags.push( + $(el) + .find("a") + .text() + ); + }); + + let metaText = $(".meta-text").each((i, el) => { + let text = $(el).text(); + if (text.includes("Prep:")) { + time.prep = text.replace("Prep: ", "").trim(); + } else if (text.includes("Cook: ")) { + time.cook = text.replace("Cook:", "").trim(); + } else if (text.includes("Total: ")) { + time.total = text.replace("Total:", "").trim(); + } else if 
(text.includes("Servings: ")) { + this.recipe.servings = text.replace("Servings: ", "").trim(); + } + }); + } +} + +module.exports = TheSpruceEatsScraper; diff --git a/scrapers/WhatsGabyCookingScraper.js b/scrapers/WhatsGabyCookingScraper.js new file mode 100644 index 0000000..f024345 --- /dev/null +++ b/scrapers/WhatsGabyCookingScraper.js @@ -0,0 +1,52 @@ +"use strict"; + +const BaseScraper = require("../helpers/BaseScraper"); + +/** + * Class for scraping whatsgabycooking.com + * @extends BaseScraper + */ +class WhatsGabyCookingScraper extends BaseScraper { + constructor(url) { + super(url, "whatsgabycooking.com/"); + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = $(".wprm-recipe-name").text(); + + $(".wprm-recipe-ingredient").each((i, el) => { + let elText = this.textTrim($(el)); + if (elText.length) { + ingredients.push(elText); + } + }); + + $(".wprm-recipe-instruction-group").each((i, el) => { + let groupName = $(el) + .find(".wprm-recipe-group-name") + .text(); + let instruction = $(el) + .find(".wprm-recipe-instruction-text") + .text(); + if (groupName) { + instructions.push(groupName); + } + instructions.push(instruction); + }); + + this.recipe.tags = $(".wprm-recipe-cuisine") + .text() + .split(",") + .map(x => x.trim()); + + const times = $(".wprm-recipe-time"); + time.prep = $(times.first()).text(); + time.cook = $(times.get(1)).text(); + time.total = $(times.last()).text(); + this.recipe.servings = $(".wprm-recipe-servings-with-unit").text(); + } +} + +module.exports = WhatsGabyCookingScraper; diff --git a/scrapers/WoolworthsScraper.js b/scrapers/WoolworthsScraper.js new file mode 100644 index 0000000..f65e40e --- /dev/null +++ b/scrapers/WoolworthsScraper.js @@ -0,0 +1,49 @@ +"use strict"; + +const PuppeteerScraper = require("../helpers/PuppeteerScraper"); + +/** + * Class for scraping woolworths.com.au + * @extends PuppeteerScraper + */ +class WoolworthsScraper extends 
PuppeteerScraper { + constructor(url) { + super(url, "woolworths.com.au/shop/recipedetail/"); + } + + async customPoll(page) { + let container, + count = 0; + do { + container = await page.$(".recipeDetailContainer"); + if (!container) { + await page.waitFor(100); + count++; + } + } while (!container && count < 60); + return true; + } + + scrape($) { + this.defaultSetImage($); + const { ingredients, instructions, time } = this.recipe; + this.recipe.name = this.textTrim($(".recipeDetailContainer-title")); + $(".recipeDetailContainer-ingredient").each((i, el) => { + ingredients.push(this.textTrim($(el))); + }); + + $(".recipeDetailContainer-instructions").each((i, el) => { + let text = this.textTrim($(el)); + if (text.length) { + instructions.push(text.replace(/^\d+\.\s/g, "")); + } + }); + + time.prep = this.textTrim($("span[itemprop='prepTime']")) + " Mins"; + time.cook = this.textTrim($("span[itemprop='cookTime']")) + " Mins"; + + this.recipe.servings = $("span[itemprop='recipeYield']").text(); + } +} + +module.exports = WoolworthsScraper; diff --git a/scrapers/simplyrecipes.js b/scrapers/simplyrecipes.js deleted file mode 100644 index 9fd2a22..0000000 --- a/scrapers/simplyrecipes.js +++ /dev/null @@ -1,67 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/RecipeSchema"); - -const simplyRecipes = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("simplyrecipes.com/recipes/")) { - reject( - new Error("url provided must include 'simplyrecipes.com/recipes/'") - ); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".recipe-callout") - .children("h2") - .text(); - - $(".recipe-ingredients") - .find("li.ingredient, p") - .each((i, el) => { - 
Recipe.ingredients.push($(el).text()); - }); - - $(".instructions") - .find("p") - .each((i, el) => { - let curEl = $(el).text(); - if (curEl) { - Recipe.instructions.push(curEl.replace(/^\d+\s/, "")); - } - }); - - $(".taxonomy-term").each((i, el) => { - Recipe.tags.push( - $(el) - .find("span") - .text() - ); - }); - - Recipe.time.prep = $(".preptime").text(); - Recipe.time.cook = $(".cooktime").text(); - - Recipe.servings = $(".yield").text(); - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = simplyRecipes; diff --git a/scrapers/smittenkitchen.js b/scrapers/smittenkitchen.js deleted file mode 100644 index 60b3a39..0000000 --- a/scrapers/smittenkitchen.js +++ /dev/null @@ -1,151 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/RecipeSchema"); - -const smittenKitchen = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("smittenkitchen.com/")) { - reject(new Error("url provided must include 'smittenkitchen.com/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - if ($(".jetpack-recipe").length) { - newSmitten($, Recipe); - } else { - oldSmitten($, Recipe); - } - - $("a[rel='category tag']").each((i, el) => { - Recipe.tags.push( - $(el) - .text() - ); - }); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -const oldSmitten = ($, Recipe) => { - const body = $(".entry-content").children("p"); - let 
ingredientSwitch = false; - let orderedListRegex = new RegExp(/\d\.\s/); - let servingWords = ["Yield", "Serve", "Servings"]; - let servingsRegex = new RegExp(servingWords.join("|"), "i"); - - Recipe.image = $("meta[property='og:image']").attr("content"); - body.each((i, el) => { - if (i === 0) { - Recipe.name = $(el) - .children("b") - .text() - .trim(); - } else if ( - $(el).children("br").length && - !$(el).children("b").length && - !orderedListRegex.test($(el).text()) && - !servingsRegex.test($(el).text()) - ) { - ingredientSwitch = true; - let updatedIngredients = Recipe.ingredients.concat( - $(el) - .text() - .trim() - .split("\n") - ); - Recipe.ingredients = updatedIngredients; - } else if (ingredientSwitch) { - let updatedInstructions = Recipe.instructions.concat( - $(el) - .text() - .trim() - .split("\n") - ); - Recipe.instructions = updatedInstructions; - } else { - let possibleServing = $(el).text(); - if (servingsRegex.test(possibleServing)) { - possibleServing.split("\n").forEach(line => { - if (servingsRegex.test(line)) { - Recipe.servings = line.substring(line.indexOf(":") + 2); - } - }); - } - } - }); -}; - -const newSmitten = ($, Recipe) => { - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".jetpack-recipe-title").text(); - - $(".jetpack-recipe-ingredients") - .children("ul") - .first() - .children() - .each((i, el) => { - Recipe.ingredients.push($(el).text()); - }); - - Recipe.instructions = $(".jetpack-recipe-directions") - .text() - .split("\n") - .filter(instruction => { - if ( - !instruction || - instruction.includes("Do More:TwitterFacebookPinterestPrintEmail") || - instruction.includes("\t") - ) { - return false; - } - return true; - }); - - if (!Recipe.instructions.length) { - let lastIngredient = Recipe.ingredients[Recipe.ingredients.length - 1]; - let recipeContents = $(".entry-content").text(); - Recipe.instructions = recipeContents - .slice( - recipeContents.indexOf(lastIngredient) + 
lastIngredient.length, - recipeContents.indexOf("Rate this:") - ) - .split("\n") - .filter(instruction => { - if ( - !instruction || - instruction.includes("Do More:TwitterFacebookPinterestPrintEmail") || - instruction.includes("\t") - ) { - return false; - } - return true; - }); - } - - Recipe.time.total = $("time[itemprop=totalTime]") - .text() - .replace("Time: ", ""); - - Recipe.servings = $(".jetpack-recipe-servings") - .text() - .replace("Servings: ", ""); -}; - -module.exports = smittenKitchen; diff --git a/scrapers/tasteofhome.js b/scrapers/tasteofhome.js deleted file mode 100644 index 378d589..0000000 --- a/scrapers/tasteofhome.js +++ /dev/null @@ -1,63 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/RecipeSchema"); - -const tasteofhome = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("tasteofhome.com/recipes/")) { - reject(new Error("url provided must include 'tasteofhome.com/recipes/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $("h1.recipe-title") - .text() - .trim(); - - $("meta[property='article:tag']").each((i, el) => { - Recipe.tags.push($(el).attr("content")); - }); - - $(".recipe-ingredients__list li").each((i, el) => { - Recipe.ingredients.push($(el).text()); - }); - - $(".recipe-directions__item").each((i, el) => { - Recipe.instructions.push( - $(el) - .text() - .trim() - ); - }); - - let timeStr = $(".recipe-time-yield__label-prep") - .text() - .split(/Bake:/g); - Recipe.time.prep = timeStr[0].replace("Prep:", "").trim(); - Recipe.time.cook = (timeStr[1] || "").trim(); - Recipe.servings = $(".recipe-time-yield__label-servings") - .text() - .trim(); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - 
!Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = tasteofhome; diff --git a/scrapers/theblackpeppercorn.js b/scrapers/theblackpeppercorn.js deleted file mode 100644 index 2bd3ede..0000000 --- a/scrapers/theblackpeppercorn.js +++ /dev/null @@ -1,72 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/RecipeSchema"); - -const theblackpeppercorn = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("theblackpeppercorn.com/")) { - reject(new Error("url provided must include 'theblackpeppercorn.com/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".wprm-recipe-name") - .text() - .trim(); - - $(".wprm-recipe-ingredients > .wprm-recipe-ingredient").each( - (i, el) => { - Recipe.ingredients.push( - $(el) - .text() - .replace(/(▢)/g, "") - .replace(" ,", ",") - ); - } - ); - - $(".wprm-recipe-instruction-text").each((i, el) => { - Recipe.instructions.push( - $(el) - .remove("img") - .text() - .replace(/\s\s+/g, "") - ); - }); - - Recipe.time.prep = $(".wprm-recipe-prep-time-label") - .next() - .text(); - Recipe.time.cook = $(".wprm-recipe-cook-time-label") - .next() - .text(); - Recipe.time.inactive = $(".wprm-recipe-custom-time-label") - .next() - .text(); - Recipe.time.total = $(".wprm-recipe-total-time-label") - .next() - .text(); - Recipe.servings = $(".wprm-recipe-servings").text(); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found 
on page")); - } - }); - } - }); -}; - -module.exports = theblackpeppercorn; diff --git a/scrapers/thepioneerwoman.js b/scrapers/thepioneerwoman.js deleted file mode 100644 index 7d77a54..0000000 --- a/scrapers/thepioneerwoman.js +++ /dev/null @@ -1,92 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/RecipeSchema"); - -const thePioneerWoman = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("thepioneerwoman.com/food-cooking/")) { - reject( - new Error( - "url provided must include 'thepioneerwoman.com/food-cooking/'" - ) - ); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".recipe-hed") - .first() - .text(); - - $(".ingredient-item").each((i, el) => { - Recipe.ingredients.push( - $(el) - .text() - .replace(/\s\s+/g, " ") - .trim() - ); - }); - - $(".direction-lists") - .find("li") - .each((i, el) => { - if (el.type === "text") { - Recipe.instructions.push( - $(el) - .text() - .trim() - ); - } - }); - if (!Recipe.instructions.length) { - let directions = $(".direction-lists") - .contents() - .each((i, el) => { - if (el.type === "text") { - Recipe.instructions.push( - $(el) - .text() - .trim() - ); - } - }); - } - - Recipe.time.prep = $(".prep-time-amount") - .text() - .replace(/\s\s+/g, " ") - .trim(); - Recipe.time.cook = $(".cook-time-amount") - .text() - .replace(/\s\s+/g, " ") - .trim(); - Recipe.time.total = $(".total-time-amount") - .text() - .replace(/\s\s+/g, " ") - .trim(); - Recipe.servings = $(".yields-amount") - .text() - .replace(/\s\s+/g, " ") - .trim(); - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new 
Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = thePioneerWoman; diff --git a/scrapers/therecipecritic.js b/scrapers/therecipecritic.js deleted file mode 100644 index dc81a92..0000000 --- a/scrapers/therecipecritic.js +++ /dev/null @@ -1,57 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/RecipeSchema"); - -const therecipecritic = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("therecipecritic.com/")) { - reject(new Error("url provided must include 'therecipecritic.com/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - const textTrim = el => el.text().trim(); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = textTrim($(".wprm-recipe-name")); - - $(".wprm-recipe-ingredient").each((i, el) => { - Recipe.ingredients.push(textTrim($(el)).replace(/\s\s+/g, " ")); - }); - - $(".wprm-recipe-instruction-text").each((i, el) => { - Recipe.instructions.push(textTrim($(el)).replace(/\s\s+/g, " ")); - }); - - $(".wprm-recipe-details-name").remove(); - - Recipe.time.prep = textTrim($(".wprm-recipe-prep-time-container")); - Recipe.time.cook = textTrim($(".wprm-recipe-cook-time-container")); - Recipe.time.inactive = textTrim( - $(".wprm-recipe-custom-time-container") - ); - Recipe.time.total = textTrim($(".wprm-recipe-total-time-container")); - Recipe.servings = $( - ".wprm-recipe-servings-container .wprm-recipe-servings" - ).text(); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = therecipecritic; diff --git a/scrapers/thespruceeats.js b/scrapers/thespruceeats.js 
deleted file mode 100644 index 710357b..0000000 --- a/scrapers/thespruceeats.js +++ /dev/null @@ -1,76 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/RecipeSchema"); - -const theSpruceEats = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("thespruceeats.com/")) { - reject(new Error("url provided must include 'thespruceeats.com/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".heading__title").text(); - - $(".simple-list__item").each((i, el) => { - Recipe.ingredients.push( - $(el) - .text() - .trim() - ); - }); - - $(".section--instructions") - .find("li") - .find("p.comp") - .each((i, el) => { - Recipe.instructions.push( - $(el) - .text() - .trim() - ); - }); - - $(".recipe-search-suggestions__chip").each((i, el) => { - Recipe.tags.push( - $(el) - .find("a") - .text() - ); - }); - - let metaText = $(".meta-text").each((i, el) => { - let text = $(el).text(); - if (text.includes("Prep:")) { - Recipe.time.prep = text.replace("Prep: ", "").trim(); - } else if (text.includes("Cook: ")) { - Recipe.time.cook = text.replace("Cook:", "").trim(); - } else if (text.includes("Total: ")) { - Recipe.time.total = text.replace("Total:", "").trim(); - } else if (text.includes("Servings: ")) { - Recipe.servings = text.replace("Servings: ", "").trim(); - } - }); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = theSpruceEats; diff --git a/scrapers/whatsgabycooking.js b/scrapers/whatsgabycooking.js deleted file mode 100644 index 2a76d24..0000000 
--- a/scrapers/whatsgabycooking.js +++ /dev/null @@ -1,65 +0,0 @@ -const request = require("request"); -const cheerio = require("cheerio"); - -const RecipeSchema = require("../helpers/RecipeSchema"); - -const whatsGabyCooking = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("whatsgabycooking.com/")) { - reject(new Error("url provided must include 'whatsgabycooking.com/'")); - } else { - request(url, (error, response, html) => { - if (!error && response.statusCode === 200) { - const $ = cheerio.load(html); - - Recipe.image = $("meta[property='og:image']").attr("content"); - Recipe.name = $(".wprm-recipe-name").text(); - - $(".wprm-recipe-ingredient").each((i, el) => { - let elText = $(el) - .text() - .trim(); - if (elText.length) { - Recipe.ingredients.push(elText); - } - }); - - $(".wprm-recipe-instruction-group").each((i, el) => { - let groupName = $(el) - .find(".wprm-recipe-group-name") - .text(); - let instruction = $(el) - .find(".wprm-recipe-instruction-text") - .text(); - if (groupName) { - Recipe.instructions.push(groupName); - } - Recipe.instructions.push(instruction); - }); - - Recipe.tags = $(".wprm-recipe-cuisine").text().split(',').map(x => x.trim()); - - const times = $(".wprm-recipe-time"); - Recipe.time.prep = $(times.first()).text(); - Recipe.time.cook = $(times.get(1)).text(); - Recipe.time.total = $(times.last()).text(); - Recipe.servings = $(".wprm-recipe-servings-with-unit").text(); - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - }); - } - }); -}; - -module.exports = whatsGabyCooking; diff --git a/scrapers/woolworths.js b/scrapers/woolworths.js deleted file mode 100644 index 7d730f8..0000000 --- a/scrapers/woolworths.js +++ /dev/null @@ -1,67 +0,0 @@ -const request = require("request"); -const 
RecipeSchema = require("../helpers/RecipeSchema"); - -const urlRe = /\/(\d\d\d\d)\//; -const instructionsIndexRe = /(?:\d.)(.*)/; -const instructionsTipRe = /(Tip:)(.*)/i; - -const woolworths = url => { - const Recipe = new RecipeSchema(); - return new Promise((resolve, reject) => { - if (!url.includes("woolworths.com.au/shop/recipedetail/")) { - reject( - new Error( - "url provided must include 'woolworths.com.au/shop/recipedetail/'" - ) - ); - } else if (!urlRe.test(url)) { - reject(new Error("No recipe found on page")); - } else { - const recipeId = urlRe.exec(url)[1]; - recipeJsonUrl = `https://www.woolworths.com.au/apis/ui/recipes/${recipeId}`; - request( - { - url: recipeJsonUrl, - json: true - }, - (error, response, html) => { - if (!error && response.statusCode === 200 && html) { - Recipe.image = html.ImageFilename; - Recipe.name = html.Title.trim(); - Recipe.ingredients = html.Ingredients.map(i => - i.Description.trim() - ); - Recipe.time.prep = html.PreparationDuration.toString(); - Recipe.time.cook = html.CookingDuration.toString(); - Recipe.servings = html.Servings.toString(); - html.Instructions.split("\r\n").map(step => { - let newIngredient = ""; - if (instructionsIndexRe.test(step)) { - newIngredient = instructionsIndexRe.exec(step)[1].trim(); - } else if (instructionsTipRe.test(step)) { - newIngredient = step.trim(); - } - if (newIngredient.length) { - Recipe.instructions.push(newIngredient); - } - }); - - if ( - !Recipe.name || - !Recipe.ingredients.length || - !Recipe.instructions.length - ) { - reject(new Error("No recipe found on page")); - } else { - resolve(Recipe); - } - } else { - reject(new Error("No recipe found on page")); - } - } - ); - } - }); -}; - -module.exports = woolworths; diff --git a/test/constants/foodnetworkConstants.js b/test/constants/foodnetworkConstants.js index c49eefb..d599c15 100644 --- a/test/constants/foodnetworkConstants.js +++ b/test/constants/foodnetworkConstants.js @@ -28,7 +28,7 @@ module.exports = { "Increase 
the heat to medium, add the tomatoes and wine and cook until reduced by half, 2 to 3 minutes, then stir in the chicken broth. Put the pork chops in the sauce and carefully nestle the potatoes around them. Cook 3 to 5 minutes more until the pork chops register 145 degrees F in the center on an instant-read thermometer. Remove the pork chops from the sauce and transfer to shallow bowls or a serving platter. Taste the sauce and season with additional salt and pepper if needed. If most of the liquid in the pan evaporates while you are cooking the pork, stir in tablespoons of water at a time to get it back to a saucy consistency. If the sauce is a little thin and weak, after you take the chops out, turn the heat up and cook 1 to 2 minutes more to thicken and concentrate the flavors. Stir the parsley into the sauce, remove the thyme sprigs, spoon the sauce over the chops and serve." ], tags: [ - "Comfort Food", + "Comfort Food Restaurants", "Cast Iron Skillet", "Skillet Recipes", "French Recipes", diff --git a/test/constants/pinchofyumConstants.js b/test/constants/pinchofyumConstants.js index dc08428..ba2dc0c 100644 --- a/test/constants/pinchofyumConstants.js +++ b/test/constants/pinchofyumConstants.js @@ -36,6 +36,7 @@ module.exports = { "Lunch", "Quick and Easy", "Salads", + "Sugar Free", "Sugar-Free", "Superfoods", "Vegan", diff --git a/test/constants/recipetineatsConstants.js b/test/constants/recipetineatsConstants.js index 7393b61..fdf6e37 100644 --- a/test/constants/recipetineatsConstants.js +++ b/test/constants/recipetineatsConstants.js @@ -7,15 +7,15 @@ module.exports = { expectedRecipe: { name: "Dan Dan Noodles (Spicy Sichuan noodles)", ingredients: [ - "2 tbsp Chinese sesame sauce (sub tahini, Note 1)", - "1.5 tbsp Chinese chilli paste in oil , adjust spiciness (Note 2)", + "2 tbsp Chinese sesame paste (sub tahini, Note 1)", + "1.5 tbsp Chinese chilli paste in oil, adjust spiciness (Note 2)", "4 tbsp light soy sauce (Note 3)", - "2 garlic cloves , minced", + "2 
garlic cloves, minced", "3 tsp white sugar", "1/2 tsp Chinese five spice powder (Note 4)", - "1 tsp Sichuan pepper powder , preferably freshly ground (Note 5)", - "3 tbsp (or more!) chilli oil , preferably Chinese (Note 6)", - "3/4 cup (185ml) chicken broth/stock , hot, low sodium", + "1 tsp Sichuan pepper powder, preferably freshly ground (Note 5)", + "3 tbsp (or more!) chilli oil, preferably Chinese (Note 6)", + "3/4 cup (185ml) chicken broth/stock, hot, low sodium", "2 tsp Hoisin sauce", "1 tsp dark soy sauce (Note 7)", "1 tbsp Chinese cooking wine (sub 2 tbsp extra chicken stock)", @@ -23,17 +23,17 @@ module.exports = { "1 tbsp vegetable oil", "250g/ 8oz pork mince (ground pork)", "1 tsp vegetable oil", - "30g (1/4 cup) Sui Mi Ya Cai (preserved mustard greens) , finely chopped (Note 8)", - "500g/1lb white fresh noodles , medium thickness (Note 9)", - "16 choy sum stems , cut into 15cm pieces", - "2 green onions , finely sliced", - "1 tbsp peanuts , finely chopped (optional)" + "30g (1/4 cup) Sui Mi Ya Cai (preserved mustard greens), finely chopped (Note 8)", + "500g/1lb white fresh noodles, medium thickness (Note 9)", + "16 choy sum stems, cut into 15cm pieces", + "2 green onions, finely sliced", + "1 tbsp peanuts, finely chopped (optional)" ], instructions: [ "Dan Dan Sauce:", - "Mix all ingredients except oil and chicken stock. Then gently stir in oil and stock - oil should be sitting on surface. Set aside.", + "Mix all ingredients except oil and chicken stock. Then gently stir in oil and stock – oil should be sitting on surface. Set aside.", "Pork:", - 'Mix together hoisin, soy, Chinese wine and five spice ("Sauce").', + "Mix together hoisin, soy, Chinese wine and five spice (“Sauce”).", "Heat oil in a skillet or wok over high heat. Add pork and cook, breaking it up as you, until it changes from pink to white. 
Add Sauce and cook for 1 minute, then transfer into a bowl.", "Sui mi ya cai (preserved mustard greens):", "Return skillet to stove, reduce to medium heat. Add oil into middle of skillet.", diff --git a/test/constants/simplyrecipesConstants.js b/test/constants/simplyrecipesConstants.js index 059aa8c..2b975b1 100644 --- a/test/constants/simplyrecipesConstants.js +++ b/test/constants/simplyrecipesConstants.js @@ -25,14 +25,9 @@ module.exports = { "Vegan", "Bread", "Lunch", - "Salad", "Side Dish", - "Favorite Summer", "Make-ahead", - "Italian", - "Vegan", "Vegetarian", - "Bread", "Cucumber", "Tomato" ], diff --git a/test/constants/tasteofhomeConstants.js b/test/constants/tasteofhomeConstants.js index 7a134ae..ddd08ab 100644 --- a/test/constants/tasteofhomeConstants.js +++ b/test/constants/tasteofhomeConstants.js @@ -59,6 +59,6 @@ module.exports = { }, servings: "8 servings", image: - "https://www.tasteofhome.com/wp-content/uploads/2018/01/Artichoke-Chicken_EXPS_13X9BZ19_24_B10_04_5b-6.jpg" + "https://www.tasteofhome.com/wp-content/uploads/2018/01/Artichoke-Chicken_EXPS_13X9BZ19_24_B10_04_5b-9.jpg" } }; diff --git a/test/constants/thepioneerwomanConstants.js b/test/constants/thepioneerwomanConstants.js index b0e3e89..acb8c5d 100644 --- a/test/constants/thepioneerwomanConstants.js +++ b/test/constants/thepioneerwomanConstants.js @@ -7,27 +7,27 @@ module.exports = { expectedRecipe: { name: "French Dip Sandwiches", ingredients: [ - "1 tbsp. Kosher Salt", - "2 tbsp. Black Pepper", - "1/2 tsp. Ground Oregano", - "1/2 tsp. Ground Thyme", - "1 whole Boneless Ribeye Loin, About 4 To 5 Pounds (can Also Use Sirloin)", - "2 whole Large Onions, Sliced Thin", - "5 cloves Garlic, Minced", - "1 whole Packet French Onion Soup Mix (dry)", - "1 can Beef Consomme", - "1 c. Beef Broth Or Beef Stock", - "1/4 c. Dry Sherry Or White Wine (or You May Omit)", - "2 tbsp. Worcestershire Sauce", - "1 tbsp. Soy Sauce", - "1 c. 
Water", - "10 whole Crusty Deli Rolls/sub Rolls, Toasted" + "1 boneless ribeye loin or sirloin (about 4 to 5 pounds)", + "1 tbsp. kosher salt", + "2 tbsp. black pepper", + "1/2 tsp. ground oregano", + "1/2 tsp. ground thyme", + "2 whole large onions, thinly sliced", + "5 cloves garlic, minced", + "1 whole packet French onion soup mix (dry)", + "1 can beef consomme", + "1 c. beef broth or beef stock", + "1/4 c. dry sherry or white wine (optional)", + "2 tbsp. Worcestershire sauce", + "1 tbsp. soy sauce", + "1 c. water", + "10 whole crusty deli rolls or sub rolls, toasted" ], instructions: [ - "Preheat the oven to 475 degrees. Tie the piece of meat tightly with a couple of pieces of kitchen twine.", - "Mix the salt, pepper, oregano and thyme and rub it all over the surface of the beef. Place the beef on a roasting rack in a roasting pan and roast it to medium-rare, about 20 to 25 minutes, until it registers 125 degrees on a meat thermometer. (If you want it less pink, go to 135.) Remove the meat to a cutting board and cover it with foil.", - "Return the roasting pan to the stovetop burner over medium-high heat. Add the onions and garlic and stir them around for 5 minutes, until they are soft and golden. Sprinkle in the soup mix, then pour in the consomme, broth, sherry, Worcestershire, soy, and water. Bring it to a boil, then reduce the heat to low. Simmer for 45 minutes, stirring occasionally, to develop the flavors. Add more water if it starts to evaporate too much. Pour the liquid through a fine mesh strainer and reserve both the liquid and the onions.", - "Slice the beef very thin. Pile meat and onions on rolls, then serve with dishes of jus." + "Preheat the oven to 475˚ degrees. Tie the piece of meat tightly with a couple of pieces of kitchen twine.", + "In a small bowl, mix together the salt, pepper, oregano and thyme. Rub the seasoning mixture all over the surface of the beef. 
Place the beef on a roasting rack in a roasting pan and roast it to medium-rare, about 20 to 25 minutes, until it registers 125˚ degrees on a meat thermometer. (If you want it less pink, go to 135˚.) Remove the meat to a cutting board and cover it with foil.", + "Place the roasting pan on the stovetop burner over medium-high heat. Add the onions and garlic and cook, stirring, for 5 minutes, until they are soft and golden. Sprinkle in the soup mix, then pour in the consomme, broth, sherry, Worcestershire, soy sauce, and water. Bring it to a boil, then reduce the heat to low. Simmer for 45 minutes, stirring occasionally, to develop the flavors. Add more water if it starts to evaporate too much. Pour the liquid through a fine mesh strainer and reserve both the liquid and the onions.", + "Slice the beef very thin. Pile the beef and caramelized onions on the toasted rolls, then serve with a side of jus." ], tags: [], time: { @@ -40,6 +40,6 @@ module.exports = { }, servings: "10 servings", image: - "https://hips.hearstapps.com/amv-prod-tpw.s3.amazonaws.com/wp-content/uploads/2016/05/dsc_0580.jpg?crop=1xw:0.7513148009015778xh;center,top&resize=1200:*" + "https://hips.hearstapps.com/amv-prod-tpw.s3.amazonaws.com/wp-content/uploads/2016/05/dsc_0580.jpg?crop=1.00xw:0.754xh;0,0.0386xh&resize=1200:*" } }; diff --git a/test/constants/woolworthsConstants.js b/test/constants/woolworthsConstants.js index 614d1ad..d563b53 100644 --- a/test/constants/woolworthsConstants.js +++ b/test/constants/woolworthsConstants.js @@ -1,43 +1,46 @@ module.exports = { testUrl: - "https://www.woolworths.com.au/shop/recipedetail/5156/classic-guacamole", + "https://www.woolworths.com.au/shop/recipedetail/7440/bean-tomato-nachos", invalidUrl: "https://woolworths.com.au/shop/recipedetail/notarealurl", invalidDomainUrl: "www.invalid.com", nonRecipeUrl: "https://www.woolworths.com.au/shop/recipedetail/0000/not-a-recipe", expectedRecipe: { - name: "Classic Guacamole", + name: "Bean & Tomato Nachos", 
ingredients: [ - "1 small red onion", - "1-2 fresh red chillies", - "3 ripe avocados", - "1 bunch of fresh coriander", - "6 ripe cherry tomatoes", - "2 limes", - "Pantry Staples", - "extra virgin olive oil" + "2 tsp cumin", + "2 tsp coriander; plus 1 bunch coriander, chopped", + "1 tsp smoked paprika", + "1 small red onion, roughly chopped", + "400g can red kidney beans, drained, rinsed", + "400g can cannellini beans, drained, rinsed", + "400g Solanato tomatoes", + "1 small red capsicum, deseeded, diced", + "2 tbs lime juice", + "2 tbs extra virgin olive oil", + "200g Macro Organic corn chips", + "2 cups grated low-fat tasty cheese", + "2 avocados", + "1/3 cup light sour cream (optional)" ], instructions: [ - "Peel the onion and deseed 1 chilli, then roughly chop it all on a large board.", - "Destone the avocados and scoop the flesh onto the board.", - "Start chopping it all together until fine and well combined.", - "Pick over most of the coriander leaves, roughly chop and add the tomatoes, then continue chopping it all together.", - "Squeeze in the juice from 1 lime and 1 tablespoon of oil, then season to taste with sea salt, black pepper and more lime juice, if needed.", - "Deseed, finely chop and scatter over the remaining chilli if you like more of a kick.", - "Pick over the reserved coriander leaves, then serve.", - "Tip: Super quick and easy, this guacamole recipe is delicious with fajitas, quesadillas, dolloped into a wrap or served as a snack with crunchy veggies." + "Heat a frying pan over medium heat. Add spices and dry fry for 1-2 minutes or until fragrant (see tip).", + "Add onion, 1/2 the beans and 1/2 the tomatoes to a food processor. Using the pulse button, process until chopped. Transfer to a bowl and stir in spices, capsicum, 1 tbs of the lime juice, 1/4 cup coriander, remaining beans and oil.", + "Preheat oven to 180°c. Layer bean mix, corn chips and cheese into 1 large or 4 individual ovenproof serving dishes. 
Bake for 15 minutes or until cheese is melted.", + "Meanwhile, halve remaining tomatoes and place into a bowl. Scoop flesh from avocados and dice. Gently toss with tomatoes, remaining lime juice and 2 tbs coarsely chopped coriander. Serve nachos topped with salsa and sour cream, if using.", + "tip: toasting the spices boosts their flavour by releasing aromatic oils. Keep them moving in the pan to prevent burning." ], tags: [], time: { - prep: "15", - cook: "15", + prep: "15 Mins", + cook: "20 Mins", active: "", inactive: "", ready: "", total: "" }, - servings: "8", + servings: "4", image: - "https://cdn1.woolworths.media/content/recipes/1701-jamie-classicguacamole.jpg" + "https://cdn1.woolworths.media/content/recipes/1804-bean-and-tomato-nachos.jpg" } }; diff --git a/test/recipetineats.test.js b/test/recipetineats.test.js index e69de29..da5c641 100644 --- a/test/recipetineats.test.js +++ b/test/recipetineats.test.js @@ -0,0 +1,5 @@ +"use strict"; +const commonRecipeTest = require("./helpers/commonRecipeTest"); +const constants = require("./constants/recipetineatsConstants"); + +commonRecipeTest("recipeTinEats", constants, "recipetineats.com/"); diff --git a/test/seriouseats.test.js b/test/seriouseats.test.js index 81a541f..10b3202 100644 --- a/test/seriouseats.test.js +++ b/test/seriouseats.test.js @@ -1,10 +1,16 @@ "use strict"; const { assert, expect } = require("chai"); -const seriousEats = require("../scrapers/SeriousEatsScraper"); +const SeriousEats = require("../scrapers/SeriousEatsScraper"); const constants = require("./constants/seriouseatsConstants"); describe("seriousEats", () => { + let seriousEats; + + before(() => { + seriousEats = new SeriousEats(); + }); + it("should fetch the expected recipe", async () => { seriousEats.url = constants.testUrl; let actualRecipe = await seriousEats.fetchRecipe(); @@ -47,8 +53,7 @@ describe("seriousEats", () => { it("should throw an error if sponsored recipe is used", async () => { try { - seriousEats.url = 
constants.sponsorUrl; - await seriousEats.fetchRecipe(); + seriousEats = new SeriousEats(constants.sponsorUrl); assert.fail("was not supposed to succeed"); } catch (error) { expect(error.message).to.equal( diff --git a/test/simplyrecipes.test.js b/test/simplyrecipes.test.js index 7ab00d9..c9c3dc4 100644 --- a/test/simplyrecipes.test.js +++ b/test/simplyrecipes.test.js @@ -1,11 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const simplyRecipes = require("../scrapers/simplyrecipes"); -const Constants = require("./constants/simplyrecipesConstants"); +const constants = require("./constants/simplyrecipesConstants"); -commonRecipeTest( - "simplyRecipes", - simplyRecipes, - Constants, - "simplyrecipes.com/recipes/" -); +commonRecipeTest("simplyRecipes", constants, "simplyrecipes.com/recipes/"); diff --git a/test/smittenkitchen.test.js b/test/smittenkitchen.test.js index e6fb7c3..acfc659 100644 --- a/test/smittenkitchen.test.js +++ b/test/smittenkitchen.test.js @@ -1,35 +1,37 @@ "use strict"; -const expect = require("chai").expect; -const assert = require("chai").assert; +const { assert, expect } = require("chai"); -const smittenKitchen = require("../scrapers/smittenkitchen"); -const Constants = require("./constants/smittenkitchenConstants"); +const SmittenKitchen = require("../scrapers/SmittenKitchenScraper"); +const constants = require("./constants/smittenkitchenConstants"); describe("smittenKitchen", () => { + let smittenKitchen; + before(() => { + smittenKitchen = new SmittenKitchen(); + }); + it("should fetch the expected recipe (old style)", async () => { - let actualRecipe = await smittenKitchen(Constants.testUrlOld); - expect(JSON.stringify(Constants.expectedRecipeOld)).to.equal( - JSON.stringify(actualRecipe) - ); + smittenKitchen.url = constants.testUrlOld; + let actualRecipe = await smittenKitchen.fetchRecipe(); + expect(constants.expectedRecipeOld).to.deep.equal(actualRecipe); }); it("should fetch the expected recipe (new 
style V1)", async () => { - let actualRecipe = await smittenKitchen(Constants.testUrlNewV1); - expect(JSON.stringify(Constants.expectedRecipeNewV1)).to.equal( - JSON.stringify(actualRecipe) - ); + smittenKitchen.url = constants.testUrlNewV1; + let actualRecipe = await smittenKitchen.fetchRecipe(); + expect(constants.expectedRecipeNewV1).to.deep.equal(actualRecipe); }); it("should fetch the expected recipe (new style V2)", async () => { - let actualRecipe = await smittenKitchen(Constants.testUrlNewV2); - expect(JSON.stringify(Constants.expectedRecipeNewV2)).to.equal( - JSON.stringify(actualRecipe) - ); + smittenKitchen.url = constants.testUrlNewV2; + let actualRecipe = await smittenKitchen.fetchRecipe(); + expect(constants.expectedRecipeNewV2).to.deep.equal(actualRecipe); }); it("should throw an error if invalid url is used", async () => { try { - await smittenKitchen(Constants.invalidDomainUrl); + smittenKitchen.url = constants.invalidDomainUrl; + await smittenKitchen.fetchRecipe(); assert.fail("was not supposed to succeed"); } catch (error) { expect(error.message).to.equal( @@ -40,7 +42,8 @@ describe("smittenKitchen", () => { it("should throw an error if a problem occurred during page retrieval", async () => { try { - await smittenKitchen(Constants.invalidUrl); + smittenKitchen.url = constants.invalidUrl; + await smittenKitchen.fetchRecipe(); assert.fail("was not supposed to succeed"); } catch (error) { expect(error.message).to.equal("No recipe found on page"); @@ -49,7 +52,8 @@ describe("smittenKitchen", () => { it("should throw an error if non-recipe page is used", async () => { try { - await smittenKitchen(Constants.nonRecipeUrl); + smittenKitchen.url = constants.nonRecipeUrl; + await smittenKitchen.fetchRecipe(); assert.fail("was not supposed to succeed"); } catch (error) { expect(error.message).to.equal("No recipe found on page"); diff --git a/test/tasteofhome.test.js b/test/tasteofhome.test.js index c4ff813..be38e7d 100644 --- a/test/tasteofhome.test.js +++ 
b/test/tasteofhome.test.js @@ -1,11 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const tasteOfHome = require("../scrapers/tasteofhome"); -const Constants = require("./constants/tasteofhomeConstants"); +const constants = require("./constants/tasteofhomeConstants"); -commonRecipeTest( - "tasteOfHome", - tasteOfHome, - Constants, - "tasteofhome.com/recipes/" -); +commonRecipeTest("tasteOfHome", constants, "tasteofhome.com/recipes/"); diff --git a/test/theblackpeppercorn.test.js b/test/theblackpeppercorn.test.js index e027452..071f2c3 100644 --- a/test/theblackpeppercorn.test.js +++ b/test/theblackpeppercorn.test.js @@ -1,11 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const theBlackPeppercorn = require("../scrapers/theblackpeppercorn"); -const Constants = require("./constants/theblackpeppercornConstants"); +const constants = require("./constants/theblackpeppercornConstants"); -commonRecipeTest( - "theBlackPeppercorn", - theBlackPeppercorn, - Constants, - "theblackpeppercorn.com/" -); +commonRecipeTest("theBlackPeppercorn", constants, "theblackpeppercorn.com/"); diff --git a/test/thepioneerwoman.test.js b/test/thepioneerwoman.test.js index d27c96c..8b71fd4 100644 --- a/test/thepioneerwoman.test.js +++ b/test/thepioneerwoman.test.js @@ -1,11 +1,9 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const thePioneerWoman = require("../scrapers/thepioneerwoman"); -const Constants = require("./constants/thepioneerwomanConstants"); +const constants = require("./constants/thepioneerwomanConstants"); commonRecipeTest( "thePioneerWoman", - thePioneerWoman, - Constants, + constants, "thepioneerwoman.com/food-cooking/" ); diff --git a/test/therecipecritic.test.js b/test/therecipecritic.test.js index d622c58..c7b2f84 100644 --- a/test/therecipecritic.test.js +++ b/test/therecipecritic.test.js @@ -1,11 +1,5 @@ "use strict"; const commonRecipeTest = 
require("./helpers/commonRecipeTest"); -const theRecipeCritic = require("../scrapers/therecipecritic"); -const Constants = require("./constants/therecipecriticConstants"); +const constants = require("./constants/therecipecriticConstants"); -commonRecipeTest( - "theRecipeCritic", - theRecipeCritic, - Constants, - "therecipecritic.com/" -); +commonRecipeTest("theRecipeCritic", constants, "therecipecritic.com/"); diff --git a/test/thespruceeats.test.js b/test/thespruceeats.test.js index 64a5f0e..7fe4000 100644 --- a/test/thespruceeats.test.js +++ b/test/thespruceeats.test.js @@ -1,11 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const theSpruceEats = require("../scrapers/thespruceeats"); -const Constants = require("./constants/thespruceeatsConstants"); +const constants = require("./constants/thespruceeatsConstants"); -commonRecipeTest( - "theSpruceEats", - theSpruceEats, - Constants, - "thespruceeats.com/" -); +commonRecipeTest("theSpruceEats", constants, "thespruceeats.com/"); diff --git a/test/whatsgabycooking.test.js b/test/whatsgabycooking.test.js index b24c841..5d831b9 100644 --- a/test/whatsgabycooking.test.js +++ b/test/whatsgabycooking.test.js @@ -1,11 +1,5 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const whatsGabyCooking = require("../scrapers/whatsgabycooking"); -const Constants = require("./constants/whatsgabycookingConstants"); +const constants = require("./constants/whatsgabycookingConstants"); -commonRecipeTest( - "whatsGabyCooking", - whatsGabyCooking, - Constants, - "whatsgabycooking.com/" -); +commonRecipeTest("whatsGabyCooking", constants, "whatsgabycooking.com/"); diff --git a/test/woolworths.test.js b/test/woolworths.test.js index fb43911..44eb5b2 100644 --- a/test/woolworths.test.js +++ b/test/woolworths.test.js @@ -1,11 +1,9 @@ "use strict"; const commonRecipeTest = require("./helpers/commonRecipeTest"); -const woolworths = require("../scrapers/woolworths"); -const 
Constants = require("./constants/woolworthsConstants"); +const constants = require("./constants/woolworthsConstants"); commonRecipeTest( "woolworths", - woolworths, - Constants, + constants, "woolworths.com.au/shop/recipedetail/" ); From 356dccc8856a2f94508a1c4c612ccbe43f5840ca Mon Sep 17 00:00:00 2001 From: jadkins89 Date: Mon, 25 Jan 2021 12:35:20 -0700 Subject: [PATCH 11/11] ReadMe grammar fix --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ac4b6c7..806888d 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ Don't see a website you'd like to scrape? Open an [issue](https://github.com/jad ## Recipe Object -Depending on the recipe, certain fields may be left blank. All fields are represented as strings or arrays of strings. The name, ingredients, and instructions property are required for schema validation. +Depending on the recipe, certain fields may be left blank. All fields are represented as strings or arrays of strings. The name, ingredients, and instructions properties are required for schema validation. ```javascript {