Skip to content

Commit

Permalink
ISSUE #1 consideration
Browse files Browse the repository at this point in the history
  • Loading branch information
k0001 committed Sep 25, 2011
1 parent bf2db2f commit c58febf
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions mscrap/mscrap/pipelines.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
# coding: utf-8
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/topics/item-pipeline.html

#from scrapy.contrib.exporter.jsonlines import JsonLinesItemExporter
from scrapy import signals
from scrapy.exceptions import DropItem
from scrapy.xlib.pydispatch import dispatcher
from mscrap.items import LegisladorItem


class MscrapPipeline(object):
Expand All @@ -21,10 +24,19 @@ def spider_closed(self, spider):
del self.duplicates[spider]

def process_item(self, item, spider):
    """Validate, de-duplicate and persist one scraped item.

    The item is saved through ``_item_save`` only when its ``'id'`` is not
    in ``self.duplicates[spider]`` and ``_item_exists`` reports it as new.

    Returns:
        The same ``item``, so later pipeline stages still receive it.

    Raises:
        DropItem: when ``_item_valid`` rejects the item.
    """
    if not self._item_valid(item):
        # Give the drop a reason so the crawl log explains it.
        raise DropItem("item rejected by _item_valid")

    seen_ids = self.duplicates[spider]
    if item['id'] not in seen_ids and not self._item_exists(item):
        self._item_save(item)

    # NOTE(review): the item is handed on even when it was not saved;
    # presumably that is the intent -- confirm against the original layout.
    return item

def _item_valid(self, item):
    """Return False for items known to be unusable, True otherwise.

    ISSUE #1: data for 'Perroni, Ana María' is unavailable, so that
    ``LegisladorItem`` is rejected. Names are compared with accents
    stripped, so both 'Ana María' and 'Ana Maria' are matched.
    """
    import unicodedata

    def _fold(value):
        """Return ``value`` as accent-free text, or None if it is not text."""
        if isinstance(value, bytes):
            value = value.decode('utf-8', 'replace')
        if not isinstance(value, type(u'')):
            return None
        decomposed = unicodedata.normalize('NFKD', value)
        return u''.join(
            ch for ch in decomposed if not unicodedata.combining(ch)
        )

    if isinstance(item, LegisladorItem):
        if (_fold(item['apellido']) == u'Perroni'
                and _fold(item['nombre']) == u'Ana Maria'):
            return False
    return True

def _item_exists(self, item):
# TODO
return False
Expand Down

0 comments on commit c58febf

Please sign in to comment.