# Source code for scrapy_mongoengine_item

"""
Scrapy extension to write scraped items using `mongoengine` documents
"""
import copy
from six import with_metaclass
from mongoengine.errors import ValidationError
from scrapy.item import Field, Item, ItemMeta

# Package metadata: distribution title, release version, author and
# copyright/licensing information.
__title__ = 'scrapy-mongoengine-item'
__version__ = '0.1.5'
__author__ = 'Artur Barseghyan <artur.barseghyan@gmail.com>'
__copyright__ = '2019 Artur Barseghyan'
__license__ = 'GPL 2.0/LGPL 2.1'
# Public API of this module: the only name exported by a star-import.
__all__ = (
    'MongoEngineItem',
)


def has_custom_primary_key(fields):
    """Tell whether any of the given fields is flagged as a primary key.

    :param fields: Iterable of objects exposing a ``primary_key`` attribute.
    :return: ``True`` as soon as one field has a truthy ``primary_key``,
        ``False`` otherwise (including for an empty iterable).
    :rtype: bool
    """
    # ``any`` stops at the first truthy flag, like an early ``return True``.
    return any(candidate.primary_key for candidate in fields)


class MongoEngineItemMeta(ItemMeta):
    """Metaclass that mirrors a ``mongoengine`` document's fields on an item.

    When the class being created has a truthy ``mongoengine_document``
    attribute, a :class:`Field` is registered in ``fields`` for the
    document's fields and the following class attributes are set:

    - ``_document_fields``: list of every field name of the document.
    - ``_document_meta``: the document's ``_meta`` mapping.
    """

    def __new__(mcs, class_name, bases, attrs):
        """Build the item class and populate it from the document.

        :param class_name: Name of the class being created.
        :param bases: Base classes of the class being created.
        :param attrs: Namespace (attribute dict) of the class being created.
        :return: The newly created item class.
        """
        item_cls = super(MongoEngineItemMeta, mcs).__new__(
            mcs, class_name, bases, attrs
        )
        # Continue with a copy of the mapping produced by ``ItemMeta``.
        item_cls.fields = item_cls.fields.copy()

        document = item_cls.mongoengine_document
        if not document:
            # No document attached: nothing to mirror.
            return item_cls

        item_cls._document_fields = []
        item_cls._document_meta = document._meta
        custom_pk = has_custom_primary_key(document._fields.values())

        for name in document._fields:
            # The field named by ``_meta['id_field']`` is left out of the
            # item's ``fields`` unless some field is declared as a custom
            # primary key. ``not custom_pk`` comes first so that the meta
            # lookup only happens when there is no custom primary key.
            skipped_id_field = (
                not custom_pk
                and name == item_cls._document_meta['id_field']
            )
            if not skipped_id_field:
                item_cls.fields[name] = Field()
            # Every document field name is recorded, the id field included.
            item_cls._document_fields.append(name)

        return item_cls


class MongoEngineItem(with_metaclass(MongoEngineItemMeta, Item)):
    """Scrapy item backed by a ``mongoengine`` document class.

    Subclasses point ``mongoengine_document`` at a document class. The
    document instance is built lazily from the item's values and can be
    validated (:meth:`is_valid`, :attr:`errors`) and saved (:meth:`save`).
    """

    mongoengine_document = None

    def __init__(self, *args, **kwargs):
        super(MongoEngineItem, self).__init__(*args, **kwargs)
        # Both are computed on first use and then cached on the item.
        self._instance = None
        self._errors = None

    def save(self, commit=True):
        """Return the document instance, saving it first when requested.

        :param commit: When truthy, call ``save()`` on the document
            instance before returning it.
        :return: The document instance built from this item.
        """
        document = self.instance
        if commit:
            document.save()
        return document

    def is_valid(self):
        """Return ``True`` when validation collected no errors."""
        return not self._get_errors()

    def _get_errors(self):
        """Collect, cache and return the validation errors of the instance.

        Runs ``validate()`` and then ``clean()`` on the document instance;
        the ``errors`` of every ``ValidationError`` raised are merged into
        one dict. The result is computed once and reused afterwards.

        :return: Dict of collected errors (empty when there are none).
        """
        if self._errors is None:
            collected = self._errors = {}
            for check_name in ('validate', 'clean'):
                try:
                    getattr(self.instance, check_name)()
                except ValidationError as err:
                    collected.update(copy.copy(err.errors))
            # Uniqueness is deliberately not checked here: it is faster to
            # let the database enforce it on save. Beware that a failed
            # save() therefore surfaces as a database-level error rather
            # than as a ValidationError.
        return self._errors

    errors = property(_get_errors)

    @property
    def instance(self):
        """Document instance built from this item's values (created once).

        Only the values whose key is listed in ``_document_fields`` are
        passed to the ``mongoengine_document`` constructor.
        """
        if self._instance is None:
            document_kwargs = {
                key: self.get(key)
                for key in self._values
                if key in self._document_fields
            }
            self._instance = self.mongoengine_document(**document_kwargs)
        return self._instance