# Source code for spreads.metadata

# -*- coding: utf-8 -*-

# Copyright (C) 2014 Johannes Baiter <johannes.baiter@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""
Metadata class and utility functions.

:py:func:`get_isbn_suggestions` and :py:func:`get_isbn_metadata` return
metadata as dictionaries (a list of them for the former) whose keys correspond
to the Dublin Core fields of the same name, e.g. `creator`, `identifier`,
`date`, `publisher` and `language`.
"""

from __future__ import division, unicode_literals

from collections import MutableMapping

import isbnlib
from isbnlib import _goom as googlebooks
from spreads.vendor.bagit import BagInfo


def _format_isbnlib(isbnrecord):
    """ Convert a record as delivered by `isbnlib` into a metadata dict.

    Empty fields are dropped. Known fields are stored under their Dublin Core
    name, all remaining fields are stored under their lower-cased name.

    :param isbnrecord:  Record with `isbnlib` field names as keys
    :type isbnrecord:   dict
    :returns:           Record with Dublin Core style keys
    :rtype:             dict
    """
    # Fields that are only renamed, the value is taken over as-is
    renamed = {'Authors': 'creator', 'Year': 'date'}
    # Fields that are renamed and whose value is wrapped in a list
    wrapped = {'Publisher': 'publisher', 'Language': 'language'}

    formatted = {}
    for field, value in isbnrecord.items():
        if not value:
            # Ignore empty fields
            continue
        if field == 'ISBN-13':
            formatted['identifier'] = ["ISBN:{0}".format(value)]
        elif field in renamed:
            formatted[renamed[field]] = value
        elif field in wrapped:
            formatted[wrapped[field]] = [value]
        else:
            formatted[field.lower()] = value
    return formatted


def get_isbn_suggestions(query):
    """ For a given `query`, return a list of metadata suggestions.

    :param query:   Search query
    :type query:    unicode
    :returns:       List of suggestions, empty if the lookup yielded nothing
    :rtype:         list of dict
    """
    # The lookup is performed with a UTF-8 encoded byte string
    if isinstance(query, unicode):
        query = query.encode('utf-8')
    # NOTE(review): the backend presumably can return `None` when a query has
    # no hits -- confirm against isbnlib. Any falsy result is treated as
    # "no suggestions" so that callers always receive a list.
    results = googlebooks.query(query) or []
    return [_format_isbnlib(data) for data in results]
def get_isbn_metadata(isbn):
    """ Look up the metadata for a valid ISBN number (-10 or -13).

    :param isbn:    A valid ISBN-10 or ISBN-13
    :type isbn:     unicode
    :returns:       Metadata for ISBN
    :rtype:         dict or `None` if ISBN is not valid or does not exist
    """
    try:
        record = isbnlib.meta(isbn)
    except isbnlib.NotValidISBNError:
        return None
    # An empty lookup result yields `None` as well
    return _format_isbnlib(record) if record else None
class SchemaField(object):
    """ Definition of a field in a metadata schema.

    :attr key:          Key/field name
    :type key:          unicode
    :attr description:  Description of the field
    :type description:  unicode
    :attr multivalued:  Whether the field can hold multiple values
    :type multivalued:  bool
    """

    def __init__(self, key, description=None, multivalued=False):
        """ Create a new field definition.

        :param key:         Key/field name
        :param description: Description of the field, when empty it is
                            derived from the capitalized `key`, with "(s)"
                            appended for multivalued fields
        :param multivalued: Whether the field can hold multiple values
        """
        self.key = key
        self.multivalued = multivalued
        if not description:
            description = key.capitalize() + ("(s)" if multivalued else "")
        self.description = description

    def to_dict(self):
        """ Return the field definition as a plain dictionary.

        :returns:   Dictionary with the keys `key`, `description` and
                    `multivalued`
        :rtype:     dict
        """
        return {
            'key': self.key,
            'description': self.description,
            'multivalued': self.multivalued,
        }

    def __repr__(self):
        # `!r` quotes and escapes the values, so the result is unambiguous
        # and stays plain ASCII even for non-ASCII keys/descriptions.
        return ("SchemaField(key={0!r}, description={1!r}, multivalued={2!r})"
                .format(self.key, self.description, self.multivalued))
class Metadata(MutableMapping):
    """ dict-like object that has a schema of metadata fields (currently
    hard-wired to Dublin Core) and persists all operations to a `dcmeta.txt`
    text file on the disk.
    """
    # TODO: This should really be exposed over the plugin API so that plugins
    #       can specify custom schemas that would render across all UIs,
    #       similar to `OptionTemplate` for the configuration.
    FILENAME = 'dcmeta.txt'

    SCHEMA = [
        SchemaField('title'),
        SchemaField('creator', multivalued=True),
        SchemaField('date'),
        SchemaField('publisher', multivalued=True),
        SchemaField('language', multivalued=True),
        SchemaField('extent', description="Extent/Number of pages"),
        SchemaField('identifier', multivalued=True),
    ]

    @classmethod
    def _schemafield_for_key(cls, key):
        """ Return the schema field definition for `key`.

        :param key:     Key/field name to look up
        :returns:       The first field in `SCHEMA` with a matching key
        :rtype:         :py:class:`SchemaField`
        :raises KeyError:   If the schema has no field named `key`
        """
        try:
            return next(f for f in cls.SCHEMA if f.key == key)
        except StopIteration:
            raise KeyError("Could not find field '{0}' in schema".format(key))

    def __init__(self, base_path):
        """ Create a new instance and try to load current values from an
        existing file.

        :param base_path:   Directory where `dcmeta.txt` should be stored
        :type base_path:    :py:class:`pathlib.Path`
        """
        self.filepath = base_path/self.FILENAME
        self._backingstore = BagInfo(unicode(self.filepath))

    def __getitem__(self, key):
        val = self._backingstore[key]
        schemafield = self._schemafield_for_key(key)
        # Multivalued fields are always handed out as a sequence, even if
        # only a single value is stored. `isinstance` also accepts subclasses
        # of list/tuple, which must not be wrapped a second time.
        if schemafield.multivalued and not isinstance(val, (tuple, list)):
            val = [val]
        return val

    def __setitem__(self, key, value):
        # Raises a KeyError for keys that are not part of the schema
        self._schemafield_for_key(key)
        self._backingstore[key] = value

    def __delitem__(self, key):
        del self._backingstore[key]

    def __iter__(self):
        return iter(self._backingstore)

    def __len__(self):
        return len(self._backingstore)