# Source code for nti.property.dataurl

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Objects for working with the RFC2397 ``data`` URL scheme::

    data:[<MIME-type>][;charset=<encoding>][;base64],<data>

The encoding is indicated by ``;base64``. If it's present the data is
encoded as base64. Without it the data (as a sequence of octets) is
represented using ASCII encoding for octets inside the range of safe
URL characters and using the standard %xx hex encoding of URLs for
octets outside that range. If ``<MIME-type>`` is omitted, it defaults
to ``text/plain;charset=US-ASCII``. (As a shorthand, the type can be
omitted but the charset parameter supplied.)

"""

from __future__ import print_function, absolute_import, division
__docformat__ = "restructuredtext en"

try:
    from urllib.parse import quote
    from urllib.parse import unquote
    from urllib.parse import unquote_to_bytes
except ImportError: # pragma: no cover
    # Python 2
    from urllib import quote
    from urllib import unquote
    # On Python 2 ``unquote`` of a native string already yields a byte string.
    from urllib import unquote as unquote_to_bytes

from base64 import b64decode
from base64 import b64encode

from zope.cachedescriptors.property import CachedProperty

# Originally inspired by
# http://code.google.com/p/python-mom/source/browse/mom/net/scheme/dataurl.py?


def decode(data_url):
    """
    Decode a data URL into its payload and mime type.

    :param data_url: The data URL, either a plain string or a
        :class:`DataURL` instance. If the URL carries no mime-type
        definition, ``text/plain;charset=US-ASCII`` is reported as the
        mime type.
    :returns: A 2-tuple: ``(data, mime_type_string)``. The mime type
        string is returned as-is, without being parsed; see
        :func:`zope.contenttype.parse.parse` for that.
    """
    if not isinstance(data_url, DataURL):
        return _do_decode(data_url)

    # A DataURL exposes its own decoded payload and mime type; reuse them
    # rather than parsing the string a second time.
    return data_url.data, data_url.mimeType
def _do_decode(data_url): metadata, encoded = data_url.rsplit(",", 1) _, metadata = metadata.split("data:", 1) metadata_parts = metadata.rsplit(";", 1) if metadata_parts[-1] == "base64": _decode = b64decode metadata_parts = metadata_parts[:-1] else: _decode = unquote if not metadata_parts or not metadata_parts[0]: metadata_parts = ("text/plain;charset=US-ASCII",) mime_type = metadata_parts[0] raw_bytes = _decode(encoded) return raw_bytes, mime_type
class DataURL(str): # native string on both py2 and py3
    """
    A data URL string that also exposes its decoded payload and its
    mime type as attributes.
    """

    @CachedProperty
    def _decoded(self):
        # The ``(data, mime_type)`` pair produced by parsing this string.
        return _do_decode(self)

    @property
    def data(self):
        """The decoded payload of this data URL."""
        payload, _ = self._decoded
        return payload

    @property
    def mimeType(self):
        """The mime type string of this data URL."""
        _, mime_type = self._decoded
        return mime_type
# Charset that ``encode`` adds by default for ``text/*`` mime types.
_def_charset = 'US-ASCII'
# Unique sentinel used as the default of ``encode``'s ``charset`` argument,
# so that an explicit ``None`` can be told apart from "not supplied".
_marker = object()
def encode(raw_bytes, mime_type='text/plain', charset=_marker, encoder="base64"):
    """
    Build a data URL string from raw bytes.

    :param raw_bytes: The payload; must be a ``bytes`` object.
    :param mime_type: The mime type as a native string, e.g.
        ``"text/css"`` or ``"image/png"``. Default ``"text/plain"``.
        A false value produces a URL with no mime type.
    :param charset: The value for the ``;charset=`` component, e.g.
        ``"utf-8"``. This only labels the data: *raw_bytes* is never
        transcoded, so the caller must already have encoded it
        accordingly.

        .. note:: When this argument is not supplied, ``US-ASCII`` is
            used if *mime_type* starts with ``text/`` and no charset
            component is written otherwise (for example, for image
            mime types). Pass a value explicitly to override this;
            ``None`` always means no charset component.
    :param encoder: The string ``"base64"`` (the default) for a
        base64 payload. Any other value (conventionally ``None``)
        writes the payload as percent-quoted ASCII instead.
    :returns: The data URL as a text string.
    :raises TypeError: If *raw_bytes* is not a ``bytes`` object.
    """
    if not isinstance(raw_bytes, bytes): # pragma: no cover
        raise TypeError("only raw bytes can be encoded")

    use_base64 = encoder == "base64"
    separator = ";base64," if use_base64 else ","

    mime_type = mime_type or ""
    if charset is _marker:
        # Heuristic default: only textual types get a charset label.
        charset = _def_charset if mime_type.startswith('text/') else None
    charset_param = (";charset=" + charset) if charset else ""

    if use_base64:
        payload = b64encode(raw_bytes)
    else:
        # We want ASCII bytes.
        payload = quote(raw_bytes).encode('ascii')
    if isinstance(payload, bytes):
        payload = payload.decode("utf-8")

    return ''.join(("data:", mime_type, charset_param, separator, payload))