# Source code for zope.contenttype.parse

##############################################################################
#
# Copyright (c) 2001, 2002 Zope Foundation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""MIME Content-Type parsing helper functions.

This supports parsing `RFC 1341`_ Content-Type values, including
quoted-string values as defined in `RFC 822`_.

.. _RFC 1341: https://tools.ietf.org/html/rfc1341
.. _RFC 822: https://tools.ietf.org/html/rfc822

"""
__docformat__ = "reStructuredText"

import re


# TODO: This still needs to support comments in structured fields as
# specified in RFC 2822.


def parse(string):
    """
    Parse the given string as a MIME type.

    This uses :func:`parseOrdered` and can raise the same
    exceptions it does.

    :param str string: The string to parse.
    :return: A tuple ``(major, minor, params)`` where ``major``
      and ``minor`` are the two parts of the type, and ``params``
      is a dictionary containing any parameters by name.  When a
      parameter name occurs more than once, the last occurrence wins.
    """
    major, minor, params = parseOrdered(string)
    # ``params`` is a sequence of (name, value) pairs; dict() keeps the
    # last value for a repeated name, exactly like assigning in order.
    return major, minor, dict(params)


def parseOrdered(string):
    """
    Parse the given string as a MIME type.

    :param str string: The string to parse.
    :return: A tuple ``(major, minor, params)``  where ``major``
      and ``minor`` are the two parts of the type (lower-cased and
      stripped of surrounding whitespace), and ``params`` is a
      sequence of ``(name, value)`` parameters in order.
    :raises ValueError: If the *string* is malformed.
    """
    # Everything after the first ";" is the parameter list.  Without a
    # ";" the remainder is empty, which parses to an empty list.
    mimetype, _, remainder = string.partition(";")
    params = _parse_params(remainder)
    if "/" not in mimetype:
        raise ValueError(
            "content type missing major/minor parts: %r" % mimetype)

    major, minor = mimetype.strip().lower().split("/", 1)
    return _check_token(major.strip()), _check_token(minor.strip()), params


def _parse_params(string):
    """
    Parse the parameter portion of a Content-Type value.

    :param str string: The text following the first ``;`` of the
      value, for example ``' charset="utf-8"; format=flowed'``.
    :return: A list of ``(name, value)`` tuples in the order they
      appear.  Names are lower-cased; quoted-string values are returned
      without the surrounding quotes and with escapes resolved.
    :raises ValueError: If a parameter has no ``=``, if a name or an
      unquoted value is not a valid token, or if a quoted-string is
      malformed or is followed by anything other than ``;``.
    """
    result = []
    string = string.strip()
    while string:
        if "=" not in string:
            raise ValueError("parameter values are not optional")
        name, rest = string.split("=", 1)
        name = _check_token(name.strip().lower())
        rest = rest.strip()

        # rest is: value *[";" parameter]

        if rest[:1] == '"':
            # quoted-string, defined in RFC 822.
            m = _quoted_string_match(rest)
            if m is None:
                raise ValueError("invalid quoted-string in %r" % rest)
            value = m.group()
            rest = rest[m.end():].strip()
            if rest[:1] not in ("", ";"):
                raise ValueError(
                    "invalid token following quoted-string: %r" % rest)
            # Drop the ";" separator (a no-op when nothing is left).
            rest = rest[1:]
            value = _unescape(value)

        else:
            # Unquoted token: it ends at the FIRST ";" only.  Everything
            # after that belongs to the following parameters, so the
            # split must not consume later separators.  With no ";" at
            # all, the whole of ``rest`` is the value.
            value, _, rest = rest.partition(";")
            value = _check_token(value.strip())

        result.append((name, value))
        string = rest.strip()
    return result


# Matches an RFC 822 quoted-string at the start of the input.  DOTALL
# lets a backslash escape any character, including a newline.
_quoted_string_match = re.compile('"(?:\\\\.|[^"\n\r\\\\])*"', re.DOTALL).match
# Matches a string consisting solely of token characters.  The pattern
# is anchored with \Z rather than $ because $ also matches just before
# a trailing newline, which would let "abc\n" pass as a token.
_token_match = re.compile("[^][ \t\n\r()<>@,;:\"/?=\\\\]+\\Z").match


def _check_token(string):
    """
    Validate that *string* is a non-empty token.

    :return: *string* unchanged.
    :raises ValueError: If *string* is empty or contains whitespace or
      any of the separator characters excluded by ``_token_match``.
    """
    if _token_match(string) is None:
        raise ValueError('"%s" is not a valid token' % string)
    return string


def _unescape(string):
    """
    Strip the quotes from a quoted-string and resolve backslash escapes.

    :param str string: Text that starts and ends with ``"``, as matched
      by ``_quoted_string_match``.
    :return: The enclosed text with every ``\\x`` pair replaced by ``x``.
    """
    assert string[0] == '"'
    assert string[-1] == '"'
    string = string[1:-1]
    if "\\" in string:
        # DOTALL so that an escaped newline is unescaped as well; the
        # quoted-string pattern accepts such pairs.
        string = re.sub(r"\\(.)", r"\1", string, flags=re.DOTALL)
    return string


def join(spec):
    """
    Given a three-part tuple as produced by :func:`parse` or
    :func:`parseOrdered`, return the string representation.

    :param spec: A ``(major, minor, params)`` tuple.  ``params`` may be
      a mapping (anything with an ``items`` method) or an iterable of
      ``(name, value)`` pairs.  Mapping items are emitted in sorted
      order; an iterable of pairs keeps its own order.
    :returns: The string representation. For example, given ``('text', 'plain',
      [('encoding','utf-8')])``, this will produce
      ``'text/plain;encoding=utf-8'``.
    :rtype: str
    """
    major, minor, params = spec
    if hasattr(params, "items"):
        # ensure a predictable order:
        params = sorted(params.items())
    pstr = "".join(
        ";{}={}".format(name, _escape(value)) for name, value in params)
    return "{}/{}{}".format(major, minor, pstr)


def _escape(string):
    """
    Return *string* in a form usable as a parameter value.

    A value accepted by ``_check_token`` is returned unchanged; any
    other value is wrapped in double quotes, with backslash, double
    quote, newline and carriage return each preceded by a backslash.
    """
    try:
        _check_token(string)
    except ValueError:
        pass
    else:
        return string

    quoted = string
    # The backslash has to be handled first so that the backslashes
    # added for the other characters are not doubled afterwards.
    for special in ("\\", '"', "\n", "\r"):
        quoted = quoted.replace(special, "\\" + special)
    return '"%s"' % quoted