# Source code for zope.contenttype.parse

##############################################################################
#
# Copyright (c) 2001, 2002 Zope Foundation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""MIME Content-Type parsing helper functions.

This supports parsing `RFC 1341`_ Content-Type values, including
quoted-string values as defined in `RFC 822`_.

.. _RFC 1341: https://tools.ietf.org/html/rfc1341
.. _RFC 822: https://tools.ietf.org/html/rfc822

"""
__docformat__ = "reStructuredText"

import re


# TODO: This still needs to support comments in structured fields as
# specified in RFC 2822.


def parse(string):
    """
    Parse the given string as a MIME type, returning parameters as a dict.

    The parsing itself is delegated to :func:`parseOrdered`; any exception
    it raises propagates from here unchanged.

    :param str string: The string to parse.
    :return: A tuple ``(major, minor, params)`` where ``major`` and
        ``minor`` are the two parts of the type, and ``params`` is a
        dictionary containing any parameters by name.
    """
    major, minor, ordered = parseOrdered(string)
    # When a parameter name is repeated, the last occurrence wins.
    by_name = {name: value for name, value in ordered}
    return major, minor, by_name
def parseOrdered(string):
    """
    Parse the given string as a MIME type, keeping parameter order.

    :param str string: The string to parse.
    :return: A tuple ``(major, minor, params)`` where ``major`` and
        ``minor`` are the (lower-cased) two parts of the type, and
        ``params`` is a sequence of ``(name, value)`` parameters in the
        order they appear.
    :raises ValueError: If the *string* is malformed.
    """
    if ";" not in string:
        mimetype = string
        params = []
    else:
        # Everything after the first ";" is the parameter list.  It is
        # parsed before the type itself is validated, so a malformed
        # parameter list is reported first.
        mimetype, raw_params = string.split(";", 1)
        params = _parse_params(raw_params)

    if "/" not in mimetype:
        raise ValueError(
            "content type missing major/minor parts: %r" % mimetype)

    normalized = mimetype.strip().lower()
    major, minor = normalized.split("/", 1)
    return _check_token(major.strip()), _check_token(minor.strip()), params
def _parse_params(string): result = [] string = string.strip() while string: if "=" not in string: raise ValueError("parameter values are not optional") name, rest = string.split("=", 1) name = _check_token(name.strip().lower()) rest = rest.strip() # rest is: value *[";" parameter] if rest[:1] == '"': # quoted-string, defined in RFC 822. m = _quoted_string_match(rest) if m is None: raise ValueError("invalid quoted-string in %r" % rest) value = m.group() rest = rest[m.end():].strip() if rest[:1] not in ("", ";"): raise ValueError( "invalid token following quoted-string: %r" % rest) rest = rest[1:] value = _unescape(value) elif ";" in rest: value, rest = rest.split(";") value = _check_token(value.strip()) else: value = _check_token(rest.strip()) rest = "" result.append((name, value)) string = rest.strip() return result _quoted_string_match = re.compile('"(?:\\\\.|[^"\n\r\\\\])*"', re.DOTALL).match _token_match = re.compile("[^][ \t\n\r()<>@,;:\"/?=\\\\]+$").match def _check_token(string): if _token_match(string) is None: raise ValueError('"%s" is not a valid token' % string) return string def _unescape(string): assert string[0] == '"' assert string[-1] == '"' string = string[1:-1] if "\\" in string: string = re.sub(r"\\(.)", r"\1", string) return string
def join(spec):
    """
    Build the string form of a parsed MIME type.

    *spec* is a three-part tuple ``(major, minor, params)`` as produced by
    :func:`parse` or :func:`parseOrdered`.  ``params`` may be a mapping
    (anything with an ``items`` attribute) or a sequence of
    ``(name, value)`` pairs.  Mapping items are emitted in sorted order;
    a sequence is emitted in the order given.

    :returns: The string representation. For example, given
        ``('text', 'plain', [('encoding','utf-8')])``, this will produce
        ``'text/plain;encoding=utf-8'``.
    :rtype: str
    """
    major, minor, params = spec
    if hasattr(params, "items"):
        # ensure a predictable order:
        params = sorted(params.items())
    rendered = "".join(
        ";{}={}".format(name, _escape(value)) for name, value in params
    )
    return "{}/{}{}".format(major, minor, rendered)
def _escape(string):
    """
    Return *string* in a form suitable for use as a parameter value.

    A string that passes ``_check_token`` is returned as-is.  Anything else
    is wrapped in double quotes, with every backslash, double quote,
    newline and carriage return preceded by a backslash.
    """
    try:
        return _check_token(string)
    except ValueError:
        pass
    # Escaping each character independently in a single pass gives the same
    # result as replacing the backslash first and the other characters after.
    escaped = "".join(
        "\\" + char if char in '\\"\n\r' else char for char in string
    )
    return '"%s"' % escaped