Source code for cumin.backends.puppetdb

# pylint: skip-file
# See https://github.com/PyCQA/astroid/issues/437
"""PuppetDB backend."""
from string import capwords

import pyparsing as pp
import requests

from requests.packages import urllib3

from cumin import nodeset, nodeset_fromlist
from cumin.backends import BaseQuery, InvalidQueryError


CATEGORIES = ('C', 'F', 'O', 'P', 'R')
""":py:func:`tuple`: available categories in the grammar.

* ``C``: shortcut for querying resources of type ``Class``, equivalent of ``R:Class = class_path``.
* ``F``: for querying facts.
* ``O``: shortcut for querying resources of type ``Class`` that starts with ``Role::``.
* ``P``: shortcut for querying resources of type ``Class`` that starts with ``Profile::``.
* ``R``: for querying generic resources.
"""

OPERATORS = ('=', '>=', '<=', '<', '>', '~')
""":py:func:`tuple`: available operators in the grammar, the same available in PuppetDB API.

The ``~`` one is used for regex matching.
"""


def grammar():
    """Build the pyparsing grammar used to parse PuppetDB backend queries.

    Backus-Naur form (BNF) of the grammar::

            <grammar> ::= <item> | <item> <and_or> <grammar>
               <item> ::= [<neg>] <query-token> | [<neg>] "(" <grammar> ")"
        <query-token> ::= <token> | <hosts>
              <token> ::= <category>:<key> [<operator> <value>]

    The pyparsing library defines the grammar in a BNF-like style, hence the tokens not detailed above are
    described directly by the code below.

    Returns:
        pyparsing.ParserElement: the grammar parser.

    """
    # Both single and double quoted strings are accepted, the quotes are stripped from the parsed value
    quoted = pp.quotedString.copy().addParseAction(pp.removeQuotes)

    # Boolean operators. 'neg' is the label for 'not' to allow dot notation ('not' is a reserved word in Python)
    negation = pp.CaselessKeyword('not')('neg')
    conjunction = (pp.CaselessKeyword('and') | pp.CaselessKeyword('or'))('bool')

    # Comparison operators
    comparison = pp.oneOf(OPERATORS, caseless=True)('operator')

    # Parentheses delimiting a subgroup
    open_par = pp.Literal('(')('open_subgroup')
    close_par = pp.Literal(')')('close_subgroup')

    # Hosts selection: glob (*) and clustershell (,!&^[]) syntaxes are allowed, i.e. host10[10-42].*.domain
    # A bare word that is a boolean keyword is never taken as a hosts selection
    host_selection = quoted | (~(conjunction | negation) + pp.Word(pp.alphanums + '-_.*,!&^[]'))

    # Key-value token for the allowed categories with the available comparison operators, i.e. F:key = value
    category_name = pp.oneOf(CATEGORIES, caseless=True)('category')
    key_name = pp.Word(pp.alphanums + '-_.%@:')('key')
    category_key = pp.Combine(category_name + ':' + key_name)  # i.e. F:key
    # All printable characters except the parentheses that are part of this or the global grammar
    value_chars = ''.join(char for char in pp.printables if char not in '(){}')
    match_value = (quoted | pp.Word(value_chars))('value')
    category_token = category_key + pp.Optional(comparison + match_value)

    # Final grammar, see the docstring for its BNF based on the tokens defined above
    # Groups are used to split the parsed results for an easy access
    full_grammar = pp.Forward()
    simple_item = pp.Group(pp.Optional(negation) + (category_token | host_selection('hosts')))
    grouped_item = pp.Group(pp.Optional(negation) + open_par + full_grammar + close_par)
    any_item = simple_item | grouped_item
    full_grammar << any_item + pp.ZeroOrMore(pp.Group(conjunction) + full_grammar)  # pylint: disable=expression-not-assigned

    return full_grammar
class PuppetDBQuery(BaseQuery):
    """PuppetDB query builder.

    The `puppetdb` backend allows the use of an existing PuppetDB instance for the hosts selection.
    The supported PuppetDB API versions are 3 and 4. The version can be specified via the ``api_version``
    configuration key; if not configured, v4 will be used.

    * Each query part can be composed with the others using boolean operators (``and``, ``or``, ``not``).
    * Multiple query parts can be grouped together with parentheses (``(``, ``)``).
    * A query part can be of two different types:

      * ``Hostname matching``: this is a simple string that is used to match directly the hostname of the hosts
        in the selected backend. It allows for glob expansion (``*``) and the use of the powerful
        :py:class:`ClusterShell.NodeSet.NodeSet`.
      * ``Category matching``: an identifier composed by a category, a colon and a key, followed by a comparison
        operator and a value, as in ``F:key = value``.

    Some query examples:

    * All hosts: ``*``
    * Hosts globbing: ``host10*``
    * :py:class:`ClusterShell.NodeSet.NodeSet` syntax for hosts expansion: ``host10[10-42].domain``
    * Category based key-value selection:

      * ``R:Resource::Name``: query all the hosts that have a resource of type `Resource::Name`.
      * ``R:Resource::Name = 'resource-title'``: query all the hosts that have a resource of type
        `Resource::Name` whose title is ``resource-title``. For example ``R:Class = MyModule::MyClass``.
      * ``R:Resource::Name@field = 'some-value'``: query all the hosts that have a resource of type
        ``Resource::Name`` whose field ``field`` has the value ``some-value``. The valid fields are: ``tag``,
        ``certname``, ``type``, ``title``, ``exported``, ``file``, ``line``. The previous syntax is a shortcut
        for this one with the field ``title``.
      * ``R:Resource::Name%param = 'some-value'``: query all the hosts that have a resource of type
        ``Resource::Name`` whose parameter ``param`` has the value ``some-value``.
      * ``C:Class::Name``: special shortcut to query all the hosts that have a resource of type ``Class`` whose
        name is ``Class::Name``. The ``Class::Name`` part is completely arbitrary and depends on the puppet
        hierarchy chosen. It's equivalent to ``R:Class = Class::Name``, with the addition that the ``param`` and
        ``field`` selectors described above can be used directly without the need to add another condition.
      * ``O:Module::Name``: special shortcut to query all the hosts that have a resource of type ``Class`` whose
        name is ``Role::Module::Name``. The ``Module::Name`` part is completely arbitrary and depends on the
        puppet hierarchy chosen. It's equivalent to ``R:Class = Role::Module::Name``, with the addition that the
        ``param`` and ``field`` selectors described above can be used directly without the need to add another
        condition, although usually roles should not have parameters in the role/profile Puppet paradigm.
      * ``P:Module::Name``: special shortcut to query all the hosts that have a resource of type ``Class`` whose
        name is ``Profile::Module::Name``. The ``Module::Name`` part is completely arbitrary and depends on the
        puppet hierarchy chosen. It's equivalent to ``R:Class = Profile::Module::Name``, with the addition that
        the ``param`` and ``field`` selectors described above can be used directly without the need to add
        another condition.
      * ``F:FactName = value``: query all the hosts that have a fact ``FactName``, as reported by facter, with
        the value ``value``.
      * Mixed facts/resources queries are not supported, but the same result can be achieved using the main
        grammar with multiple subqueries for the PuppetDB backend.

    * A complex selection for facts:
      ``host10[10-42].*.domain or (not F:key1 = value1 and host10*) or (F:key2 > value2 and F:key3 ~ '^value[0-9]+')``

    """

    base_url_template = 'https://{host}:{port}'
    """:py:class:`str`: string template in the :py:meth:`str.format` style used to generate the base URL of the PuppetDB server."""

    endpoints = {'C': 'resources', 'F': 'nodes', 'O': 'resources', 'P': 'resources', 'R': 'resources'}
    """:py:class:`dict`: dictionary with the mapping of the available categories in the grammar to the PuppetDB API endpoints."""

    category_prefixes = {'C': '', 'O': 'Role', 'P': 'Profile'}
    """:py:class:`dict`: dictionary with the mapping of special categories to title prefixes."""

    grammar = grammar()
    """:py:class:`pyparsing.ParserElement`: load the grammar parser only once in a singleton-like way."""

[docs] def __init__(self, config): """Query constructor for the PuppetDB backend. :Parameters: according to parent :py:meth:`cumin.backends.BaseQuery.__init__`. """ super().__init__(config) self.grouped_tokens = None self.current_group = self.grouped_tokens self._endpoint = None puppetdb_config = self.config.get('puppetdb', {}) base_url = self.base_url_template.format( host=puppetdb_config.get('host', 'localhost'), port=puppetdb_config.get('port', 443)) self.api_version = puppetdb_config.get('api_version', 4) if self.api_version == 3: self.url = base_url + '/v3/' self.hosts_keys = {'nodes': 'name', 'resources': 'certname'} elif self.api_version == 4: self.url = base_url + '/pdb/query/v4/' self.hosts_keys = {'nodes': 'certname', 'resources': 'certname'} else: raise InvalidQueryError('Unsupported PuppetDB API version {ver}'.format(ver=self.api_version)) for exception in puppetdb_config.get('urllib3_disable_warnings', []):
urllib3.disable_warnings(category=getattr(urllib3.exceptions, exception)) @property def endpoint(self): """Endpoint in the PuppetDB API for the current query. :Getter: Returns the current `endpoint` or a default value if not set. :Setter: :py:class:`str`: the value to set the `endpoint` to. Raises: cumin.backends.InvalidQueryError: if trying to set it to an invalid `endpoint` or mixing endpoints in a single query. """ return self._endpoint or 'nodes' @endpoint.setter def endpoint(self, value): """Setter for the `endpoint` property. The relative documentation is in the getter.""" if value not in self.endpoints.values(): raise InvalidQueryError("Invalid value '{endpoint}' for endpoint property".format(endpoint=value)) if self._endpoint is not None and value != self._endpoint: raise InvalidQueryError('Mixed endpoints are not supported, use the global grammar to mix them.') self._endpoint = value
[docs] def _open_subgroup(self): """Handle subgroup opening.""" token = PuppetDBQuery._get_grouped_tokens() token['parent'] = self.current_group self.current_group['tokens'].append(token)
self.current_group = token
[docs] def _close_subgroup(self): """Handle subgroup closing."""
self.current_group = self.current_group['parent']
[docs] @staticmethod def _get_grouped_tokens(): """Return an empty grouped tokens structure. Returns: dict: the dictionary with the empty grouped tokens structure. """
return {'parent': None, 'bool': None, 'tokens': []}
[docs] def _build(self, query_string): """Override parent class _build method to reset tokens and add logging. :Parameters: according to parent :py:meth:`cumin.backends.BaseQuery._build`. """ self.grouped_tokens = PuppetDBQuery._get_grouped_tokens() self.current_group = self.grouped_tokens super()._build(query_string)
self.logger.trace('Query tokens: %s', self.grouped_tokens)
[docs] def _execute(self): """Concrete implementation of parent abstract method. :Parameters: according to parent :py:meth:`cumin.backends.BaseQuery._execute`. Returns: ClusterShell.NodeSet.NodeSet: with the FQDNs of the matching hosts. """ query = self._get_query_string(group=self.grouped_tokens).format(host_key=self.hosts_keys[self.endpoint]) hosts = self._api_call(query) unique_hosts = nodeset_fromlist([host[self.hosts_keys[self.endpoint]] for host in hosts]) self.logger.debug("Queried puppetdb for '%s', got '%d' results.", query, len(unique_hosts))
return unique_hosts
[docs] def _add_category(self, category, key, value=None, operator='=', neg=False): """Add a category token to the query 'F:key = value'. Arguments: category (str): the category of the token, one of :py:const:`CATEGORIES`. key (str): the key for this category. value (str, optional): the value to match, if not specified the key itself will be matched. operator (str, optional): the comparison operator to use, one of :py:const:`OPERATORS`. neg (bool, optional): whether the token must be negated. Raises: cumin.backends.InvalidQueryError: on internal parsing error. """ self.endpoint = self.endpoints[category] if operator == '~': value = value.replace(r'\\', r'\\\\') # Required by PuppetDB API if category in ('C', 'O', 'P'): query = self._get_special_resource_query(category, key, value, operator) elif category == 'R': query = self._get_resource_query(key, value, operator) elif category == 'F': query = '["{op}", ["fact", "{key}"], "{val}"]'.format(op=operator, key=key, val=value) else: # pragma: no cover - this should never happen raise InvalidQueryError( "Got invalid category '{category}', one of F|O|P|R expected".format(category=category)) if neg: query = '["not", {query}]'.format(query=query)
self.current_group['tokens'].append(query)
[docs] def _add_hosts(self, hosts, neg=False): """Add a list of hosts to the query. Arguments: hosts (ClusterShell.NodeSet.NodeSet): with the list of hosts to search. neg (bool, optional): whether the token must be negated. """ if not hosts: return hosts_tokens = [] for host in hosts: operator = '=' # Convert a glob expansion into a regex if '*' in host: operator = '~' host = r'^' + host.replace('.', r'\\.').replace('*', '.*') + r'$' hosts_tokens.append('["{op}", "{{host_key}}", "{host}"]'.format(op=operator, host=host)) query = '["or", {hosts}]'.format(hosts=', '.join(hosts_tokens)) if neg: query = '["not", {query}]'.format(query=query)
self.current_group['tokens'].append(query)
[docs] def _parse_token(self, token): """Concrete implementation of parent abstract method. :Parameters: according to parent :py:meth:`cumin.backends.BaseQuery._parse_token`. Raises: cumin.backends.InvalidQueryError: on internal parsing error. """ if isinstance(token, str): return token_dict = token.asDict() # Based on the token type build the corresponding query object if 'open_subgroup' in token_dict: self._open_subgroup() for subtoken in token: self._parse_token(subtoken) self._close_subgroup() elif 'bool' in token_dict: self._add_bool(token_dict['bool']) elif 'hosts' in token_dict: token_dict['hosts'] = nodeset(token_dict['hosts']) self._add_hosts(**token_dict) elif 'category' in token_dict: self._add_category(**token_dict) else: # pragma: no cover - this should never happen raise InvalidQueryError(
"No valid key found in token, one of bool|hosts|category expected: {token}".format(token=token_dict))
[docs] def _get_resource_query(self, key, value=None, operator='='): # pylint: disable=no-self-use """Build a resource query based on the parameters, resolving the special cases for ``%params`` and ``@field``. Arguments: key (str): the key of the resource. value (str, optional): the value to match, if not specified the key itself will be matched. operator (str, optional): the comparison operator to use, one of :py:const:`OPERATORS`. Returns: str: the resource query. Raises: cumin.backends.InvalidQueryError: on invalid combinations of parameters. """ if all(char in key for char in ('%', '@')): raise InvalidQueryError(("Resource key cannot contain both '%' (query a resource's parameter) and '@' " "(query a resource's field)")) elif '%' in key: # Querying a specific parameter of the resource if operator == '~' and self.api_version == 3: raise InvalidQueryError('Regex operations are not supported in PuppetDB API v3 for resource parameters') key, param = key.split('%', 1) query_part = ', ["{op}", ["parameter", "{param}"], "{value}"]'.format(op=operator, param=param, value=value) elif '@' in key: # Querying a specific field of the resource key, field = key.split('@', 1) query_part = ', ["{op}", "{field}", "{value}"]'.format(op=operator, field=field, value=value) elif value is None: # Querying a specific resource type query_part = '' else: # Querying a specific resource title if key.lower() == 'class' and operator != '~': value = capwords(value, '::') # Auto ucfirst the class title query_part = ', ["{op}", "title", "{value}"]'.format(op=operator, value=value) query = '["and", ["=", "type", "{type}"]{query_part}]'.format(type=capwords(key, '::'), query_part=query_part)
return query
[docs] def _get_special_resource_query(self, category, key, value, operator): """Build a query for Roles and Profiles, resolving the special cases for ``%params`` and ``@field``. Arguments: category (str): the category of the token, one of :py:data:`category_prefixes` keys. key (str): the key of the resource to use as a suffix for the Class title matching. value (str, optional): the value to match in case ``%params`` or ``@field`` is specified. operator (str, optional): the comparison operator to use if there is a value, one of :py:const:`OPERATORS`. Returns: str: the resource query. Raises: cumin.backends.InvalidQueryError: on invalid combinations of parameters. """ if all(char in key for char in ('%', '@')): raise InvalidQueryError(("Resource key cannot contain both '%' (query a resource's parameter) and '@' " "(query a resource's field)")) elif '%' in key: special = '%' key, param = key.split('%') elif '@' in key: special = '@' key, param = key.split('@') else: special = None if value is not None: raise InvalidQueryError(("Invalid query of the form '{category}:key = value'. The matching of a value " "is accepted only when using %param or @field.").format(category=category)) if self.category_prefixes[category]: title = '{prefix}::{key}'.format(prefix=self.category_prefixes[category], key=key) else: title = key query = self._get_resource_query('Class', title, '=') if special is not None: param_query = self._get_resource_query(''.join(('Class', special, param)), value, operator) query = '["and", {query}, {param_query}]'.format(query=query, param_query=param_query)
return query
[docs] def _get_query_string(self, group): """Recursively build and return the PuppetDB query string. Arguments: group (dict): a dictionary with the grouped tokens. Returns: str: the query string for the PuppetDB API. """ if group['bool']: query = '["{bool}", '.format(bool=group['bool']) else: query = '' last_index = len(group['tokens']) for i, token in enumerate(group['tokens']): if isinstance(token, dict): query += self._get_query_string(group=token) else: query += token if i < last_index - 1: query += ', ' if group['bool']: query += ']'
return query
[docs] def _add_bool(self, bool_op): """Add a boolean AND or OR query block to the query and validate logic. Arguments: bool_op (str): the boolean operator to add to the query: ``and``, ``or``. Raises: cumin.backends.InvalidQueryError: if an invalid boolean operator was found. """ if self.current_group['bool'] is None: self.current_group['bool'] = bool_op elif self.current_group['bool'] == bool_op: return else: raise InvalidQueryError("Got unexpected '{bool}' boolean operator, current operator was '{current}'".format(
bool=bool_op, current=self.current_group['bool']))
[docs] def _api_call(self, query): """Execute a query to PuppetDB API and return the parsed JSON. Arguments: query (str): the query parameter to send to the PuppetDB API. Raises: requests.HTTPError: if the PuppetDB API call fails. """ if self.api_version == 3: resources = requests.get(self.url + self.endpoint, params={'query': query}, verify=True) else: resources = requests.post(self.url + self.endpoint, json={'query': query}, verify=True) resources.raise_for_status()
return resources.json() GRAMMAR_PREFIX = 'P' """:py:class:`str`: the prefix associate to this grammar, to register this backend into the general grammar. Required by the backend auto-loader in :py:meth:`cumin.grammar.get_registered_backends`.""" query_class = PuppetDBQuery # pylint: disable=invalid-name """Required by the backend auto-loader in :py:meth:`cumin.grammar.get_registered_backends`."""