"""PuppetDB backend."""
from re import IGNORECASE
from string import capwords
import pyparsing as pp
import requests
from requests.packages import urllib3
from cumin import nodeset, nodeset_fromlist
from cumin.backends import BaseQuery, InvalidQueryError
CATEGORIES = ('C', 'F', 'O', 'P', 'R')
""":py:func:`tuple`: available categories in the grammar.

* ``C``: shortcut for querying resources of type ``Class``, equivalent of ``R:Class = class_path``.
* ``F``: for querying facts.
* ``O``: shortcut for querying resources of type ``Class`` that starts with ``Role::``.
* ``P``: shortcut for querying resources of type ``Class`` that starts with ``Profile::``.
* ``R``: for querying generic resources.

"""

OPERATORS = ('=', '>=', '<=', '<', '>', '~')
""":py:func:`tuple`: available operators in the grammar, the same available in PuppetDB API.

The ``~`` one is used for regex matching.

"""
class ParsedString:
    """Simple string wrapper which can communicate if a string should be enquoted downstream."""

    def __init__(self, string, is_quoted):
        """Constructor for ParsedString.

        Arguments:
            string (str): The string to store in this object. Any other object is converted with :py:class:`str`.
            is_quoted (bool): Whether the output should be quoted when this is converted to a string.

        """
        self.string = str(string)
        self.is_quoted = is_quoted

    def __str__(self):
        """Return a string version of this value, enquoted or not based on the is_quoted property."""
        if self.is_quoted:
            return '"{}"'.format(self.string)

        return self.string

    def __repr__(self):
        """Return an unambiguous representation showing both the raw string and the quoting flag."""
        return '{name}({string!r}, {is_quoted!r})'.format(
            name=type(self).__name__, string=self.string, is_quoted=self.is_quoted)

    def capwords(self, sep=None):
        """Perform capwords operation on internal value and return a new ParsedString.

        :Parameters:
            according to :py:meth:`string.capwords`. As there, ``sep`` is optional and defaults to :py:data:`None`.

        Returns:
            ParsedString: a new instance with the transformed string and the same quoting flag.

        """
        return ParsedString(capwords(self.string, sep), self.is_quoted)

    def replace(self, old, new, count=-1):
        """Perform replace operation on internal value and return a new ParsedString.

        :Parameters:
            according to :py:meth:`str.replace`.

        Returns:
            ParsedString: a new instance with the transformed string and the same quoting flag.

        """
        return ParsedString(self.string.replace(old, new, count), self.is_quoted)
def grammar():  # pylint: disable=too-many-locals
    """Build the pyparsing parser for the PuppetDB backend query language.

    Backus-Naur form (BNF) of the grammar::

            <grammar> ::= <item> | <item> <and_or> <grammar>
               <item> ::= [<neg>] <query-token> | [<neg>] "(" <grammar> ")"
        <query-token> ::= <token> | <hosts>
              <token> ::= <category>:<key> [<operator> <value>]
              <value> ::= <numeric> | <bareword> | <quoted_string> | <unquoted_string>

    The pyparsing definitions below read in a BNF-like way; refer to them for the tokens that are not detailed
    in the summary above.

    Returns:
        pyparsing.ParserElement: the grammar parser.

    """
    # Boolean connectors between items
    bool_op = (pp.CaselessKeyword('and') | pp.CaselessKeyword('or'))('bool')
    # Labelled 'neg' because 'not' is a reserved word in Python and could not be accessed with dot notation
    negation = pp.CaselessKeyword('not')('neg')
    # Comparison operators
    comparison = pp.oneOf(OPERATORS, caseless=True)('operator')
    # Strings in either single or double quotes, returned without the quotes
    quoted = pp.quotedString.copy().addParseAction(pp.removeQuotes)

    # Subgroup delimiters
    open_par = pp.Literal('(')('open_subgroup')
    close_par = pp.Literal(')')('close_subgroup')

    # Hosts selection: glob (*) and clustershell (,!&^[]) syntaxes are allowed, i.e. host10[10-42].*.domain
    # The negative lookahead prevents the boolean keywords from being consumed as host names.
    bare_hosts = ~(bool_op | negation) + pp.Word(pp.alphanums + '-_.*,!&^[]')
    hosts = quoted | bare_hosts

    # Category selector, i.e. F:key
    category = pp.oneOf(CATEGORIES, caseless=True)('category')
    key = pp.Word(pp.alphanums + '-_.%@:')('key')
    selector = pp.Combine(category + ':' + key)

    # Printable characters minus the parentheses/braces used by this grammar or by the global one
    unquoted_chars = ''.join(char for char in pp.printables if char not in ('(', ')', '{', '}'))

    # Values: PuppetDB accepts JSON atoms
    bareword = pp.oneOf(('true', 'false'))
    # Bare numerics with a leading 0 are octal and are converted to integers
    octal = pp.Word("0", "01234567", min=2).addParseAction(lambda toks: int(toks[0], 8))
    # Hexadecimal integers in the format 0x[0-9A-F]+ are converted to integers
    hexadecimal = pp.Regex(r'0x[0-9A-F]+', flags=IGNORECASE).addParseAction(lambda toks: int(toks[0], 16))
    number = pp.pyparsing_common.number

    # The results name drives the post-processing: 'value' is emitted as-is, 'quoted' is enquoted
    raw_value = (hexadecimal ^ octal ^ number ^ bareword)('value')
    string_value = (quoted ^ pp.Word(unquoted_chars))('quoted')
    value = raw_value ^ string_value

    # Key-value token for the allowed categories, i.e. F:key = value
    token = selector + pp.Optional(comparison + value)

    # Final recursive grammar; each item is wrapped in a Group to keep its parsed results separated
    full_grammar = pp.Forward()
    simple_item = pp.Group(pp.Optional(negation) + (token | hosts('hosts')))
    subgroup_item = pp.Group(pp.Optional(negation) + open_par + full_grammar + close_par)
    item = simple_item | subgroup_item
    full_grammar <<= item + pp.ZeroOrMore(pp.Group(bool_op) + full_grammar)

    return full_grammar
class PuppetDBQuery(BaseQuery):
    """PuppetDB query builder.

    The `puppetdb` backend allows to use an existing PuppetDB instance for the hosts selection.
    The supported PuppetDB API versions are 3 and 4. It can be specified via the api_version configuration key, if
    not configured, the v4 will be used.

    * Each query part can be composed with the others using boolean operators (``and``, ``or``, ``not``)
    * Multiple query parts can be grouped together with parentheses (``(``, ``)``).
    * A query part can be of two different types:

      * ``Hostname matching``: this is a simple string that can be used to match directly the hostname of the
        hosts in the selected backend. It allows for glob expansion (``*``) and the use of the powerful
        :py:class:`ClusterShell.NodeSet.NodeSet`.
      * ``Category matching``: an identifier composed by a category, a colon and a key, followed by a comparison
        operator and a value, as in ``F:key = value``.

    * Values may be of various types supported by PuppetDB (numerics, boolean, and strings) for example:

      * Booleans: ``true``, ``false``
      * Strings: ``'a string'``, and unquoted single words that aren't ``true`` or ``false`` and do not start with
        an integer.
      * Numeric values: ``15``, ``23.5``, ``0``, ``0xfa`` *Note: hexadecimal and octal numbers are supported by
        cumin but converted into normal integers. Some fields in PuppetDB may have hex or octal stored as strings,
        and should be quoted such as* ``'0xfa'``.

      *Note: PuppetDB may or may not support a particular value type for a particular resource.*

    Some query examples:

    * All hosts: ``*``
    * Hosts globbing: ``host10*``
    * :py:class:`ClusterShell.NodeSet.NodeSet` syntax for hosts expansion: ``host10[10-42].domain``
    * Category based key-value selection:

      * ``R:Resource::Name``: query all the hosts that have a resource of type `Resource::Name`.
      * ``R:Resource::Name = 'resource-title'``: query all the hosts that have a resource of type
        `Resource::Name` whose title is ``resource-title``. For example ``R:Class = MyModule::MyClass``.
      * ``R:Resource::Name@field = 'some-value'``: query all the hosts that have a resource of type
        ``Resource::Name`` whose field ``field`` has the value ``some-value``. The valid fields are: ``tag``,
        ``certname``, ``type``, ``title``, ``exported``, ``file``, ``line``. The previous syntax is a shortcut for
        this one with the field ``title``.
      * ``R:Resource::Name%param = 'some-value'``: query all the hosts that have a resource of type
        ``Resource::Name`` whose parameter ``param`` has the value ``some-value``.
      * ``C:Class::Name``: special shortcut to query all the hosts that have a resource of type ``Class`` whose
        name is ``Class::Name``. The ``Class::Name`` part is completely arbitrary and depends on the puppet
        hierarchy chosen. It's equivalent to ``R:Class = Class::Name``, with the addition that the ``param`` and
        ``field`` selectors described above can be used directly without the need to add another condition.
      * ``O:Module::Name``: special shortcut to query all the hosts that have a resource of type ``Class`` whose
        name is ``Role::Module::Name``. The ``Module::Name`` part is completely arbitrary and depends on the
        puppet hierarchy chosen. It's equivalent to ``R:Class = Role::Module::Name``, with the addition that the
        ``param`` and ``field`` selectors described above can be used directly without the need to add another
        condition, although usually roles should not have parameters in the role/profile Puppet paradigm.
      * ``P:Module::Name``: special shortcut to query all the hosts that have a resource of type ``Class`` whose
        name is ``Profile::Module::Name``. The ``Module::Name`` part is completely arbitrary and depends on the
        puppet hierarchy chosen. It's equivalent to ``R:Class = Profile::Module::Name``, with the addition that
        the ``param`` and ``field`` selectors described above can be used directly without the need to add
        another condition.
      * ``F:FactName = value``: query all the hosts that have a fact ``FactName``, as reported by facter, with the
        value ``value``.
      * Mixed facts/resources queries are not supported, but the same result can be achieved using the main
        grammar with multiple subqueries for the PuppetDB backend.

    * All hosts with physicalcorecount fact greater than 2: ``F:physicalcorecount > 2``
    * A complex selection for facts:
      ``host10[10-42].*.domain or (not F:key1 = value1 and host10*) or (F:key2 > value2 and F:key3 ~ '^value[0-9]+')``

    """

    base_url_template = '{scheme}://{host}:{port}'
    """:py:class:`str`: string template in the :py:meth:`str.format` style used to generate the base URL of the
    PuppetDB server."""

    endpoints = {'C': 'resources', 'F': 'nodes', 'O': 'resources', 'P': 'resources', 'R': 'resources'}
    """:py:class:`dict`: dictionary with the mapping of the available categories in the grammar to the PuppetDB API
    endpoints."""

    category_prefixes = {'C': '', 'O': 'Role', 'P': 'Profile'}
    """:py:class:`dict`: dictionary with the mapping of special categories to title prefixes."""

    grammar = grammar()
    """:py:class:`pyparsing.ParserElement`: load the grammar parser only once in a singleton-like way."""

    def __init__(self, config):
        """Query constructor for the PuppetDB backend.

        :Parameters:
            according to parent :py:meth:`cumin.backends.BaseQuery.__init__`.

        Raises:
            cumin.backends.InvalidQueryError: if the configured ``api_version`` is neither ``3`` nor ``4``.

        """
        super().__init__(config)
        self.grouped_tokens = None
        self.current_group = self.grouped_tokens
        self._endpoint = None

        puppetdb_config = self.config.get('puppetdb', {})
        base_url = self.base_url_template.format(
            scheme=puppetdb_config.get('scheme', 'https'),
            host=puppetdb_config.get('host', 'localhost'),
            port=puppetdb_config.get('port', 443))

        self.api_version = puppetdb_config.get('api_version', 4)
        # The name of the key holding the host name in the API results differs between versions and endpoints
        if self.api_version == 3:
            self.url = base_url + '/v3/'
            self.hosts_keys = {'nodes': 'name', 'resources': 'certname'}
        elif self.api_version == 4:
            self.url = base_url + '/pdb/query/v4/'
            self.hosts_keys = {'nodes': 'certname', 'resources': 'certname'}
        else:
            raise InvalidQueryError('Unsupported PuppetDB API version {ver}'.format(ver=self.api_version))

        # Each configured item is the name of a warning class in urllib3.exceptions to be silenced
        for exception in puppetdb_config.get('urllib3_disable_warnings', []):
            urllib3.disable_warnings(category=getattr(urllib3.exceptions, exception))

    @property
    def endpoint(self):
        """Endpoint in the PuppetDB API for the current query.

        :Getter:
            Returns the current `endpoint` or a default value if not set.

        :Setter:
            :py:class:`str`: the value to set the `endpoint` to.

        Raises:
            cumin.backends.InvalidQueryError: if trying to set it to an invalid `endpoint` or mixing endpoints in a
            single query.

        """
        return self._endpoint or 'nodes'

    @endpoint.setter
    def endpoint(self, value):
        """Setter for the `endpoint` property. The relative documentation is in the getter."""
        if value not in self.endpoints.values():
            raise InvalidQueryError("Invalid value '{endpoint}' for endpoint property".format(endpoint=value))
        if self._endpoint is not None and value != self._endpoint:
            raise InvalidQueryError('Mixed endpoints are not supported, use the global grammar to mix them.')

        self._endpoint = value

    def _open_subgroup(self, neg=False):
        """Handle subgroup opening.

        Arguments:
            neg (bool, optional): whether the whole subgroup is negated, as in ``not (...)``.

        """
        token = PuppetDBQuery._get_grouped_tokens()
        token['parent'] = self.current_group
        token['neg'] = bool(neg)
        self.current_group['tokens'].append(token)
        self.current_group = token

    def _close_subgroup(self):
        """Handle subgroup closing."""
        self.current_group = self.current_group['parent']

    @staticmethod
    def _get_grouped_tokens():
        """Return an empty grouped tokens structure.

        Returns:
            dict: the dictionary with the empty grouped tokens structure.

        """
        return {'parent': None, 'bool': None, 'neg': False, 'tokens': []}

    def _build(self, query_string):
        """Override parent class _build method to reset tokens and add logging.

        :Parameters:
            according to parent :py:meth:`cumin.backends.BaseQuery._build`.

        """
        self.grouped_tokens = PuppetDBQuery._get_grouped_tokens()
        self.current_group = self.grouped_tokens
        super()._build(query_string)
        self.logger.trace('Query tokens: %s', self.grouped_tokens)

    def _execute(self):
        """Concrete implementation of parent abstract method.

        :Parameters:
            according to parent :py:meth:`cumin.backends.BaseQuery._execute`.

        Returns:
            ClusterShell.NodeSet.NodeSet: with the FQDNs of the matching hosts.

        """
        host_key = self.hosts_keys[self.endpoint]
        # The hosts tokens carry a literal '{host_key}' placeholder because the endpoint is known only once the
        # whole query has been parsed. It is substituted with a plain replace and not with str.format() because the
        # query embeds user-provided values that can legitimately contain braces (i.e. regex quantifiers like
        # 'a{2}'), that str.format() would either choke on or alter.
        query = self._get_query_string(group=self.grouped_tokens).replace('{host_key}', host_key)
        hosts = self._api_call(query)
        unique_hosts = nodeset_fromlist([host[host_key] for host in hosts])
        self.logger.debug("Queried puppetdb for '%s', got '%d' results.", query, len(unique_hosts))

        return unique_hosts

    def _add_category(self, category, key, value=None, operator='=', neg=False):
        """Add a category token to the query 'F:key = value'.

        Arguments:
            category (str): the category of the token, one of :py:const:`CATEGORIES`.
            key (str): the key for this category.
            value (ParsedString, optional): the value to match, if not specified the key itself will be matched.
            operator (str, optional): the comparison operator to use, one of :py:const:`OPERATORS`.
            neg (bool, optional): whether the token must be negated.

        Raises:
            cumin.backends.InvalidQueryError: on internal parsing error.

        """
        # Setting the endpoint also validates that facts and resources are not mixed in the same query
        self.endpoint = self.endpoints[category]
        if operator == '~':
            # PuppetDB API requires to escape every backslash
            # See: https://puppet.com/docs/puppetdb/4.4/api/query/v4/ast.html#regexp-match
            value = value.replace('\\', '\\\\')

        if category in ('C', 'O', 'P'):
            query = self._get_special_resource_query(category, key, value, operator)
        elif category == 'R':
            query = self._get_resource_query(key, value, operator)
        elif category == 'F':
            query = '["{op}", ["fact", "{key}"], {val}]'.format(op=operator, key=key, val=value)
        else:  # pragma: no cover - this should never happen
            raise InvalidQueryError(
                "Got invalid category '{category}', one of C|F|O|P|R expected".format(category=category))

        if neg:
            query = '["not", {query}]'.format(query=query)

        self.current_group['tokens'].append(query)

    def _add_hosts(self, hosts, neg=False):
        """Add a list of hosts to the query.

        Arguments:
            hosts (ClusterShell.NodeSet.NodeSet): with the list of hosts to search.
            neg (bool, optional): whether the token must be negated.

        """
        if not hosts:
            return

        hosts_tokens = []
        for host in hosts:
            operator = '='
            # Convert a glob expansion into a regex
            if '*' in host:
                operator = '~'
                host = r'^' + host.replace('.', r'\\.').replace('*', '.*') + r'$'

            # The doubled braces leave a literal '{host_key}' placeholder that is resolved in _execute()
            hosts_tokens.append('["{op}", "{{host_key}}", "{host}"]'.format(op=operator, host=host))

        query = '["or", {hosts}]'.format(hosts=', '.join(hosts_tokens))
        if neg:
            query = '["not", {query}]'.format(query=query)

        self.current_group['tokens'].append(query)

    def _parse_token(self, token):
        """Concrete implementation of parent abstract method.

        :Parameters:
            according to parent :py:meth:`cumin.backends.BaseQuery._parse_token`.

        Raises:
            cumin.backends.InvalidQueryError: on internal parsing error.

        """
        # Bare strings are the literal parentheses and keywords inside a subgroup, already handled by their group
        if isinstance(token, str):
            return

        token_dict = token.asDict()

        # post-process types
        if 'quoted' in token_dict:
            token_dict['value'] = ParsedString(token_dict['quoted'], True)
            del token_dict['quoted']
        elif 'value' in token_dict:
            token_dict['value'] = ParsedString(token_dict['value'], False)

        # Based on the token type build the corresponding query object
        if 'open_subgroup' in token_dict:
            # Propagate the negation of the whole subgroup, otherwise 'not (...)' would select the opposite hosts
            self._open_subgroup(neg=bool(token_dict.get('neg')))
            for subtoken in token:
                self._parse_token(subtoken)
            self._close_subgroup()

        elif 'bool' in token_dict:
            self._add_bool(token_dict['bool'])

        elif 'hosts' in token_dict:
            token_dict['hosts'] = nodeset(token_dict['hosts'])
            self._add_hosts(**token_dict)

        elif 'category' in token_dict:
            self._add_category(**token_dict)

        else:  # pragma: no cover - this should never happen
            raise InvalidQueryError(
                "No valid key found in token, one of bool|hosts|category expected: {token}".format(token=token_dict))

    def _get_resource_query(self, key, value=None, operator='='):
        """Build a resource query based on the parameters, resolving the special cases for ``%params`` and ``@field``.

        Arguments:
            key (str): the key of the resource.
            value (ParsedString, optional): the value to match, if not specified the key itself will be matched.
            operator (str, optional): the comparison operator to use, one of :py:const:`OPERATORS`.

        Returns:
            str: the resource query.

        Raises:
            cumin.backends.InvalidQueryError: on invalid combinations of parameters.

        """
        if all(char in key for char in ('%', '@')):
            raise InvalidQueryError(("Resource key cannot contain both '%' (query a resource's parameter) and '@' "
                                     "(query a resource's field)"))

        if '%' in key:
            # Querying a specific parameter of the resource
            if operator == '~' and self.api_version == 3:
                raise InvalidQueryError('Regex operations are not supported in PuppetDB API v3 for resource parameters')
            key, param = key.split('%', 1)
            query_part = ', ["{op}", ["parameter", "{param}"], {value}]'.format(op=operator, param=param, value=value)

        elif '@' in key:
            # Querying a specific field of the resource
            key, field = key.split('@', 1)
            query_part = ', ["{op}", "{field}", {value}]'.format(op=operator, field=field, value=value)

        elif value is None:
            # Querying a specific resource type
            query_part = ''

        else:
            # Querying a specific resource title
            if key.lower() == 'class' and operator != '~':
                value = value.capwords('::')  # Auto ucfirst the class title
            query_part = ', ["{op}", "title", {value}]'.format(op=operator, value=value)

        query = '["and", ["=", "type", "{type}"]{query_part}]'.format(type=capwords(key, '::'), query_part=query_part)

        return query

    def _get_special_resource_query(self, category, key, value, operator):
        """Build a query for Roles and Profiles, resolving the special cases for ``%params`` and ``@field``.

        Arguments:
            category (str): the category of the token, one of :py:data:`category_prefixes` keys.
            key (str): the key of the resource to use as a suffix for the Class title matching.
            value (ParsedString, optional): the value to match in case ``%params`` or ``@field`` is specified.
            operator (str, optional): the comparison operator to use if there is a value, one of
                :py:const:`OPERATORS`.

        Returns:
            str: the resource query.

        Raises:
            cumin.backends.InvalidQueryError: on invalid combinations of parameters.

        """
        if all(char in key for char in ('%', '@')):
            raise InvalidQueryError(("Resource key cannot contain both '%' (query a resource's parameter) and '@' "
                                     "(query a resource's field)"))

        # Split only on the first separator, consistently with _get_resource_query(), so that a key with a
        # repeated separator doesn't fail the unpacking with a bare ValueError.
        if '%' in key:
            special = '%'
            key, param = key.split('%', 1)
        elif '@' in key:
            special = '@'
            key, param = key.split('@', 1)
        else:
            special = None
            if value is not None:
                raise InvalidQueryError(("Invalid query of the form '{category}:key = value'. The matching of a value "
                                         "is accepted only when using %param or @field.").format(category=category))

        if self.category_prefixes[category]:
            title = ParsedString('{prefix}::{key}'.format(prefix=self.category_prefixes[category], key=key), True)
        else:
            title = ParsedString(key, True)

        # The class itself is always matched by title, the optional param/field is AND-ed as a second condition
        query = self._get_resource_query('Class', title, '=')

        if special is not None:
            param_query = self._get_resource_query(''.join(('Class', special, param)), value, operator)
            query = '["and", {query}, {param_query}]'.format(query=query, param_query=param_query)

        return query

    def _get_query_string(self, group):
        """Recursively build and return the PuppetDB query string.

        Arguments:
            group (dict): a dictionary with the grouped tokens.

        Returns:
            str: the query string for the PuppetDB API.

        """
        # Tokens are either already formatted query strings or nested groups (dictionaries) to be expanded
        parts = [self._get_query_string(group=token) if isinstance(token, dict) else token
                 for token in group['tokens']]
        query = ', '.join(parts)

        if group['bool']:
            query = '["{bool}", {query}]'.format(bool=group['bool'], query=query)

        # A negated subgroup wraps the whole group, the PuppetDB 'not' operator accepts a single argument
        if group.get('neg') and query:
            query = '["not", {query}]'.format(query=query)

        return query

    def _add_bool(self, bool_op):
        """Add a boolean AND or OR query block to the query and validate logic.

        Arguments:
            bool_op (str): the boolean operator to add to the query: ``and``, ``or``.

        Raises:
            cumin.backends.InvalidQueryError: if an invalid boolean operator was found.

        """
        if self.current_group['bool'] is None:
            self.current_group['bool'] = bool_op
        elif self.current_group['bool'] == bool_op:
            return
        else:
            # Mixing different operators in the same group would be ambiguous without explicit parentheses
            raise InvalidQueryError("Got unexpected '{bool}' boolean operator, current operator was '{current}'".format(
                bool=bool_op, current=self.current_group['bool']))

    def _api_call(self, query):
        """Execute a query to PuppetDB API and return the parsed JSON.

        Arguments:
            query (str): the query parameter to send to the PuppetDB API.

        Returns:
            the decoded JSON body of the response, as returned by :py:meth:`requests.Response.json`.

        Raises:
            requests.HTTPError: if the PuppetDB API call fails.

        """
        # NOTE(review): no timeout is passed to requests, so a stalled server would block the call indefinitely
        if self.api_version == 3:
            resources = requests.get(self.url + self.endpoint, params={'query': query}, verify=True)
        else:
            resources = requests.post(self.url + self.endpoint, json={'query': query}, verify=True)

        resources.raise_for_status()

        return resources.json()
GRAMMAR_PREFIX = 'P'
""":py:class:`str`: the prefix associated with this grammar, used to register this backend into the general grammar.
Required by the backend auto-loader in :py:meth:`cumin.grammar.get_registered_backends`."""

query_class = PuppetDBQuery  # pylint: disable=invalid-name
"""Required by the backend auto-loader in :py:meth:`cumin.grammar.get_registered_backends`."""