Skip to content

Commit

Permalink
Fix /var/lib/nova access for NFS shares
Browse files Browse the repository at this point in the history
When /var/lib/nova is mounted via NFS, Kolla driven permissions
change over that directory breaks access to it for VM instances.

That had been already solved in TripleO by introducing a custom
init container that calls a script for smart ownership changes.

Reintroduce that script and tests from TripleO as is, and add a
Nova EDPM init container to call it instead of recursive chown.

Signed-off-by: Bohdan Dobrelia <[email protected]>
  • Loading branch information
bogdando committed Nov 5, 2024
1 parent 25122fc commit 9f1464a
Show file tree
Hide file tree
Showing 9 changed files with 739 additions and 6 deletions.
12 changes: 12 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
---
repos:
- repo: local
hooks:
- id: pytest
name: pytest
entry: pytest tests/test_nova_statedir_ownership.py
language: python
types: [python]
pass_filenames: false
always_run: true
additional_dependencies:
- pytest
- oslotest
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
hooks:
Expand Down
2 changes: 1 addition & 1 deletion molecule/test-helpers/verify_podman.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
block:
- name: Check if podman container exists {{ item }}
become: true
ansible.builtin.command: podman ps -a --filter name={{item}} --format \{\{.Names\}\}
ansible.builtin.command: podman ps -a --filter name=^{{item}}\$ --format \{\{.Names\}\}
register: containers
- name: Assert podman container exists {{ item }}
ansible.builtin.assert:
Expand Down
7 changes: 7 additions & 0 deletions roles/edpm_nova/handlers/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,10 @@
state: restarted
name: "edpm_nova_compute.service"
listen: "Restart nova"

- name: Restart nova init container
become: true
containers.podman.podman_container:
name: nova_compute_init
state: started
listen: "Restart nova init"
2 changes: 2 additions & 0 deletions roles/edpm_nova/tasks/configure.yml
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,9 @@
# openstack Caracal)
# https://blueprints.launchpad.net/nova/+spec/libvirt-migrate-with-hostname-instead-of-ip
- {"src": "02-nova-host-specific.conf.j2", "dest": "02-nova-host-specific.conf"}
- {"src": "nova_statedir_ownership.py", "dest": "nova_statedir_ownership.py"}
notify:
- Restart nova init
- Restart nova

- name: Create .ssh directory for the nova user on the host
Expand Down
24 changes: 24 additions & 0 deletions roles/edpm_nova/tasks/install.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,30 @@
notify:
- Restart nova

- name: Render nova init container
tags:
- install
- nova
ansible.builtin.template:
src: "nova_compute_init.json.j2"
dest: "/var/lib/openstack/config/containers/nova_compute_init.json"
setype: "container_file_t"
mode: "0700"
notify:
- Restart nova init

- name: Deploy nova init container
tags:
- install
- nova
ansible.builtin.include_role:
name: osp.edpm.edpm_container_manage
vars:
edpm_container_manage_config: '/var/lib/openstack/config/containers'
edpm_container_manage_healthcheck_disabled: true
edpm_container_manage_config_patterns: 'nova_compute_init.json'
edpm_container_manage_clean_orphans: false

- name: Deploy nova container
tags:
- install
Expand Down
5 changes: 0 additions & 5 deletions roles/edpm_nova/templates/config.json.j2
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,6 @@
"owner": "nova:nova",
"perm:": "0600"
},
{
"path": "/var/lib/nova",
"owner": "nova:nova",
"recurse": true
},
{
"path": "/var/lib/nova/.ssh/",
"owner": "nova:nova",
Expand Down
20 changes: 20 additions & 0 deletions roles/edpm_nova/templates/nova_compute_init.json.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"image": "{{ edpm_nova_compute_image }}",
"privileged": false,
"user": "root",
"restart": "never",
"command": "bash -c $* -- eval python3 /sbin/nova_statedir_ownership.py | logger -t nova_compute_init",
"net": "none",
"security_opt": ["label=disable"],
"detach": false,
"environment": {
"NOVA_STATEDIR_OWNERSHIP_SKIP": "/var/lib/nova/compute_id",
"__OS_DEBUG": false
},
"volumes": [
"/dev/log:/dev/log",
"/var/lib/nova:/var/lib/nova:shared",
"/var/lib/_nova_secontext:/var/lib/_nova_secontext:shared,z",
"/var/lib/openstack/config/nova/nova_statedir_ownership.py:/sbin/nova_statedir_ownership.py:z"
]
}
247 changes: 247 additions & 0 deletions roles/edpm_nova/templates/nova_statedir_ownership.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
#!/usr/bin/env python
#
# Copyright 2018 Red Hat Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import logging
import os
import pwd
import selinux
import stat
import sys

debug = os.getenv('__OS_DEBUG', 'false')

if debug.lower() == 'true':
loglevel = logging.DEBUG
else:
loglevel = logging.INFO

logging.basicConfig(stream=sys.stdout, level=loglevel)
LOG = logging.getLogger('nova_statedir')


class PathManager(object):
"""Helper class to manipulate ownership of a given path"""
def __init__(self, path):
self.path = path
self.uid = None
self.gid = None
self.is_dir = None
self.secontext = None
self._update()

def _update(self):
try:
statinfo = os.stat(self.path)
self.is_dir = stat.S_ISDIR(statinfo.st_mode)
self.uid = statinfo.st_uid
self.gid = statinfo.st_gid
self.secontext = selinux.lgetfilecon(self.path)[1]
except Exception:
LOG.exception('Could not update metadata for %s', self.path)
raise

def __str__(self):
return "uid: {} gid: {} path: {}{}".format(
self.uid,
self.gid,
self.path,
'/' if self.is_dir else ''
)

def has_owner(self, uid, gid):
return self.uid == uid and self.gid == gid

def has_either(self, uid, gid):
return self.uid == uid or self.gid == gid

def chown(self, uid, gid):
target_uid = -1
target_gid = -1
if self.uid != uid:
target_uid = uid
if self.gid != gid:
target_gid = gid
if (target_uid, target_gid) != (-1, -1):
LOG.info('Changing ownership of %s from %d:%d to %d:%d',
self.path,
self.uid,
self.gid,
self.uid if target_uid == -1 else target_uid,
self.gid if target_gid == -1 else target_gid)
try:
os.chown(self.path, target_uid, target_gid)
self._update()
except Exception:
LOG.exception('Could not change ownership of %s: ',
self.path)
raise
else:
LOG.info('Ownership of %s already %d:%d',
self.path,
uid,
gid)

def chcon(self, context):
# If dir returns whether to recursively set context
try:
try:
selinux.lsetfilecon(self.path, context)
LOG.info('Setting selinux context of %s to %s',
self.path, context)
return True
except OSError as e:
if self.is_dir and e.errno == 95:
# Operation not supported, assume NFS mount and skip
LOG.info('Setting selinux context not supported for %s',
self.path)
return False
else:
raise
except Exception:
LOG.exception('Could not set selinux context of %s to %s:',
self.path, context)
raise


class NovaStatedirOwnershipManager(object):
"""Class to manipulate the ownership of the nova statedir (/var/lib/nova).
The nova uid/gid differ on the host and container images. An upgrade
that switches from host systemd services to docker requires a change in
ownership. Previously this was a naive recursive chown, however this
causes issues if nova instance are shared via an NFS mount: any open
filehandles in qemu/libvirt fail with an I/O error (LP1778465).
Instead the upgrade/FFU ansible tasks now lay down a marker file when
stopping and disabling the host systemd services. We use this file to
determine the host nova uid/gid. We then walk the tree and update any
files that have the host uid/gid to the docker nova uid/gid. As files
owned by root/qemu etc... are ignored this avoids the issues with open
filehandles. The marker is removed once the tree has been walked.
For subsequent runs, or for a new deployment, we simply ensure that the
docker nova user/group owns all directories. This is required as the
directories are created with root ownership in host_prep_tasks (the
docker nova uid/gid is not known in this context).
"""
def __init__(self, statedir, upgrade_marker='upgrade_marker',
nova_user='nova', secontext_marker='../_nova_secontext',
exclude_paths=None):
self.statedir = statedir
self.nova_user = nova_user

self.upgrade_marker_path = os.path.join(statedir, upgrade_marker)
self.secontext_marker_path = os.path.normpath(os.path.join(statedir, secontext_marker))
self.upgrade = os.path.exists(self.upgrade_marker_path)

self.exclude_paths = [self.upgrade_marker_path]
if exclude_paths is not None:
for p in exclude_paths:
if not p.startswith(os.path.sep):
p = os.path.join(self.statedir, p)
self.exclude_paths.append(p)

self.target_uid, self.target_gid = self._get_nova_ids()
self.previous_uid, self.previous_gid = self._get_previous_nova_ids()
self.id_change = (self.target_uid, self.target_gid) != \
(self.previous_uid, self.previous_gid)
self.target_secontext = self._get_secontext()

def _get_nova_ids(self):
nova_uid, nova_gid = pwd.getpwnam(self.nova_user)[2:4]
return nova_uid, nova_gid

def _get_previous_nova_ids(self):
if self.upgrade:
statinfo = os.stat(self.upgrade_marker_path)
return statinfo.st_uid, statinfo.st_gid
else:
return self._get_nova_ids()

def _get_secontext(self):
if os.path.exists(self.secontext_marker_path):
return selinux.lgetfilecon(self.secontext_marker_path)[1]
else:
return None

def _walk(self, top, chcon=True):
for f in os.listdir(top):
pathname = os.path.join(top, f)

if pathname in self.exclude_paths:
continue

try:
pathinfo = PathManager(pathname)
LOG.info("Checking %s", pathinfo)
if pathinfo.is_dir:
# Always chown the directories
pathinfo.chown(self.target_uid, self.target_gid)
chcon_r = chcon
if chcon:
chcon_r = pathinfo.chcon(self.target_secontext)
self._walk(pathname, chcon_r)
elif self.id_change:
# Only chown files if it's an upgrade and the file is owned by
# the host nova uid/gid
pathinfo.chown(
self.target_uid if pathinfo.uid == self.previous_uid
else pathinfo.uid,
self.target_gid if pathinfo.gid == self.previous_gid
else pathinfo.gid
)
if chcon:
pathinfo.chcon(self.target_secontext)
except Exception:
# Likely to have been caused by external systems
# interacting with this directory tree,
# especially on NFS e.g snapshot dirs.
# Just ignore it and continue on to the next entry
continue

def run(self):
LOG.info('Applying nova statedir ownership')
LOG.info('Target ownership for %s: %d:%d',
self.statedir,
self.target_uid,
self.target_gid)

pathinfo = PathManager(self.statedir)
LOG.info("Checking %s", pathinfo)
pathinfo.chown(self.target_uid, self.target_gid)
chcon = self.target_secontext is not None

if chcon:
pathinfo.chcon(self.target_secontext)

self._walk(self.statedir, chcon)

if self.upgrade:
LOG.info('Removing upgrade_marker %s',
self.upgrade_marker_path)
os.unlink(self.upgrade_marker_path)

LOG.info('Nova statedir ownership complete')


def get_exclude_paths():
exclude_paths = os.environ.get('NOVA_STATEDIR_OWNERSHIP_SKIP')
if exclude_paths is not None:
exclude_paths = exclude_paths.split(os.pathsep)
return exclude_paths


if __name__ == '__main__':
NovaStatedirOwnershipManager('/var/lib/nova', exclude_paths=get_exclude_paths()).run()
Loading

0 comments on commit 9f1464a

Please sign in to comment.