On Tue, Mar 16, 2021 at 9:43 PM Mickaël Salaün <mic@digikod.net> wrote:
Using Landlock objects and ruleset, it is possible to tag inodes according to a process's domain.
[...]
/*
 * release_inode - Untie a Landlock object from its underlying inode.
 *
 * @object: Object whose ->underobj points to the inode (or is NULL).
 *
 * Annotated __releases(object->lock): object->lock is unlocked on every
 * path through this function.  If @object has no underlying inode, the
 * function only unlocks and returns.  Otherwise it clears
 * object->underobj, resets landlock_inode(inode)->object to NULL, and
 * drops one inode reference with iput().  The superblock's inode_refs
 * counter is incremented before the unlock and decremented after iput(),
 * waking any waiter when it reaches zero.
 */
+static void release_inode(struct landlock_object *const object)
__releases(object->lock)+{
	struct inode *const inode = object->underobj;
	struct super_block *sb;

	/* Nothing is tied to this object: just drop the lock. */
	if (!inode) {
		spin_unlock(&object->lock);
		return;
	}
	/*
	 * Protects against concurrent use by hook_sb_delete() of the reference
	 * to the underlying inode.
	 */
	object->underobj = NULL;
	/*
	 * Makes sure that if the filesystem is concurrently unmounted,
	 * hook_sb_delete() will wait for us to finish iput().
	 */
	sb = inode->i_sb;
	atomic_long_inc(&landlock_superblock(sb)->inode_refs);
	spin_unlock(&object->lock);
	/*
	 * Because object->underobj was not NULL, hook_sb_delete() and
	 * get_inode_object() guarantee that it is safe to reset
	 * landlock_inode(inode)->object while it is not NULL. It is therefore
	 * not necessary to lock inode->i_lock.
	 *
	 * NOTE(review): this store runs after object->lock has been released
	 * and without inode->i_lock; any reader that relies on inode->i_lock
	 * alone to keep this pointer stable should be checked against that.
	 */
	rcu_assign_pointer(landlock_inode(inode)->object, NULL);
	/*
	 * Now, new rules can safely be tied to @inode with get_inode_object().
	 */
	iput(inode);
	/* Drop our inode_refs count; wake waiters if we were the last one. */
	if (atomic_long_dec_and_test(&landlock_superblock(sb)->inode_refs))
		wake_up_var(&landlock_superblock(sb)->inode_refs);
+}
[...]
+static struct landlock_object *get_inode_object(struct inode *const inode) +{
struct landlock_object *object, *new_object;struct landlock_inode_security *inode_sec = landlock_inode(inode);rcu_read_lock();+retry:
object = rcu_dereference(inode_sec->object);if (object) {if (likely(refcount_inc_not_zero(&object->usage))) {rcu_read_unlock();return object;}/** We are racing with release_inode(), the object is going* away. Wait for release_inode(), then retry.*/spin_lock(&object->lock);spin_unlock(&object->lock);goto retry;}rcu_read_unlock();/** If there is no object tied to @inode, then create a new one (without* holding any locks).*/new_object = landlock_create_object(&landlock_fs_underops, inode);if (IS_ERR(new_object))return new_object;/* Protects against concurrent get_inode_object() calls. */spin_lock(&inode->i_lock);object = rcu_dereference_protected(inode_sec->object,lockdep_is_held(&inode->i_lock));
rcu_dereference_protected() requires that inode_sec->object cannot change concurrently, but I think it can here: another thread could call get_inode_object() while we're in landlock_create_object() and install an object, and once that object is being released, release_inode() writes NULL to inode_sec->object without holding inode->i_lock, so that NULL write can race with this read. (It wouldn't actually be a UAF though, because we only test `object` for NULL here and never dereference it.) Or am I missing a lock that prevents this?
In v28 this wasn't an issue because release_inode() was holding inode->i_lock (and object->lock) during the NULL store; but in v29 and this version the NULL store in release_inode() moved out of the locked region. I think you could just move the NULL store in release_inode() back up (and maybe add a comment explaining the locking rules for landlock_inode(...)->object)?
(Or alternatively you could use rcu_dereference_raw() with a comment explaining that the read pointer is only used to check for NULL-ness, and that it is guaranteed that the pointer can't change if it is NULL and we're holding the lock. But that'd be needlessly complicated, I think.)
if (unlikely(object)) {/* Someone else just created the object, bail out and retry. */spin_unlock(&inode->i_lock);kfree(new_object);rcu_read_lock();goto retry;}rcu_assign_pointer(inode_sec->object, new_object);/** @inode will be released by hook_sb_delete() on its superblock* shutdown.*/ihold(inode);spin_unlock(&inode->i_lock);return new_object;+}