summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to '0023-x86-pv-Fix-ABAC-cmpxchg-race-in-_get_page_type.patch')
-rw-r--r--0023-x86-pv-Fix-ABAC-cmpxchg-race-in-_get_page_type.patch201
1 files changed, 201 insertions, 0 deletions
diff --git a/0023-x86-pv-Fix-ABAC-cmpxchg-race-in-_get_page_type.patch b/0023-x86-pv-Fix-ABAC-cmpxchg-race-in-_get_page_type.patch
new file mode 100644
index 0000000..2f4b734
--- /dev/null
+++ b/0023-x86-pv-Fix-ABAC-cmpxchg-race-in-_get_page_type.patch
@@ -0,0 +1,201 @@
+From 8dab3f79b122e69cbcdebca72cdc14f004ee2193 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Thu, 9 Jun 2022 15:27:37 +0200
+Subject: [PATCH 23/32] x86/pv: Fix ABAC cmpxchg() race in _get_page_type()
+
+_get_page_type() suffers from a race condition where it incorrectly assumes
+that because 'x' was read and a subsequent a cmpxchg() succeeds, the type
+cannot have changed in-between. Consider:
+
+CPU A:
+ 1. Creates an L2e referencing pg
+ `-> _get_page_type(pg, PGT_l1_page_table), sees count 0, type PGT_writable_page
+ 2. Issues flush_tlb_mask()
+CPU B:
+ 3. Creates a writeable mapping of pg
+ `-> _get_page_type(pg, PGT_writable_page), count increases to 1
+ 4. Writes into new mapping, creating a TLB entry for pg
+ 5. Removes the writeable mapping of pg
+ `-> _put_page_type(pg), count goes back down to 0
+CPU A:
+ 7. Issues cmpxchg(), setting count 1, type PGT_l1_page_table
+
+CPU B now has a writeable mapping to pg, which Xen believes is a pagetable and
+suitably protected (i.e. read-only). The TLB flush in step 2 must be deferred
+until after the guest is prohibited from creating new writeable mappings,
+which is after step 7.
+
+Defer all safety actions until after the cmpxchg() has successfully taken the
+intended typeref, because that is what prevents concurrent users from using
+the old type.
+
+Also remove the early validation for writeable and shared pages. This removes
+race conditions where one half of a parallel mapping attempt can return
+successfully before:
+ * The IOMMU pagetables are in sync with the new page type
+ * Writeable mappings to shared pages have been torn down
+
+This is part of XSA-401 / CVE-2022-26362.
+
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+Reviewed-by: George Dunlap <george.dunlap@citrix.com>
+master commit: 8cc5036bc385112a82f1faff27a0970e6440dfed
+master date: 2022-06-09 14:21:04 +0200
+---
+ xen/arch/x86/mm.c | 116 ++++++++++++++++++++++++++--------------------
+ 1 file changed, 67 insertions(+), 49 deletions(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 79ad7fdd2b82..c6429b0f749a 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -2933,56 +2933,12 @@ static int _get_page_type(struct page_info *page, unsigned long type,
+ * Type changes are permitted when the typeref is 0. If the type
+ * actually changes, the page needs re-validating.
+ */
+- struct domain *d = page_get_owner(page);
+-
+- if ( d && shadow_mode_enabled(d) )
+- shadow_prepare_page_type_change(d, page, type);
+
+ ASSERT(!(x & PGT_pae_xen_l2));
+ if ( (x & PGT_type_mask) != type )
+ {
+- /*
+- * On type change we check to flush stale TLB entries. It is
+- * vital that no other CPUs are left with writeable mappings
+- * to a frame which is intending to become pgtable/segdesc.
+- */
+- cpumask_t *mask = this_cpu(scratch_cpumask);
+-
+- BUG_ON(in_irq());
+- cpumask_copy(mask, d->dirty_cpumask);
+-
+- /* Don't flush if the timestamp is old enough */
+- tlbflush_filter(mask, page->tlbflush_timestamp);
+-
+- if ( unlikely(!cpumask_empty(mask)) &&
+- /* Shadow mode: track only writable pages. */
+- (!shadow_mode_enabled(d) ||
+- ((nx & PGT_type_mask) == PGT_writable_page)) )
+- {
+- perfc_incr(need_flush_tlb_flush);
+- /*
+- * If page was a page table make sure the flush is
+- * performed using an IPI in order to avoid changing the
+- * type of a page table page under the feet of
+- * spurious_page_fault().
+- */
+- flush_mask(mask,
+- (x & PGT_type_mask) &&
+- (x & PGT_type_mask) <= PGT_root_page_table
+- ? FLUSH_TLB | FLUSH_FORCE_IPI
+- : FLUSH_TLB);
+- }
+-
+- /* We lose existing type and validity. */
+ nx &= ~(PGT_type_mask | PGT_validated);
+ nx |= type;
+-
+- /*
+- * No special validation needed for writable pages.
+- * Page tables and GDT/LDT need to be scanned for validity.
+- */
+- if ( type == PGT_writable_page || type == PGT_shared_page )
+- nx |= PGT_validated;
+ }
+ }
+ else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) )
+@@ -3063,6 +3019,56 @@ static int _get_page_type(struct page_info *page, unsigned long type,
+ return -EINTR;
+ }
+
++ /*
++ * One typeref has been taken and is now globally visible.
++ *
++ * The page is either in the "validate locked" state (PGT_[type] | 1) or
++ * fully validated (PGT_[type] | PGT_validated | >0).
++ */
++
++ if ( unlikely((x & PGT_count_mask) == 0) )
++ {
++ struct domain *d = page_get_owner(page);
++
++ if ( d && shadow_mode_enabled(d) )
++ shadow_prepare_page_type_change(d, page, type);
++
++ if ( (x & PGT_type_mask) != type )
++ {
++ /*
++ * On type change we check to flush stale TLB entries. It is
++ * vital that no other CPUs are left with writeable mappings
++ * to a frame which is intending to become pgtable/segdesc.
++ */
++ cpumask_t *mask = this_cpu(scratch_cpumask);
++
++ BUG_ON(in_irq());
++ cpumask_copy(mask, d->dirty_cpumask);
++
++ /* Don't flush if the timestamp is old enough */
++ tlbflush_filter(mask, page->tlbflush_timestamp);
++
++ if ( unlikely(!cpumask_empty(mask)) &&
++ /* Shadow mode: track only writable pages. */
++ (!shadow_mode_enabled(d) ||
++ ((nx & PGT_type_mask) == PGT_writable_page)) )
++ {
++ perfc_incr(need_flush_tlb_flush);
++ /*
++ * If page was a page table make sure the flush is
++ * performed using an IPI in order to avoid changing the
++ * type of a page table page under the feet of
++ * spurious_page_fault().
++ */
++ flush_mask(mask,
++ (x & PGT_type_mask) &&
++ (x & PGT_type_mask) <= PGT_root_page_table
++ ? FLUSH_TLB | FLUSH_FORCE_IPI
++ : FLUSH_TLB);
++ }
++ }
++ }
++
+ if ( unlikely(((x & PGT_type_mask) == PGT_writable_page) !=
+ (type == PGT_writable_page)) )
+ {
+@@ -3091,13 +3097,25 @@ static int _get_page_type(struct page_info *page, unsigned long type,
+
+ if ( unlikely(!(nx & PGT_validated)) )
+ {
+- if ( !(x & PGT_partial) )
++ /*
++ * No special validation needed for writable or shared pages. Page
++ * tables and GDT/LDT need to have their contents audited.
++ *
++ * per validate_page(), non-atomic updates are fine here.
++ */
++ if ( type == PGT_writable_page || type == PGT_shared_page )
++ page->u.inuse.type_info |= PGT_validated;
++ else
+ {
+- page->nr_validated_ptes = 0;
+- page->partial_flags = 0;
+- page->linear_pt_count = 0;
++ if ( !(x & PGT_partial) )
++ {
++ page->nr_validated_ptes = 0;
++ page->partial_flags = 0;
++ page->linear_pt_count = 0;
++ }
++
++ rc = validate_page(page, type, preemptible);
+ }
+- rc = validate_page(page, type, preemptible);
+ }
+
+ out:
+--
+2.35.1
+